RZ

001: // ExtractLocalLookup.hh # the actual synthesize algorithm
002:
003: #ifndef _ExtractLocalLookup_HH
004: #define _ExtractLocalLookup_HH 1
005:
006: #include <cstdio>
007: #include <functional>
008: #include <algorithm>
009: #include <utility>
010:
011: #include "RZMagic.hh"
012: #include "ExtractIntermediate.hh"
013:
014: namespace RZ {
015:     struct LocalSums: Sums {
016:         unsigned int block_size;
017:         LocalSums(struct RZWrapper const &);
018:         LocalSums(struct IntermediateWrapper const &);
019:     };
020:
021:     struct LookupTable;
022: }
023:
024: // #### signature lookup table --- sorted and hashed for fast access ####
025:
026: // Every 8192 byte interval within the scanned file, of which there are
027: // ( size - 8191 ) in any given file, has its checksum calculated and is
028: // looked up against the known signatures of remote blocks.  Where they
029: // match marks a candidate for further checksums to verify the match.
030:
031: // These data structures and routines do that lookup, returning a range
032: // into the table of remote signatures of all those lookup signatures that
033: // match.  This range can be iterated over for further processing
034:
035: // Being called from the innermost loop, ( size - 8191 ) times
036: // within a scanned file, there is a heavy premium on efficiency
037: // The fast_range inline achieves this through the use of a supplementary
038: // hash table based on the highest ( 32 - sig_bit_shift ) bits of the
039: // signature.
040:
041: struct RZ::LookupTable {
042:     struct LookupEntry { unsigned int sig_a, sig_b, sig_c, bk_i; };
043:
044:     typedef std::vector< LookupEntry > Lookup;
045:     typedef std::pair< Lookup::iterator, Lookup::iterator > Range;
046:
047: private:
048:     struct LookupLess {
049:         bool operator()(unsigned int c, LookupEntry b) { return c < b.sig_c; }
050:         bool operator()(LookupEntry a, unsigned int c) { return a.sig_c < c; }
051:     };
052:     struct EntryLess: std::binary_function< LookupEntry, LookupEntry, bool > {
053:         bool operator()(LookupEntry a, LookupEntry b)
054:         {
055:             return a.sig_c < b.sig_c;
056:         }
057:     };
058:     struct LowerBound: std::unary_function< LookupEntry, bool > {
059:         unsigned int lower_bound;
060:         bool operator()(LookupEntry b) { return lower_bound <= b.sig_c; }
061:         LowerBound(unsigned int l): lower_bound(l) {}
062:     };
063:
064:
065:
066: private:
067:     Lookup lookup_table;
068:
069:
070:
071:     unsigned int sig_bit_shift;
072:     std::vector< Lookup::iterator > fast_lookup_hash_table;
073:
074: public:
075:     inline Range fast_range(unsigned int sigc)
076:     {
077:         unsigned int hash = sigc >> sig_bit_shift;
078:         return std::equal_range(
079:             fast_lookup_hash_table[hash], fast_lookup_hash_table[hash+1],
080:             sigc, LookupLess());
081:     }
082:     inline Range slow_range(unsigned int sigc)
083:     {
084:         return std::equal_range(
085:             lookup_table.begin(), lookup_table.end(), sigc, LookupLess());
086:     }
087:
088: private:
089:     inline void setup_fast_lookup()
090:     {
091:         sig_bit_shift = 24;  // signature bits shifted away to make hash key
092:         for (unsigned int n = lookup_table.size(); n > 256; n >>= 1)
093:             --sig_bit_shift;
094:
095:         // the hash table is merely a quick entry for the binary search
096:         // algorithm into the already sorted signature table
097:         {
098:             Lookup::iterator b = lookup_table.begin(), e = lookup_table.end();
099:             {
100:                 unsigned int cut = 0, step = 1 << sig_bit_shift;
101:                 do {
102:                     b = std::find_if(b, e, LowerBound(cut));
103:                     fast_lookup_hash_table.push_back(b);
104:                 } while (cut += step, cut - step < cut);
105:             }
106:             fast_lookup_hash_table.push_back(e);
107:
108:             // P.S. do-while condition is a magic trick! Works on both
109:             // signed and unsigned integers without modification by
110:             // simulating an overflow or carry flag respectively.
111:         }
112:     }
113:
114:
115:
116: public:
117:     LookupTable() {}
118:     LookupTable(RZSignatureTable::Signature const &sigtab)
119:     {
120:         extract_lookup_table(sigtab);
121:         std::sort(lookup_table.begin(), lookup_table.end(), EntryLess());
122:         setup_fast_lookup();
123:     }
124:     LookupTable(unsigned int const nblks, std::FILE * const RZWRAPPER)
125:     {
126:         extract_lookup_table(nblks, RZWRAPPER);
127:         std::sort(lookup_table.begin(), lookup_table.end(), EntryLess());
128:         setup_fast_lookup();
129:     }
130:
131: private:
132:     void extract_lookup_table(RZSignatureTable::Signature const &);
133:     void extract_lookup_table(unsigned int, std::FILE *);
134: };
135:
136: // ExtractLocalLookup.hh # the actual synthesize algorithm
137:
138: #endif