RZ

001: // FileAnalyze.cc # the actual analyze algorithm
002: 
003: #include "FileAnalyze.hh"
004: 
005: // ######## ######## ######## ######## ######## ######## ######## ########
006: 
007: #include <sstream>
008: #include <algorithm>
009: 
010: #include "AlgoZlib.hh"
011: #include "RZError.hh"
012: #include "RZNetOrder.hh"
013: #include "RZIO.hh"
014: #include "RZMagic.hh"
015: 
016: // ######## analyze and concentrate the source stream ########
017: 
018: void RZ::file_analyze(int sourcefd, int rzafd, int rzbfd, int rzcfd)
019: {
020:     // rzafd - header and stream structure
021:     // rzbfd - signature table
022:     // rzcfd - compressed data stream
023: 
024:     struct RZModuli default_moduli;
025:     struct Sums const sums(default_moduli);
026: 
027:     {
028:         char buffer[ 32 ];
029: 
030:         std::copy(RZMagic::file_magic, RZMagic::file_magic + 16, buffer);
031: 
032:         net_out(&buffer[ 16 ], default_moduli.modulus_a);
033:         net_out(&buffer[ 20 ], default_moduli.modulus_b);
034:         net_out(&buffer[ 24 ], default_moduli.modulus_c);
035:         net_out(&buffer[ 28 ], default_moduli.blocks_of);
036: 
037:         fd_write(rzafd, buffer, 32);
038:     }
039: 
040:     int screm = 0; // leftover source
041: 
042:     int nblks = 0; // number of complete 8192K source blocks
043:     int rzcrc = 0; // crc of the compressed stream
044:     int scend = 0; // end (size) of the source stream
045:     int rzend = 0; // end (size) of the compressed stream
046: 
047:     int ncont = 0; // number of blocks before compression reinitializes
048:     int rzczr = 0; // initial (zero) crc value
049:     int scoff = 0; // initial offset into source stream before block 0
050:     int rzoff = 0; // initial offset into compressed stream before block 0
051: 
052:     struct Zlib::Deflate z1("Compression");
053:     int s;
054: 
055:     int scr;
056:     char next8KB[ 8192 ];
057:     char deflated[ 16384 ];
058: 
059:     for (
060:         nblks = scend = rzend = 0;
061:         scend += scr = fd_read_some(sourcefd, next8KB, 8192), scr;
062:         ++nblks)
063:     {
064:         // deflated <= deflate(next8KB)
065:         z1.z_deflate.next_in = (Zlib::Bytef*)next8KB;
066:         z1.z_deflate.avail_in = scr;
067:         z1.z_deflate.next_out = (Zlib::Bytef*)deflated;
068:         z1.z_deflate.avail_out = 16384;
069:         s = z1.zlib_deflate(Z_SYNC_FLUSH);
070:         if (s != Z_OK || z1.z_deflate.avail_in != 0) {
071:             std::ostringstream err;
072:             err << "Compression deflation failed at block " << nblks
073:                 << ": " << s;
074:             throw AssertionError(err.str());
075:         }
076: 
077:         int rzr = 16384 - z1.z_deflate.avail_out;
078:         rzend += rzr;
079:         rzcrc = crc32(rzcrc, (Zlib::Bytef*)deflated, rzr);
080: 
081:         fd_write(rzcfd, deflated, rzr);
082: 
083:         if (scr != 8192) {
084:             screm = scr;
085:             break;
086:         }
087: 
088:         char buffer[ 16 ];
089:         net_out(&buffer[ 0 ], sums.a.checksum32(next8KB));
090:         net_out(&buffer[ 4 ], sums.b.checksum32(next8KB));
091:         net_out(&buffer[ 8 ], sums.c.checksum32(next8KB));
092:         net_out(&buffer[ 12 ], rzend);
093: 
094:         fd_write(rzbfd, buffer, 16);
095:     }
096: 
097:     if (read(sourcefd, next8KB, 8192)) {
098:         throw AssertionError("Ended too early reading the source");
099:     }
100: 
101:     // deflated <= deflate(next8KB)
102:     z1.z_deflate.next_in = (Zlib::Bytef*)next8KB;
103:     z1.z_deflate.avail_in = 0;
104:     z1.z_deflate.next_out = (Zlib::Bytef*)deflated;
105:     z1.z_deflate.avail_out = 16384;
106:     s = z1.zlib_deflate(Z_FINISH);
107:     if (s == Z_OK) {
108:         throw AssertionError("Unexpected compressed stream left over");
109:     }
110:     else if (s != Z_STREAM_END) {
111:         std::ostringstream err;
112:         err << "Compression flush failed: " << s;
113:         throw AssertionError(err.str());
114:     }
115: 
116:     int rzr = 16384 - z1.z_deflate.avail_out;
117:     rzend += rzr;
118:     rzcrc = crc32(rzcrc, (Zlib::Bytef*)deflated, rzr);
119: 
120:     fd_write(rzcfd, deflated, rzr);
121: 
122:     if (nblks != (scend >> 13)) {
123:         throw AssertionError("Mismatch in calculated number of blocks");
124:     }
125:     if (screm != scend - (nblks << 13)) {
126:         throw AssertionError("Mismatch in calculated source remainder");
127:     }
128: 
129:     {
130:         char buffer[ 32 ];
131: 
132:         net_out(&buffer[  0 ], nblks); // num complete 8192K src blks
133:         net_out(&buffer[  4 ], rzcrc); // crc of the compressd stream
134:         net_out(&buffer[  8 ], scend); // end (size) source stream
135:         net_out(&buffer[ 12 ], rzend); // end (size) compressd stream
136: 
137:         net_out(&buffer[ 16 ], ncont); // num blks befor comp reinits
138:         net_out(&buffer[ 20 ], rzczr); // init (zero) crc value
139:         net_out(&buffer[ 24 ], scoff); // init offset src befor blk 0
140:         net_out(&buffer[ 28 ], rzoff); // init offset comprs bf blk 0
141: 
142:         fd_write(rzafd, buffer, 32);
143:     }
144: }
145: 
146: // ######## ######## ######## ######## ######## ######## ######## ########
147: 
148: // FileAnalyze.cc # the actual analyze algorithm