RZ

001: // FileAnalyze.cc # the actual analyze algorithm
002:
003: #include "FileAnalyze.hh"
004:
005: // ######## ######## ######## ######## ######## ######## ######## ########
006:
007: #include <sstream>
008: #include <algorithm>
009:
010: #include "AlgoZlib.hh"
011: #include "RZError.hh"
012: #include "RZNetOrder.hh"
013: #include "RZIO.hh"
014: #include "RZMagic.hh"
015:
016: // ######## analyze and concentrate the source stream ########
017:
018: void RZ::file_analyze(int sourcefd, int rzafd, int rzbfd, int rzcfd)
019: { // Reads sourcefd in 8192-byte chunks, deflates it through one stream, and writes three outputs:
020:     // rzafd - header (16-byte file magic + four moduli fields) and, at the end, a 32-byte trailer of stream totals
021:     // rzbfd - signature table: one 16-byte record (three checksums + compressed end offset) per full 8192-byte block
022:     // rzcfd - compressed data stream: the deflate output, written after every chunk and after the final flush
023:     // Throws AssertionError when deflate reports an unexpected status or the size bookkeeping does not add up.
024:     struct RZModuli default_moduli;
025:     struct Sums const sums(default_moduli); // provides the a/b/c checksum32 helpers used per block below
026:     // header: net_out presumably stores a 4-byte network-order value (offsets advance by 4) - TODO confirm
027:     {
028:         char buffer[ 32 ];
029:
030:         std::copy(RZMagic::file_magic, RZMagic::file_magic + 16, buffer);
031:
032:         net_out(&buffer[ 16 ], default_moduli.modulus_a);
033:         net_out(&buffer[ 20 ], default_moduli.modulus_b);
034:         net_out(&buffer[ 24 ], default_moduli.modulus_c);
035:         net_out(&buffer[ 28 ], default_moduli.blocks_of);
036:
037:         fd_write(rzafd, buffer, 32);
038:     }
039:
040:     int screm = 0; // leftover source bytes in a final short chunk (0 if none)
041:     // NOTE(review): every counter below is a plain int; a source of 2 GiB or more would overflow scend - confirm size limits
042:     int nblks = 0; // number of complete 8192-byte source blocks
043:     int rzcrc = 0; // crc of the compressed stream
044:     int scend = 0; // end (size) of the source stream
045:     int rzend = 0; // end (size) of the compressed stream
046:     // the next four values are never modified in this function and are written to the trailer as 0
047:     int ncont = 0; // number of blocks before compression reinitializes
048:     int rzczr = 0; // initial (zero) crc value
049:     int scoff = 0; // initial offset into source stream before block 0
050:     int rzoff = 0; // initial offset into compressed stream before block 0
051:
052:     struct Zlib::Deflate z1("Compression"); // one deflate stream shared by every chunk and the final flush
053:     int s; // status returned by the most recent zlib_deflate call
054:
055:     int scr; // byte count returned by the most recent fd_read_some call
056:     char next8KB[ 8192 ]; // one source chunk
057:     char deflated[ 16384 ]; // output buffer for a single deflate call
058:
059:     for (
060:         nblks = scend = rzend = 0;
061:         scend += scr = fd_read_some(sourcefd, next8KB, 8192), scr; // comma operator: add the read count to scend, then loop while that count is non-zero
062:         ++nblks) // reached only for full chunks; the short-read break below skips this increment
063:     {
064:         // deflated <= deflate(next8KB)
065:         z1.z_deflate.next_in = (Zlib::Bytef*)next8KB;
066:         z1.z_deflate.avail_in = scr;
067:         z1.z_deflate.next_out = (Zlib::Bytef*)deflated;
068:         z1.z_deflate.avail_out = 16384;
069:         s = z1.zlib_deflate(Z_SYNC_FLUSH); // presumably wraps zlib deflate(); a sync flush pushes all pending output for this chunk - TODO confirm
070:         if (s != Z_OK || z1.z_deflate.avail_in != 0) { // any other status, or input left unconsumed, is treated as fatal
071:             std::ostringstream err;
072:             err << "Compression deflation failed at block " << nblks
073:                 << ": " << s;
074:             throw AssertionError(err.str());
075:         }
076:
077:         int rzr = 16384 - z1.z_deflate.avail_out; // compressed bytes produced by this call
078:         rzend += rzr;
079:         rzcrc = crc32(rzcrc, (Zlib::Bytef*)deflated, rzr); // running crc over exactly the bytes written to rzcfd
080:
081:         fd_write(rzcfd, deflated, rzr);
082:
083:         if (scr != 8192) { // short chunk: compressed above, but gets no signature record
084:             screm = scr;
085:             break; // leaves nblks counting full blocks only
086:         }
087:         // full block: emit its signature record (three checksums, then the compressed size so far)
088:         char buffer[ 16 ];
089:         net_out(&buffer[ 0 ], sums.a.checksum32(next8KB));
090:         net_out(&buffer[ 4 ], sums.b.checksum32(next8KB));
091:         net_out(&buffer[ 8 ], sums.c.checksum32(next8KB));
092:         net_out(&buffer[ 12 ], rzend);
093:
094:         fd_write(rzbfd, buffer, 16);
095:     }
096:     // the source is expected to be exhausted here: one more read must return 0
097:     if (read(sourcefd, next8KB, 8192)) { // NOTE(review): any non-zero result, including -1 on error, is reported as leftover data - confirm intended
098:         throw AssertionError("Ended too early reading the source");
099:     }
100:
101:     // deflated <= final flush of the deflate stream (no new input: avail_in is 0)
102:     z1.z_deflate.next_in = (Zlib::Bytef*)next8KB;
103:     z1.z_deflate.avail_in = 0;
104:     z1.z_deflate.next_out = (Zlib::Bytef*)deflated;
105:     z1.z_deflate.avail_out = 16384;
106:     s = z1.zlib_deflate(Z_FINISH);
107:     if (s == Z_OK) { // finishing did not complete in this single call (per zlib Z_FINISH semantics, assuming the wrapper returns deflate()'s status)
108:         throw AssertionError("Unexpected compressed stream left over");
109:     }
110:     else if (s != Z_STREAM_END) { // only Z_STREAM_END is accepted as success
111:         std::ostringstream err;
112:         err << "Compression flush failed: " << s;
113:         throw AssertionError(err.str());
114:     }
115:
116:     int rzr = 16384 - z1.z_deflate.avail_out; // bytes produced by the final flush
117:     rzend += rzr;
118:     rzcrc = crc32(rzcrc, (Zlib::Bytef*)deflated, rzr);
119:
120:     fd_write(rzcfd, deflated, rzr);
121:     // consistency checks: 1 << 13 == 8192, so these recompute block count and remainder from the total size
122:     if (nblks != (scend >> 13)) {
123:         throw AssertionError("Mismatch in calculated number of blocks");
124:     }
125:     if (screm != scend - (nblks << 13)) {
126:         throw AssertionError("Mismatch in calculated source remainder");
127:     }
128:
129:     { // 32-byte trailer written to rzafd after the header
130:         char buffer[ 32 ];
131:
132:         net_out(&buffer[  0 ], nblks); // num complete 8192B src blks
133:         net_out(&buffer[  4 ], rzcrc); // crc of the compressd stream
134:         net_out(&buffer[  8 ], scend); // end (size) source stream
135:         net_out(&buffer[ 12 ], rzend); // end (size) compressd stream
136:
137:         net_out(&buffer[ 16 ], ncont); // num blks befor comp reinits
138:         net_out(&buffer[ 20 ], rzczr); // init (zero) crc value
139:         net_out(&buffer[ 24 ], scoff); // init offset src befor blk 0
140:         net_out(&buffer[ 28 ], rzoff); // init offset comprs bf blk 0
141:
142:         fd_write(rzafd, buffer, 32);
143:     }
144: }
145:
146: // ######## ######## ######## ######## ######## ######## ######## ########
147:
148: // FileAnalyze.cc # the actual analyze algorithm