RZ

001: // FileRealize.cc # the actual realize algorithm
002:
003: #include "FileRealize.hh"
004:
005: // ######## ######## ######## ######## ######## ######## ######## ########
006:
007: #include <sys/stat.h>
008: #include <sstream>
009: #include <vector>
010: #include <algorithm>
011:
012: #include "AlgoZlib.hh"
013: #include "RZError.hh"
014: #include "RZNetOrder.hh"
015: #include "RZIO.hh"
016: #include "RZMagic.hh"
017:
018: // ###################### Reconstitute the source stream ######################
019:
020: void RZ::file_realize(int rzfd, int reconsfd)
021: {
022:     // ######## realize rzfd -> reconsfd ########
023:
024:     // ######## read the header --- the magic and version number ########
025:     char header[ 32 ];
026:     fd_read(rzfd, header, 32);
027:     if (!std::equal(RZMagic::file_magic, RZMagic::file_magic + 16, header))
028:         throw ContentError("The magic goes away --- not an rz file");
029:
030:     struct Sums const sums(net4in< RZModuli >(header + 16));
031:
032:     // ######## read the stream structure ########
033:     char structure[ 32 ];
034:     fd_read(rzfd, structure, 32);
035:     
036:     int nblks = net_in(&structure[  0 ]); // num complete 8192K src blks
037:     int rzcrc = net_in(&structure[  4 ]); // crc of the compressed stream
038:     int scend = net_in(&structure[  8 ]); // end (size) source stream
039:     int rzend = net_in(&structure[ 12 ]); // end (size) compressed stream
040:
041:     //int ncont = net_in(&structure[ 16 ]); // num blks before comp reinits
042:     int rzc   = net_in(&structure[ 20 ]); // init (zero) crc value
043:     int scoff = net_in(&structure[ 24 ]); // init offset src before blk 0
044:     int rzoff = net_in(&structure[ 28 ]); // init offset comprs bef blk 0
045:
046:     if (scend >> 13 != nblks) {
047:         throw ContentError("Quotient is wrong!");
048:     }
049:     if (scoff != 0) {
050:         throw ContentError("I don't know how to skip an initial offset");
051:     }
052:     if (rzoff != 0) {
053:         throw ContentError("I don't know how to skip an initial offset");
054:     }
055:
056:     // ######## read the signature table ########
057:     std::vector< char > signature(16 * nblks);
058:     fd_read(rzfd, &signature[ 0 ], 16 * nblks);
059:     std::vector< struct RZSignatureTable::SignatureEntry > sigtab;
060:     sigtab.reserve(nblks);
061:     for (int i = 0; i < nblks; ++i) {
062:         char *buffer = &signature[ i * 16 ];
063:
064:         struct RZSignatureTable::SignatureEntry e;
065:         e.sig_a = net_in(&buffer[  0 ]);
066:         e.sig_b = net_in(&buffer[  4 ]);
067:         e.sig_c = net_in(&buffer[  8 ]);
068:         e.rz_to = net_in(&buffer[ 12 ]);
069:         sigtab.push_back(e);
070:     }
071:
072:     // ######## the indices ########
073:     int rzi, rzr, sci, scr;
074:
075:     // ######## set up the decompression stream ########
076:     struct Zlib::Inflate z0("Decompression");
077:     int s;
078:
079:     // ######## data buffers ########
080:     char buffer[ 16385 ];
081:     char inflated8KB[ 8193 ];
082:     char * const inflated = inflated8KB;
083:
084:     // ######## realize data blocks ########
085:     int bki;
086:     for (
087:         bki = 0, rzi = 0, sci = 0;
088:         bki < nblks && (rzr = sigtab[ bki ].rz_to - rzi);
089:         ++bki, rzi += rzr, sci += 8192)
090:     {
091:         fd_read(rzfd, buffer, rzr);
092:
093:         rzc = crc32(rzc, (Zlib::Bytef*)buffer, rzr);
094:
095:         // ######## inflated8KB <= inflate(buffer) ########
096:         z0.z_inflate.next_in = (Zlib::Bytef*)buffer;
097:         z0.z_inflate.avail_in = rzr;
098:         z0.z_inflate.next_out = (Zlib::Bytef*)inflated8KB;
099:         z0.z_inflate.avail_out = 8192 + 1;
100:         s = z0.zlib_inflate();
101:         if (!(s == Z_OK || s == Z_STREAM_END)) {
102:             std::ostringstream err;
103:             err << "Decompression inflation failed at block "
104:                 << bki<< ": " << s;
105:             throw ContentError(err.str());
106:         }
107:         if (!(z0.z_inflate.avail_in == 0 && z0.z_inflate.avail_out == 1)) {
108:             std::ostringstream err;
109:             err << "Inflated block too small at block " << bki;
110:             throw ContentError(err.str());
111:         }
112:
113:         fd_write(reconsfd, inflated8KB, 8192);
114:
115:         if ( !(sums.a.checksum32(inflated8KB) == sigtab[ bki ].sig_a
116:             && sums.b.checksum32(inflated8KB) == sigtab[ bki ].sig_b
117:             && sums.c.checksum32(inflated8KB) == sigtab[ bki ].sig_c))
118:         {
119:             std::ostringstream err;
120:             err << "Inflated block does not match its signature at block " 
121:                 << bki;
122:             throw ContentError(err.str());
123:         }
124:     }
125:
126:     // #### append the remaining bytes which are not divisible by 8192 ####
127:     for (; (rzr = read(rzfd, buffer, 16384)); rzi += rzr) {
128:         rzc = crc32(rzc, (Zlib::Bytef*)buffer, rzr);
129:
130:         // inflated <= inflate(buffer)
131:         z0.z_inflate.next_in = (Zlib::Bytef*)buffer;
132:         z0.z_inflate.avail_in = rzr;
133:         z0.z_inflate.next_out = (Zlib::Bytef*)inflated;
134:         z0.z_inflate.avail_out = 8192 + 1;
135:         s = z0.zlib_inflate();
136:         if (!(s == Z_OK || s == Z_STREAM_END)) {
137:             std::ostringstream err;
138:             err << "Decompression inflation failed: " << s;
139:             throw ContentError(err.str());
140:         }
141:
142:         scr = (8192 + 1 - z0.z_inflate.avail_out);
143:
144:         fd_write(reconsfd, inflated, scr);
145:
146:         sci += scr;
147:     }
148:
149:     // ######## sanity checks ########
150:     if (rzend != rzi) {
151:         std::ostringstream err;
152:         err << "Mismatch in calculated compressed stream size: "
153:             << rzi << " =/= " << rzend;
154:         throw ContentError(err.str());
155:     }
156:     if (scend != sci) {
157:         std::ostringstream err;
158:         err << "Mismatch in calculated source file size: "
159:             << sci << " =/= " << scend;
160:         throw ContentError(err.str());
161:     }
162:     if (rzcrc != rzc) {
163:         throw ContentError("Mismatch in calculated compressed stream CRC");
164:     }
165:
166:     // ######## one final sanity check ########
167:     lseek(reconsfd, 0, 0);
168:     {
169:         struct stat stbuf;
170:         if (fstat(reconsfd, &stbuf)) {
171:             IOError::what_if("Can't stat file", true);
172:         }
173:         IOError::what_if("Can't stat file size", !stbuf.st_size);
174:         if (scend != stbuf.st_size) {
175:             throw ContentError("Mismatch in calculated source file size");
176:         }
177:     }
178: }
179:
180: // ######## ######## ######## ######## ######## ######## ######## ########
181:
182: // FileRealize.cc # the actual realize algorithm