I wrote some code like this:
std::vector<char> unzip(std::vector<char> const& compressed)
{
    std::vector<char> decompressed;

    boost::iostreams::filtering_ostream os;
    os.push(boost::iostreams::gzip_decompressor());
    os.push(boost::iostreams::back_inserter(decompressed));

    boost::iostreams::write(os, &compressed[0], compressed.size());
    os.reset();

    return decompressed;
}
If compressed is a zip bomb, what will happen? I think memory will be exhausted and the process will crash.
How can I avoid this? How can I check the raw (decompressed) data size before decompressing?
You handle it the way you always would: pay attention while unzipping.
You can either decompress into a buffer with fixed/limited capacity (e.g. using boost::iostreams::array_sink), or you can wrap your copy operation with a guard for the maximum output size.
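For the first option, here's a minimal sketch (the name unzip_capped is mine, and a plain bounded read stands in for array_sink here, which avoids having to reason about the sink's behaviour once it fills up):

std::vector<char> unzip_capped(size_t limit, std::vector<char> const& compressed) {
    boost::iostreams::filtering_istream is;
    is.push(boost::iostreams::gzip_decompressor());
    is.push(boost::iostreams::array_source(compressed.data(), compressed.size()));

    std::vector<char> decompressed(limit);             // fixed-capacity output buffer
    is.read(decompressed.data(), decompressed.size()); // reads at most limit bytes
    decompressed.resize(is.gcount());                  // shrink to what was actually produced
    return decompressed;
}

This silently truncates at the cap, just like the loop below; the throwing variant at the end rejects oversized input outright.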
Also, since the input in your example is an in-memory buffer, it makes more sense to use a device than a stream for the input. So here's a simple take:
std::vector<char> unzip(size_t limit, std::vector<char> const& compressed) {
    std::vector<char> decompressed;

    boost::iostreams::filtering_istream is;
    is.push(boost::iostreams::gzip_decompressor());
    is.push(boost::iostreams::array_source(compressed.data(), compressed.size()));

    while (is && (decompressed.size() < limit)) {
        char buf[512];
        is.read(buf, sizeof(buf));
        decompressed.insert(decompressed.end(), buf, buf + is.gcount());
    }

    // note: unless limit is a multiple of the chunk size, the result may
    // exceed limit by up to sizeof(buf) - 1 bytes
    return decompressed;
}
Let's test with a simple mini-bomb of 60 bytes that expands into 20 kilobytes of NUL chars:
#include <boost/iostreams/device/array.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <iostream>
#include <vector>

int main() {
    std::vector<char> const bomb = {
        char(0x1f), char(0x8b), char(0x08), char(0x08), char(0xd1), char(0x6d), char(0x0e), char(0x5b), char(0x00), char(0x03), char(0x62), char(0x6f),
        char(0x6d), char(0x62), char(0x00), char(0xed), char(0xc1), char(0x31), char(0x01), char(0x00), char(0x00), char(0x00), char(0xc2), char(0xa0),
        char(0xf5), char(0x4f), char(0x6d), char(0x0a), char(0x3f), char(0xa0), char(0x00), char(0x00), char(0x00), char(0x00), char(0x00), char(0x00),
        char(0x00), char(0x00), char(0x00), char(0x00), char(0x00), char(0x00), char(0x00), char(0x00), char(0x00), char(0x00), char(0x00), char(0x00),
        char(0x00), char(0x80), char(0xb7), char(0x01), char(0x60), char(0x83), char(0xbc), char(0xe6), char(0x00), char(0x50), char(0x00), char(0x00)
    };

    auto max10k  = unzip( 10 * 1024, bomb);
    auto max100k = unzip(100 * 1024, bomb);

    std::cout << "max10k: "  << max10k.size()  << " bytes\n";
    std::cout << "max100k: " << max100k.size() << " bytes\n";
}
This prints (see it Live On Coliru):
max10k: 10240 bytes
max100k: 20480 bytes
As expected, the 10 KiB run stops at the limit, while the 100 KiB run decompresses the entire 20 KiB payload. Of course, you can opt to throw if the limit is exceeded instead of truncating silently:
std::vector<char> unzip(size_t limit, std::vector<char> const& compressed) {
    std::vector<char> decompressed;

    boost::iostreams::filtering_istream is;
    is.push(boost::iostreams::gzip_decompressor());
    is.push(boost::iostreams::array_source(compressed.data(), compressed.size()));

    while (is) {
        char buf[512];
        is.read(buf, sizeof(buf)); // can't detect EOF before attempting a read on some streams
        if (decompressed.size() + is.gcount() >= limit) // note: >= makes the limit exclusive; use > to allow exactly limit bytes
            throw std::runtime_error("unzip limit exceeded");
        decompressed.insert(decompressed.end(), buf, buf + is.gcount());
    }

    return decompressed;
}
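A hypothetical call site for the throwing variant, assuming the bomb vector and the headers from the demo above, might look like:

int main() {
    try {
        auto data = unzip(10 * 1024, bomb); // bomb as defined in the earlier demo
        std::cout << "decompressed " << data.size() << " bytes\n";
    } catch (std::exception const& e) {
        std::cout << "rejected: " << e.what() << "\n"; // "rejected: unzip limit exceeded"
    }
}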