Posted on Leave a comment

GZip, GZipStream

GZIP file format specification version 4.3

Can GZip compression (via .net) increase file size?

Yes, it can. This behavior was fixed in .NET 4:

The compression algorithms for the System.IO.Compression.DeflateStream and System.IO.Compression.GZipStream classes have improved so that data that is already compressed is no longer inflated. This results in much better compression ratios. Also, the 4-gigabyte size restriction for compressing streams has been removed.

How does GZipStream determine the size of compressed data

Compress byte array

[code language="csharp"]
using System.IO;
using System.IO.Compression;
using System.Text;

/// <summary>
/// Sample program: gzip-compresses an in-memory byte array and writes it to disk.
/// </summary>
class Program
{
    /// <summary>
    /// Compresses 10,000 ASCII 'A' characters and writes the result to "compress.gz"
    /// in the current working directory.
    /// </summary>
    static void Main()
    {
        // A char literal needs plain ASCII apostrophes; typographic quotes do not compile.
        byte[] text = Encoding.ASCII.GetBytes(new string('A', 10000));
        byte[] compress = Compress(text);
        File.WriteAllBytes("compress.gz", compress);
    }

    /// <summary>
    /// Compresses a byte array into gzip format entirely in memory.
    /// </summary>
    /// <param name="raw">The uncompressed bytes. Must not be null; may be empty.</param>
    /// <returns>A new array holding the gzip-compressed representation of <paramref name="raw"/>.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="raw"/> is null.</exception>
    public static byte[] Compress(byte[] raw)
    {
        if (raw == null)
        {
            // Fully qualified because this snippet does not import the System namespace.
            throw new System.ArgumentNullException("raw");
        }

        using (MemoryStream memory = new MemoryStream())
        {
            // leaveOpen: true keeps the MemoryStream open when the GZipStream is disposed.
            using (GZipStream gzip = new GZipStream(memory, CompressionMode.Compress, true))
            {
                gzip.Write(raw, 0, raw.Length);
            }

            // Read the buffer only after the GZipStream has been disposed, so that all
            // pending compressed output has been written to the MemoryStream.
            return memory.ToArray();
        }
    }
}
[/code]

Decompress byte array

[code language="csharp"]
using System;
using System.IO;
using System.IO.Compression;

/// <summary>
/// Sample program: inflates a gzip file that has been loaded into memory.
/// </summary>
class Program
{
    /// <summary>
    /// Reads "C:\filename.gz", decompresses it, and prints the compressed
    /// and decompressed sizes in bytes.
    /// </summary>
    static void Main()
    {
        byte[] compressedBytes = File.ReadAllBytes("C:\\filename.gz");
        byte[] inflatedBytes = Decompress(compressedBytes);
        Console.WriteLine(compressedBytes.Length);
        Console.WriteLine(inflatedBytes.Length);
    }

    /// <summary>
    /// Decompresses gzip data held in a byte array.
    /// </summary>
    /// <param name="gzip">The gzip-compressed bytes.</param>
    /// <returns>A new array containing the decompressed bytes.</returns>
    static byte[] Decompress(byte[] gzip)
    {
        const int chunkSize = 4 * 1024;

        using (GZipStream inflater = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
        {
            byte[] chunk = new byte[chunkSize];
            using (MemoryStream result = new MemoryStream())
            {
                // Copy chunk by chunk until Read reports that no more data is available.
                int bytesRead;
                while ((bytesRead = inflater.Read(chunk, 0, chunkSize)) > 0)
                {
                    result.Write(chunk, 0, bytesRead);
                }

                return result.ToArray();
            }
        }
    }
}
[/code]

Compress and decompress

[code language="csharp"]
/// <summary>
/// Gzip-compresses one file into another, streaming the data through a fixed-size buffer.
/// </summary>
/// <param name="fileSource">Path of the existing file to read.</param>
/// <param name="fileDestination">Path of the file to write; it is created or overwritten.</param>
/// <param name="buffsize">Size, in bytes, of the intermediate copy buffer.</param>
public static void Compress(String fileSource, String fileDestination, int buffsize)
{
    using (var source = new FileStream(fileSource, FileMode.Open, FileAccess.Read))
    using (var target = new FileStream(fileDestination, FileMode.Create, FileAccess.Write))
    using (var compressor = new GZipStream(target, CompressionMode.Compress))
    {
        var chunk = new Byte[buffsize];
        for (;;)
        {
            int bytesRead = source.Read(chunk, 0, chunk.Length);
            if (bytesRead <= 0)
            {
                // End of the source file (or a zero-length buffer): nothing more to compress.
                break;
            }

            compressor.Write(chunk, 0, bytesRead);
        }

        compressor.Flush();
    }
}

/// <summary>
/// Decompresses a gzip file into another file, streaming the data through a fixed-size buffer.
/// </summary>
/// <param name="fileSource">Path of the existing gzip file to read.</param>
/// <param name="fileDestination">Path of the file to write; it is created or overwritten.</param>
/// <param name="buffsize">Size, in bytes, of the intermediate copy buffer.</param>
public static void Decompress(String fileSource, String fileDestination, int buffsize)
{
    using (var source = new FileStream(fileSource, FileMode.Open, FileAccess.Read))
    using (var target = new FileStream(fileDestination, FileMode.Create, FileAccess.Write))
    using (var inflater = new GZipStream(source, CompressionMode.Decompress))
    {
        var chunk = new Byte[buffsize];
        for (;;)
        {
            int bytesRead = inflater.Read(chunk, 0, chunk.Length);
            if (bytesRead <= 0)
            {
                // No more decompressed data is available.
                break;
            }

            target.Write(chunk, 0, bytesRead);
        }
    }
}
[/code]

Multithreading

[code language="csharp"]
/// <summary>
/// Compresses a file into "&lt;inFileName&gt;.gz" as a sequence of independently
/// gzip-compressed blocks, compressing up to <c>threadNumber</c> blocks in parallel.
/// </summary>
/// <remarks>
/// <para>
/// Each block's total compressed length is written into bytes 4-7 of that block's own
/// gzip header (the MTIME field in the gzip file format), which lets a reader find the
/// block boundaries. The Decompress routine in this file treats that value as the exact
/// block length: after an 8-byte prefix it reads "length - 8" more bytes and takes the
/// uncompressed size from offset "length - 4".
/// </para>
/// <para>
/// <c>threadNumber</c>, <c>dataArray</c> and <c>compressedDataArray</c> are declared outside
/// this snippet; the arrays are indexed by the slot number of each worker thread.
/// </para>
/// <para>Failures are reported on the console instead of being propagated to the caller.</para>
/// </remarks>
/// <param name="inFileName">Path of the file to compress.</param>
public static void Compress(string inFileName)
{
    // Never let the portion size drop to zero (threadNumber larger than the page size):
    // a zero-byte portion would never advance the input position and the loop would spin forever.
    int dataPortionSize = Math.Max(1, Environment.SystemPageSize / threadNumber);
    try
    {
        // using blocks close both files even when an exception interrupts the loop.
        // FileMode.Append: if the target already exists, the new blocks are added after its current content.
        using (FileStream inFile = new FileStream(inFileName, FileMode.Open))
        using (FileStream outFile = new FileStream(inFileName + ".gz", FileMode.Append))
        {
            Console.Write("Compressing…");

            while (inFile.Position < inFile.Length)
            {
                Console.Write(".");
                Thread[] tPool = new Thread[threadNumber];

                // Hand out one portion per worker slot until the slots or the input run out.
                for (int portionCount = 0;
                     (portionCount < threadNumber) && (inFile.Position < inFile.Length);
                     portionCount++)
                {
                    long remaining = inFile.Length - inFile.Position;
                    int portionSize = remaining <= dataPortionSize ? (int)remaining : dataPortionSize;
                    byte[] portion = new byte[portionSize];

                    // Stream.Read may return fewer bytes than requested, so keep reading
                    // until the portion is completely filled.
                    int filled = 0;
                    while (filled < portionSize)
                    {
                        int read = inFile.Read(portion, filled, portionSize - filled);
                        if (read == 0)
                        {
                            throw new EndOfStreamException("Unexpected end of file while reading " + inFileName);
                        }

                        filled += read;
                    }

                    dataArray[portionCount] = portion;

                    tPool[portionCount] = new Thread(CompressBlock);
                    tPool[portionCount].Start(portionCount);
                }

                // Collect the results in slot order so blocks are written in input order.
                // Join blocks until the worker finishes instead of polling its state in a busy loop.
                for (int portionCount = 0;
                     (portionCount < threadNumber) && (tPool[portionCount] != null);
                     portionCount++)
                {
                    tPool[portionCount].Join();

                    byte[] block = compressedDataArray[portionCount];

                    // Store the exact block length in header bytes 4-7 so it matches
                    // what the reader expects (see remarks).
                    BitConverter.GetBytes(block.Length).CopyTo(block, 4);
                    outFile.Write(block, 0, block.Length);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("ERROR:" + ex.Message);
    }
}

/// <summary>
/// Thread entry point: gzip-compresses the portion stored in <c>dataArray</c> at the given
/// slot and stores the compressed bytes in the same slot of <c>compressedDataArray</c>.
/// </summary>
/// <param name="i">The slot index, boxed as an <see cref="object"/>; it is cast to <c>int</c>.</param>
public static void CompressBlock(object i)
{
    int slot = (int)i;
    byte[] rawPortion = dataArray[slot];

    // The output buffer starts with a capacity equal to the uncompressed size.
    using (MemoryStream packed = new MemoryStream(rawPortion.Length))
    {
        using (GZipStream gzip = new GZipStream(packed, CompressionMode.Compress))
        {
            gzip.Write(rawPortion, 0, rawPortion.Length);
        }

        // Taken after the GZipStream is disposed, so all compressed output is in the buffer.
        compressedDataArray[slot] = packed.ToArray();
    }
}

/// <summary>
/// Decompresses a file made of consecutive gzip blocks, inflating up to
/// <c>threadNumber</c> blocks in parallel, and writes the result to the input
/// file name with its last three characters (e.g. ".gz") removed.
/// </summary>
/// <remarks>
/// <para>
/// Expected block layout: bytes 4-7 of each block's gzip header hold the total length of
/// that block in bytes, and the last four bytes of the block hold the uncompressed size
/// (the ISIZE trailer field of the gzip format).
/// </para>
/// <para>
/// <c>threadNumber</c>, <c>dataArray</c> and <c>compressedDataArray</c> are declared outside
/// this snippet; the arrays are indexed by the slot number of each worker thread.
/// </para>
/// <para>Failures are reported on the console instead of being propagated to the caller.</para>
/// </remarks>
/// <param name="inFileName">Path of the block-compressed file to decompress.</param>
public static void Decompress(string inFileName)
{
    // Number of leading bytes needed to reach the length field (header bytes 4-7).
    const int prefixLength = 8;
    // A gzip member has at least a 10-byte header and an 8-byte trailer.
    const int minBlockLength = 18;

    try
    {
        // using blocks close both files even when an exception interrupts the loop.
        // FileMode.Append: if the target already exists, the output is added after its current content.
        using (FileStream inFile = new FileStream(inFileName, FileMode.Open))
        using (FileStream outFile = new FileStream(inFileName.Remove(inFileName.Length - 3), FileMode.Append))
        {
            Console.Write("Decompressing…");
            byte[] buffer = new byte[prefixLength];

            while (inFile.Position < inFile.Length)
            {
                Console.Write(".");
                Thread[] tPool = new Thread[threadNumber];

                // Load one compressed block per worker slot until the slots or the input run out.
                for (int portionCount = 0;
                     (portionCount < threadNumber) && (inFile.Position < inFile.Length);
                     portionCount++)
                {
                    ReadBlockBytes(inFile, buffer, 0, prefixLength);
                    int compressedBlockLength = BitConverter.ToInt32(buffer, 4);
                    if (compressedBlockLength < minBlockLength)
                    {
                        throw new InvalidDataException("Invalid compressed block length: " + compressedBlockLength);
                    }

                    byte[] block = new byte[compressedBlockLength];
                    buffer.CopyTo(block, 0);
                    ReadBlockBytes(inFile, block, prefixLength, compressedBlockLength - prefixLength);

                    // The last four bytes of the block give the size of the output buffer to allocate.
                    int uncompressedSize = BitConverter.ToInt32(block, compressedBlockLength - 4);
                    if (uncompressedSize < 0)
                    {
                        throw new InvalidDataException("Invalid uncompressed block size: " + uncompressedSize);
                    }

                    compressedDataArray[portionCount] = block;
                    dataArray[portionCount] = new byte[uncompressedSize];

                    tPool[portionCount] = new Thread(DecompressBlock);
                    tPool[portionCount].Start(portionCount);
                }

                // Collect the results in slot order so the output keeps the original order.
                // Join blocks until the worker finishes instead of polling its state in a busy loop.
                for (int portionCount = 0;
                     (portionCount < threadNumber) && (tPool[portionCount] != null);
                     portionCount++)
                {
                    tPool[portionCount].Join();
                    outFile.Write(dataArray[portionCount], 0, dataArray[portionCount].Length);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("ERROR:" + ex.Message);
    }
}

/// <summary>
/// Reads exactly <paramref name="count"/> bytes from <paramref name="source"/> into
/// <paramref name="target"/>, looping because a single Read call may return fewer bytes.
/// </summary>
/// <exception cref="EndOfStreamException">The stream ended before all bytes were read.</exception>
private static void ReadBlockBytes(Stream source, byte[] target, int offset, int count)
{
    int filled = 0;
    while (filled < count)
    {
        int read = source.Read(target, offset + filled, count - filled);
        if (read == 0)
        {
            throw new EndOfStreamException("Unexpected end of file inside a compressed block.");
        }

        filled += read;
    }
}

/// <summary>
/// Thread entry point: inflates the gzip block stored in <c>compressedDataArray</c> at the
/// given slot into the pre-allocated buffer in the same slot of <c>dataArray</c>.
/// </summary>
/// <remarks>
/// The destination buffer's length determines how many bytes are requested. If the
/// compressed data yields fewer bytes, the rest of the buffer is left untouched.
/// </remarks>
/// <param name="i">The slot index, boxed as an <see cref="object"/>; it is cast to <c>int</c>.</param>
public static void DecompressBlock(object i)
{
    int slot = (int)i;
    byte[] target = dataArray[slot];

    using (MemoryStream input = new MemoryStream(compressedDataArray[slot]))
    using (GZipStream ds = new GZipStream(input, CompressionMode.Decompress))
    {
        // A single Read call is not guaranteed to fill the buffer, so keep reading
        // until the buffer is full or the stream reports no more data.
        int filled = 0;
        while (filled < target.Length)
        {
            int read = ds.Read(target, filled, target.Length - filled);
            if (read == 0)
            {
                // Compressed data ended early; stop rather than throw on a worker thread.
                break;
            }

            filled += read;
        }
    }
}
[/code]

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.