From a5f2a96b1b8c49b88727bb77636003dd5058246a Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 18 Mar 2024 14:33:12 +0530 Subject: [PATCH 1/2] Soham Haldar| compressor.py decompressor.py| actually made sure that the file was being compressed --- compressor.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ decompressor.py | 30 +++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 compressor.py create mode 100644 decompressor.py diff --git a/compressor.py b/compressor.py new file mode 100644 index 00000000..7c5a79dc --- /dev/null +++ b/compressor.py @@ -0,0 +1,63 @@ +import math +def compress(ifile, codes, ofile): + symbol_codes = {} + with open(codes, 'r') as f: + for line in f: + last_space_index = line.rfind(' ') + if last_space_index != -1: + symbol = line[:last_space_index] + code = line[last_space_index + 1:].strip() + symbol_codes[symbol] = code + with open(ifile, 'r') as f: + itext = f.read().strip() + compressed = '' + for c in itext: + if c.isalpha() : + c=c.upper() + if c in symbol_codes: + compressed += symbol_codes[c] + compressed_bytes = compress_bits(compressed) + with open(ofile, 'wb') as f: + f.write(compressed_bytes) + entropy(itext,compressed) +def entropy(a,b): + D = {} + E = {} + for i in a: + if i not in D: + D[i]=1 + else: + D[i]+=1 + for i in b: + if i not in E: + E[i]=1 + else: + E[i]+=1 + entropya=0 + for i in D: + entropya+=(D[i]/len(a))*math.log2((len(a)/D[i])) + entropyb=0 + for i in E: + entropyb+=(E[i]/len(b))*math.log2((len(b)/E[i])) + print("The information gain from compression is: ",entropya-entropyb) +def compress_bits(compressed): + compressed_bytes = bytearray() + current_byte = 0 + bit_count = 0 + for bit in compressed: + current_byte <<= 1 + current_byte |= int(bit) + bit_count += 1 + if bit_count == 8: + compressed_bytes.append(current_byte) + current_byte = 0 + bit_count = 0 + if bit_count > 0: + current_byte <<= (8 - bit_count) + compressed_bytes.append(current_byte) + return 
bytes(compressed_bytes) +ifile = input("Enter your input filename: ") +codes = input("Enter the filename where you have saved your codes: ") +ofile = input("Enter the filename where you want your compressed document saved: ") +compress(ifile, codes, ofile) +print("Compressed code has been written to:", ofile) \ No newline at end of file diff --git a/decompressor.py b/decompressor.py new file mode 100644 index 00000000..68dc7467 --- /dev/null +++ b/decompressor.py @@ -0,0 +1,30 @@ +def decompress(ifile, codes,ofile): + symbol_codes = {} + with open(codes, 'r') as f: + for line in f: + last_space_index = line.rfind(' ') + if last_space_index != -1: + symbol = line[:last_space_index] + code = line[last_space_index + 1:].strip() + symbol_codes[code] = symbol + with open(ifile, 'rb') as f: # Open the compressed file in binary mode + compressed = f.read() + + # Convert binary data to a binary string + compressed_bits = ''.join(format(byte, '08b') for byte in compressed) + decompressed = '' + current = '' + for bit in compressed_bits: + current += bit + if current in symbol_codes.keys(): + decompressed += symbol_codes[current] + current = '' + with open(ofile, 'w') as f: + f.write(decompressed) + +ifile = input("Enter your input (compressed) filename: ") +codes = input("Enter the filename where you have saved your codes: ") +ofile = input("Enter the filename where you want your decompressed document saved: ") +decompress(ifile, codes,ofile) +print("Decompressed code has been written to:", ofile) + From f11390f2e544d8f85e65aa7fef620682b510a2e4 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 18 Mar 2024 14:44:34 +0530 Subject: [PATCH 2/2] Soham Haldar| compressor.py decompressor.py README(1).md| Added README(1).md for my repo and actually made sure that the file was being compressed --- README(1).md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 README(1).md diff --git a/README(1).md b/README(1).md new file mode 100644 index 00000000..4398be6f --- /dev/null 
+++ b/README(1).md @@ -0,0 +1,3 @@ +Huffman compressor: +Use huffman.cpp to generate the codes file, then use compressor.py and decompressor.py for compression and decompression. +Note: only spaces and alphabetic characters are considered here, not digits. \ No newline at end of file