CCAtAlvis · rohanBrid18 · Oct 19, 2020 · Oct 29, 2020 · Oct 29, 2020 · Oct 30, 2020
diff --git a/huffman.py b/huffman.py
@@ -0,0 +1,118 @@
+def huffman_tree(nodes, huff):
+    """Repeatedly merge the two lightest nodes, recording each level in huff.
+
+    Each node is a ``[weight, symbols]`` list. Every round sorts ``nodes`` in
+    place and appends "0" / "1" to the two smallest node lists themselves (so
+    the tags also appear in levels already stored in ``huff``), then appends
+    the next level -- merged node first -- to ``huff``. A loop replaces the
+    recursion so large alphabets cannot hit the recursion limit. Returns huff.
+    """
+    while len(nodes) > 1:
+        nodes.sort()
+        low, high = nodes[0], nodes[1]
+        low.append("0")
+        high.append("1")
+        merged = [low[0] + high[0], low[1] + high[1]]
+        nodes = [merged] + nodes[2:]
+        huff.append(nodes)
+    return huff
+
+
+def huff_encode(characters, checklist, data):
+    """Build the code table for ``characters`` and encode ``data`` with it.
+
+    A node of ``checklist`` tagged with a bit looks like
+    ``[weight, symbols, bit]``; a symbol's code is the bits, in checklist
+    order, of all tagged nodes whose ``symbols`` contain it. A one-symbol
+    alphabet gets the code "0". Symbols of ``data`` missing from the table
+    add no bits. Returns ``(bitstring, char_bin)`` with ``char_bin`` a list
+    of ``[symbol, code]`` pairs in the order of ``characters``.
+    """
+    if len(characters) == 1:
+        char_bin = [[characters[0], "0"]]  # one pair, however long data is
+    else:
+        tagged = [node for node in checklist if len(node) > 2]
+        char_bin = [[c, "".join(n[2] for n in tagged if c in n[1])]
+                    for c in characters]
+    # Look up by symbol only: testing `char in [symbol, code]` would also
+    # match the code, so a "0"/"1" character would pick up foreign codes.
+    codes = {symbol: code for symbol, code in char_bin}
+    return "".join(codes.get(char, "") for char in data), char_bin
+
+
+def huff_decode(enc_data, character_binary):
+    """Decode the bit string ``enc_data`` with ``[symbol, code, ...]`` entries."""
+    pieces = []
+    pending = ""
+    for bit in enc_data:
+        pending = pending + bit
+        # Emit the symbol of every entry whose code equals the pending bits.
+        for entry in character_binary:
+            if entry[1] == pending:
+                pieces.append(entry[0])
+                pending = ""
+    return "".join(pieces)
+
+
+def huffman(data):
+    """Huffman-encode the string ``data`` and print size statistics.
+
+    Builds ``[count, symbol]`` leaf nodes, lets ``huffman_tree`` record every
+    merge level, flattens the levels (root level first) into a duplicate-free
+    node list and hands that to ``huff_encode``.
+
+    Sizes are printed in kilobits: the input counts 8 bits per character,
+    the output one bit per character of the encoded bit string.
+
+    Returns ``(encoded_data, character_binary)``: the bit string and the
+    list of ``[symbol, code]`` pairs that ``huff_decode`` needs.
+    """
+    size = len(data) * 8
+    print("Initial data size: {} Kb\n".format(size / 1000))
+
+    # Count each symbol in one pass; dicts keep first-occurrence order, so
+    # `characters` lists the symbols in the order they first appear.
+    counts = {}
+    for symbol in data:
+        counts[symbol] = counts.get(symbol, 0) + 1
+    characters = list(counts)
+
+    nodes = [[count, symbol] for symbol, count in counts.items()]
+    nodes.sort()
+    huff = [nodes]
+    huffman_tree(nodes, huff)  # appends the merge levels to `huff` in place
+
+    # Descending order (root level first): codes then read root-to-leaf.
+    huff.sort(reverse=True)
+
+    # A node is present in several levels; keep only its first occurrence.
+    # The levels are left untouched: removing from a list while iterating
+    # over it skips elements.
+    checklist = []
+    for level in huff:
+        for node in level:
+            if node not in checklist:
+                checklist.append(node)
+
+    encoded_data, character_binary = huff_encode(characters, checklist, data)
+
+    print("Compressed data size: {} Kb\n".format(len(encoded_data) / 1000))
+
+    # Empty input has size 0; report 0% instead of dividing by zero.
+    if size:
+        compression = round((100 - (len(encoded_data) / size) * 100), 2)
+    else:
+        compression = 0.0
+    print("Compression: {}%\n".format(compression))
+
+    return encoded_data, character_binary
+
+    # decoded_data = huff_decode(encoded_data, character_binary)
+
+    # print("Original data:", decoded_data)
+    # print("Original data size: {} Kb".format((len(decoded_data) * 8) / 1000))
+
+
+# data = "Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum."
+
+# huffman(data)
diff --git a/stegano.py b/stegano.py
@@ -1,6 +1,9 @@
 from PIL import Image
 import numpy as np
 
+from huffman import huffman
+from huffman import huff_decode
+
 def image_load(infilename):
     img = Image.open(infilename)
     img.load()
@@ -31,28 +34,29 @@ def image_create(img):
 
 def encode(img, data):
     img_new = []
-    data_c = 0
+    # k is the offset into `data` of the next slice to embed
+    k = 0
 
     for i in range(len(img)):
         img_new.append([])
         for j in range(len(img[i])):
             t = img[i][j]
-            # print(t)
-            if data_c < len(data):
+            if k < len(data):
                 # print(t)
                 # print(data[data_c], int(data[data_c], 2), chr(int(data[data_c], 2)))
-                t[0] = t[0][:-3] + data[data_c][0:3]
-                t[1] = t[1][:-3] + data[data_c][3:6]
-                t[2] = t[2][:-2] + data[data_c][6:8]
+                t[0] = t[0][:-3] + data[k+0:k+3]
+                t[1] = t[1][:-3] + data[k+3:k+6]
+                t[2] = t[2][:-3] + data[k+6:k+9]
                 # print(data[data_c][0:3], data[data_c][3:6], data[data_c][6:8])
                 # print(t)
                 # print('-'*50)
-                data_c += 1
-            elif data_c == len(data):
-                t[0] = t[0][0:-3] + '111'
-                t[1] = t[1][0:-3] + '111'
-                t[2] = t[2][0:-2] + '11'
-                data_c += 1
+                # data[k:k+9] went into t[0], t[1], t[2]; step past it
+                k += 9
+            elif k == len(data):
+                t[0] = t[0][0:-4] + '1111'
+                t[1] = t[1][0:-4] + '1111'
+                t[2] = t[2][0:-4] + '1111'
+                k += 1
 
             img_new[i].append(t)
     return img_new
@@ -61,13 +65,16 @@ def decode(img):
     data = ''
     for i in range(len(img)):
         for j in range(len(img[i])):
+            t = img[i][j]
             t = [np.binary_repr(k, width=8) for k in img[i][j]]
-            d = t[0][-3:] + t[1][-3:] + t[2][-2:]
-            if d == '11111111':
+            last = t[0][-4:] + t[1][-4:] + t[2][-4:]
+            if last == '111111111111':
                 return data
-            e = int(d, 2)
-            f = chr(e)
-            data += f
+            d = t[0][-3:] + t[1][-3:] + t[2][-3:]
+            data += d
+            # e = int(d, 2)
+            # f = chr(e)
+            # data += f
             # print(t)
             # print(t[0][-3:], t[1][-3:], t[2][-2:])
             # print(d, e, f)
@@ -86,7 +93,13 @@ def decode(img):
 with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop
 publishing software like Aldus PageMaker including versions of Lorem Ipsum."""
 # data = "lets make some dummy data"
-data_binary = [np.binary_repr(ord(i), width=8) for i in data]
+
+# data_binary = [np.binary_repr(ord(i), width=8) for i in data]
+data_binary, char_binary = huffman(data)
+padding = 9 - (len(data_binary) % 9)
+for i in range(padding):
+    data_binary += '0'
+# print(len(data_binary))
 
 # img_encoded = image_create(encode(img_bin, data_binary))
 img_encoded = encode(img_bin, data_binary)
@@ -97,8 +110,8 @@ def decode(img):
 image_save(img_crt, './images/output.png')
 img_ip = image_load('./images/output.png')
 
-i=0
-j=0
+# i=0
+# j=0
 
 # print(original[i][j])
 # print([np.binary_repr(i, width=8) for i in original[i][j]])
@@ -108,4 +121,7 @@ def decode(img):
 
 # data_decoded = decode(image_load('./images/output-new.jpg'))
 data_decoded = decode(img_ip)
-print(data_decoded)
+data_decoded = data_decoded[:-padding]
+uncompressed_data = huff_decode(data_decoded, char_binary)
+print("Decoded data:", uncompressed_data)
+print("\nDecoded data size: {} Kb".format((len(uncompressed_data) * 8) / 1000))