From 57340db38c8edb86f50e550f051ad0bd4f26745c Mon Sep 17 00:00:00 2001
From: rohanBrid18 <bridrohan1122@gmail.com>
Date: Mon, 19 Oct 2020 19:22:25 +0530
Subject: [PATCH 1/5] data encode

---
 huffman.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 huffman.py

diff --git a/huffman.py b/huffman.py
new file mode 100644
index 0000000..ca4a166
--- /dev/null
+++ b/huffman.py
@@ -0,0 +1,30 @@
+import heapq
+from collections import defaultdict
+
+def encode(freq):
+    heap = [[weight, [symbol, '']] for symbol, weight in freq.items()]
+    heapq.heapify(heap)
+    while len(heap) > 1:
+        lo = heapq.heappop(heap)
+        hi = heapq.heappop(heap)
+        for pair in lo[1:]:
+            pair[1] = '0' + pair[1]
+        for pair in hi[1:]:
+            pair[1] = '1' + pair[1]
+        heapq.heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
+    return sorted(heapq.heappop(heap)[1:], key=lambda p: (len(p[-1]), p))
+
+data = "Lorem Ipsum is simply dummy text of the printing and typesetting industry."
+frequency = defaultdict(int)
+
+for symbol in data:
+    frequency[symbol] += 1
+
+huff = encode(frequency)
+enc_data = []
+
+for symbol in data:
+    for i in range(len(huff)):
+        if huff[i][0] == symbol:
+            enc_data.append(huff[i][1])
+print(enc_data)
\ No newline at end of file

From 06f14aa2c3e58dc73c994fe0d2996d7c74c35511 Mon Sep 17 00:00:00 2001
From: rohanBrid18 <bridrohan1122@gmail.com>
Date: Thu, 29 Oct 2020 22:12:01 +0530
Subject: [PATCH 2/5] built huffman tree

---
 huffman.py | 76 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 25 deletions(-)

diff --git a/huffman.py b/huffman.py
index ca4a166..c53eeb0 100644
--- a/huffman.py
+++ b/huffman.py
@@ -1,30 +1,56 @@
-import heapq
-from collections import defaultdict
-
-def encode(freq):
-    heap = [[weight, [symbol, '']] for symbol, weight in freq.items()]
-    heapq.heapify(heap)
-    while len(heap) > 1:
-        lo = heapq.heappop(heap)
-        hi = heapq.heappop(heap)
-        for pair in lo[1:]:
-            pair[1] = '0' + pair[1]
-        for pair in hi[1:]:
-            pair[1] = '1' + pair[1]
-        heapq.heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
-    return sorted(heapq.heappop(heap)[1:], key=lambda p: (len(p[-1]), p))
-
 data = "Lorem Ipsum is simply dummy text of the printing and typesetting industry."
-frequency = defaultdict(int)
+size = len(data) * 8
+print("Initial data size: {} bits\n".format(size))
 
+frequency = []
+characters = []
 for symbol in data:
-    frequency[symbol] += 1
+    if symbol not in frequency:
+        frequency.append(data.count(symbol))
+        frequency.append(symbol)
+        characters.append(symbol)
 
-huff = encode(frequency)
-enc_data = []
+nodes = []
+while len(frequency) > 0:
+    nodes.append(frequency[0:2])
+    frequency = frequency[2:]
 
-for symbol in data:
-    for i in range(len(huff)):
-        if huff[i][0] == symbol:
-            enc_data.append(huff[i][1])
-print(enc_data)
\ No newline at end of file
+nodes.sort()
+huff = []
+huff.append(nodes)
+
+def huffman_tree(nodes):
+    pos = 0
+    newnode = []
+    if len(nodes) > 1:
+        nodes.sort()
+        nodes[pos].append("0")
+        nodes[pos+1].append("1")
+        combined_node1 = nodes[pos][0] + nodes[pos+1][0]
+        combined_node2 = nodes[pos][1] + nodes[pos+1][1]
+        newnode.append(combined_node1)
+        newnode.append(combined_node2)
+        newnodes = []
+        newnodes.append(newnode)
+        newnodes = newnodes + nodes[2:]
+        nodes = newnodes
+        huff.append(nodes)
+        huffman_tree(nodes)
+    return huff
+
+newnodes = huffman_tree(nodes)
+
+huff.sort(reverse=True)
+
+checklist = []
+for level in huff:
+    for node in level:
+        if node not in checklist:
+            checklist.append(node)
+        else:
+            level.remove(node)
+
+count = 0
+for level in huff:
+    print("Level", count, ":", level)
+    count += 1
\ No newline at end of file

From ef6e597ee743ebdd3927f7045574dee3010122e4 Mon Sep 17 00:00:00 2001
From: rohanBrid18 <bridrohan1122@gmail.com>
Date: Thu, 29 Oct 2020 23:01:24 +0530
Subject: [PATCH 3/5] encoded data using huffman tree

---
 huffman.py | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/huffman.py b/huffman.py
index c53eeb0..77b5e51 100644
--- a/huffman.py
+++ b/huffman.py
@@ -53,4 +53,34 @@ def huffman_tree(nodes):
 count = 0
 for level in huff:
     print("Level", count, ":", level)
-    count += 1
\ No newline at end of file
+    count += 1
+print()
+
+def encode(characters, checklist, data):
+    char_bin = []
+    if len(characters) == 1:
+        char_code = [characters[0], "0"]
+        char_bin.append(char_code * len(data))
+    else:
+        for char in characters:
+            charcode = ""
+            for node in checklist:
+                if len(node) > 2 and char in node[1]:
+                    charcode += node[2]
+            char_code = [char, charcode]
+            char_bin.append(char_code)
+
+    for item in char_bin:
+        print(item[0], item[1])
+
+    bitstring = ""
+    for char in data:
+        for item in char_bin:
+            if char in item:
+                bitstring += item[1]
+    return bitstring
+
+encoded_data = encode(characters, checklist, data)
+
+print("\nEncoded Data: {}\n".format(encoded_data))
+print("Compressed data size: {} bits\n".format(len(encoded_data)))
\ No newline at end of file

From a281d5d568b8e303320272dc81b9086f01966cb3 Mon Sep 17 00:00:00 2001
From: rohanBrid18 <bridrohan1122@gmail.com>
Date: Fri, 30 Oct 2020 18:07:06 +0530
Subject: [PATCH 4/5] decoded data using huffman tree

---
 huffman.py | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/huffman.py b/huffman.py
index 77b5e51..d1f904f 100644
--- a/huffman.py
+++ b/huffman.py
@@ -70,17 +70,35 @@ def encode(characters, checklist, data):
             char_code = [char, charcode]
             char_bin.append(char_code)
 
-    for item in char_bin:
-        print(item[0], item[1])
-
     bitstring = ""
     for char in data:
         for item in char_bin:
             if char in item:
                 bitstring += item[1]
-    return bitstring
+    return bitstring, char_bin
+
+encoded_data, character_binary = encode(characters, checklist, data)
 
-encoded_data = encode(characters, checklist, data)
+for item in character_binary:
+    print(item[0], item[1])
 
 print("\nEncoded Data: {}\n".format(encoded_data))
-print("Compressed data size: {} bits\n".format(len(encoded_data)))
\ No newline at end of file
+print("Compressed data size: {} bits\n".format(len(encoded_data)))
+
+def decode(enc_data, character_binary):
+    uncompressed_data = ""
+    code = ""
+    for bit in enc_data:
+        code += bit
+        pos = 0
+        for item in character_binary:
+            if code == item[1]:
+                uncompressed_data += character_binary[pos][0]
+                code = ""
+            pos += 1
+    return uncompressed_data
+
+decoded_data = decode(encoded_data, character_binary)
+
+print("Original data:", decoded_data)
+print("Original data size: {} bits".format(len(decoded_data) * 8))
\ No newline at end of file

From faa0788ea2664ff01b0963711839c81f09d41357 Mon Sep 17 00:00:00 2001
From: rohanBrid18 <bridrohan1122@gmail.com>
Date: Fri, 30 Oct 2020 22:41:36 +0530
Subject: [PATCH 5/5] data compression using huffman encoding

---
 huffman.py | 118 ++++++++++++++++++++++++++++++-----------------------
 stegano.py |  58 ++++++++++++++++----------
 2 files changed, 103 insertions(+), 73 deletions(-)

diff --git a/huffman.py b/huffman.py
index d1f904f..f2975ab 100644
--- a/huffman.py
+++ b/huffman.py
@@ -1,25 +1,4 @@
-data = "Lorem Ipsum is simply dummy text of the printing and typesetting industry."
-size = len(data) * 8
-print("Initial data size: {} bits\n".format(size))
-
-frequency = []
-characters = []
-for symbol in data:
-    if symbol not in frequency:
-        frequency.append(data.count(symbol))
-        frequency.append(symbol)
-        characters.append(symbol)
-
-nodes = []
-while len(frequency) > 0:
-    nodes.append(frequency[0:2])
-    frequency = frequency[2:]
-
-nodes.sort()
-huff = []
-huff.append(nodes)
-
-def huffman_tree(nodes):
+def huffman_tree(nodes, huff):
     pos = 0
     newnode = []
     if len(nodes) > 1:
@@ -35,28 +14,11 @@ def huffman_tree(nodes):
         newnodes = newnodes + nodes[2:]
         nodes = newnodes
         huff.append(nodes)
-        huffman_tree(nodes)
+        huffman_tree(nodes, huff)
     return huff
 
-newnodes = huffman_tree(nodes)
 
-huff.sort(reverse=True)
-
-checklist = []
-for level in huff:
-    for node in level:
-        if node not in checklist:
-            checklist.append(node)
-        else:
-            level.remove(node)
-
-count = 0
-for level in huff:
-    print("Level", count, ":", level)
-    count += 1
-print()
-
-def encode(characters, checklist, data):
+def huff_encode(characters, checklist, data):
     char_bin = []
     if len(characters) == 1:
         char_code = [characters[0], "0"]
@@ -77,15 +39,8 @@ def encode(characters, checklist, data):
                 bitstring += item[1]
     return bitstring, char_bin
 
-encoded_data, character_binary = encode(characters, checklist, data)
-
-for item in character_binary:
-    print(item[0], item[1])
-
-print("\nEncoded Data: {}\n".format(encoded_data))
-print("Compressed data size: {} bits\n".format(len(encoded_data)))
 
-def decode(enc_data, character_binary):
+def huff_decode(enc_data, character_binary):
     uncompressed_data = ""
     code = ""
     for bit in enc_data:
@@ -98,7 +53,66 @@ def decode(enc_data, character_binary):
             pos += 1
     return uncompressed_data
 
-decoded_data = decode(encoded_data, character_binary)
 
-print("Original data:", decoded_data)
-print("Original data size: {} bits".format(len(decoded_data) * 8))
\ No newline at end of file
+def huffman(data):
+    """Huffman-encode data and print size statistics.
+
+    Counts each character, builds sorted [weight, symbol] leaf nodes, lets
+    huffman_tree() merge them level by level, and passes the de-duplicated
+    nodes to huff_encode() to obtain the per-character codes.
+
+    Args:
+        data: the string to compress.
+
+    Returns:
+        (encoded_data, character_binary): the encoded bit string and the
+        [character, code] table produced by huff_encode().
+    """
+    size = len(data) * 8
+    print("Initial data size: {} Kb\n".format(size / 1000))
+
+    # One pass over the data; characters keeps first-appearance order.
+    counts = {}
+    characters = []
+    for symbol in data:
+        if symbol not in counts:
+            counts[symbol] = 0
+            characters.append(symbol)
+        counts[symbol] += 1
+
+    # Leaf nodes are [weight, symbol] pairs, smallest weight first.
+    nodes = sorted([counts[symbol], symbol] for symbol in characters)
+    huff = [nodes]
+    huffman_tree(nodes, huff)
+    # Levels are visited in descending sort order when collecting nodes.
+    huff.sort(reverse=True)
+
+    # Collect every distinct node once. The level lists are left untouched:
+    # removing items from a list while iterating over it skips elements.
+    checklist = []
+    for level in huff:
+        for node in level:
+            if node not in checklist:
+                checklist.append(node)
+
+    encoded_data, character_binary = huff_encode(characters, checklist, data)
+
+    print("Compressed data size: {} Kb\n".format(len(encoded_data) / 1000))
+    # Empty input has size 0; report 0% rather than dividing by zero.
+    if size:
+        compression = round((100 - (len(encoded_data) / size) * 100), 2)
+    else:
+        compression = 0.0
+    print("Compression: {}%\n".format(compression))
+
+    return encoded_data, character_binary
+
+    # decoded_data = huff_decode(encoded_data, character_binary)
+
+    # print("Original data:", decoded_data)
+    # print("Original data size: {} Kb".format((len(decoded_data) * 8) / 1000))
+
+
+# data = "Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum."
+
+# huffman(data)
\ No newline at end of file
diff --git a/stegano.py b/stegano.py
index fbe4840..8b9a77e 100644
--- a/stegano.py
+++ b/stegano.py
@@ -1,6 +1,9 @@
 from PIL import Image
 import numpy as np
 
+from huffman import huffman
+from huffman import huff_decode
+
 def image_load(infilename):
     img = Image.open(infilename)
     img.load()
@@ -31,28 +34,29 @@ def image_create(img):
 
 def encode(img, data):
     img_new = []
-    data_c = 0
+    # data_c = 0
+    k = 0
 
     for i in range(len(img)):
         img_new.append([])
         for j in range(len(img[i])):
             t = img[i][j]
-            # print(t)
-            if data_c < len(data):
+            if k < len(data):
                 # print(t)
                 # print(data[data_c], int(data[data_c], 2), chr(int(data[data_c], 2)))
-                t[0] = t[0][:-3] + data[data_c][0:3]
-                t[1] = t[1][:-3] + data[data_c][3:6]
-                t[2] = t[2][:-2] + data[data_c][6:8]
+                t[0] = t[0][:-3] + data[k+0:k+3]
+                t[1] = t[1][:-3] + data[k+3:k+6]
+                t[2] = t[2][:-3] + data[k+6:k+9]
                 # print(data[data_c][0:3], data[data_c][3:6], data[data_c][6:8])
                 # print(t)
                 # print('-'*50)
-                data_c += 1
-            elif data_c == len(data):
-                t[0] = t[0][0:-3] + '111'
-                t[1] = t[1][0:-3] + '111'
-                t[2] = t[2][0:-2] + '11'
-                data_c += 1
+                # data_c += 1
+                k += 9
+            elif k == len(data):
+                t[0] = t[0][0:-4] + '1111'
+                t[1] = t[1][0:-4] + '1111'
+                t[2] = t[2][0:-4] + '1111'
+                k += 1
 
             img_new[i].append(t)
     return img_new
@@ -61,13 +65,16 @@ def decode(img):
     data = ''
     for i in range(len(img)):
         for j in range(len(img[i])):
+            t = img[i][j]
             t = [np.binary_repr(k, width=8) for k in img[i][j]]
-            d = t[0][-3:] + t[1][-3:] + t[2][-2:]
-            if d == '11111111':
+            last = t[0][-4:] + t[1][-4:] + t[2][-4:]
+            if last == '111111111111':
                 return data
-            e = int(d, 2)
-            f = chr(e)
-            data += f
+            d = t[0][-3:] + t[1][-3:] + t[2][-3:]
+            data += d
+            # e = int(d, 2)
+            # f = chr(e)
+            # data += f
             # print(t)
             # print(t[0][-3:], t[1][-3:], t[2][-2:])
             # print(d, e, f)
@@ -86,7 +93,13 @@ def decode(img):
 with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop
 publishing software like Aldus PageMaker including versions of Lorem Ipsum."""
 # data = "lets make some dummy data"
-data_binary = [np.binary_repr(ord(i), width=8) for i in data]
+
+# data_binary = [np.binary_repr(ord(i), width=8) for i in data]
+data_binary, char_binary = huffman(data)
+padding = 9 - (len(data_binary) % 9)
+for i in range(padding):
+    data_binary += '0'
+# print(len(data_binary))
 
 # img_encoded = image_create(encode(img_bin, data_binary))
 img_encoded = encode(img_bin, data_binary)
@@ -97,8 +110,8 @@ def decode(img):
 image_save(img_crt, './images/output.png')
 img_ip = image_load('./images/output.png')
 
-i=0
-j=0
+# i=0
+# j=0
 
 # print(original[i][j])
 # print([np.binary_repr(i, width=8) for i in original[i][j]])
@@ -108,4 +121,7 @@ def decode(img):
 
 # data_decoded = decode(image_load('./images/output-new.jpg'))
 data_decoded = decode(img_ip)
-print(data_decoded)
+data_decoded = data_decoded[:-padding]
+uncompressed_data = huff_decode(data_decoded, char_binary)
+print("Decoded data:", uncompressed_data)
+print("\nDecoded data size: {} Kb".format((len(uncompressed_data) * 8) / 1000))
\ No newline at end of file