From 08b6e6550a2a6ade84a24a2ea37a09f2b834a603 Mon Sep 17 00:00:00 2001
From: Jacob Theisen <jacobth@stud.ntnu.no>
Date: Tue, 9 Nov 2021 14:28:35 +0100
Subject: [PATCH] =?UTF-8?q?kj=C3=B8r?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ov8/huffman.py    | 133 +++++++++++++++++++++++++++++++++++++++-------
 ov8/trash/test    | Bin 6 -> 4 bytes
 ov8/trash/test.py |  22 +++++++-
 3 files changed, 135 insertions(+), 20 deletions(-)

diff --git a/ov8/huffman.py b/ov8/huffman.py
index a43bcb6..1c4096c 100644
--- a/ov8/huffman.py
+++ b/ov8/huffman.py
@@ -1,3 +1,59 @@
+
+
+
+with open('./compressed_text','rb') as file:
+    tekst = file.read()
+
+class Table_object(object):  # one frequency-table entry: a symbol and its count
+    def __init__(self, char, freq):
+        self.char = char  # the symbol this entry counts
+        self.freq = freq  # occurrences so far; the table builder increments it
+
+all_lz_bytes = []
+strings_array= []
+def dcomp():
+    """Return the UTF-8 literal runs of `tekst` joined; fills the module lists."""
+    pos = 0
+    pieces = []
+    expect_text = False
+    while pos < len(tekst):
+        if not expect_text:
+            header = tekst[pos:pos + 2]
+            run_len = int.from_bytes(header, "little")
+            all_lz_bytes.append(header)
+            expect_text = True
+            pos += 2
+            continue
+        end = pos + run_len
+        chunk = tekst[pos:end].decode('utf-8')
+        pieces.append(chunk)
+        strings_array.append(chunk)
+        expect_text = False
+        if end + 4 < len(tekst):
+            all_lz_bytes.extend((tekst[end:end + 2], tekst[end + 2:end + 4]))
+        pos = end + 4
+    return ''.join(pieces)
+
+all_strings = dcomp()
+print(all_strings)
+print(strings_array)
+print(all_lz_bytes)
+print(tekst )
+
+
+
+# Tally each character in one pass; dicts keep first-seen order, so ties sort as before.
+char_counts = {}
+for i in all_strings:
+    char_counts[i] = char_counts.get(i, 0) + 1
+
+table = [Table_object(char, freq) for char, freq in char_counts.items()]
+
+# Ascending frequency: the tree builder below takes nodes from the front.
+table = sorted(table,key=lambda x: x.freq)
+
+
+
 # A Huffman Tree Node
 class node:
 	def __init__(self, freq, symbol, left=None, right=None):
@@ -17,43 +73,40 @@ class node:
 		self.huff = ''
 
 
+rep_tree_arr = []  # filled by printNodes with one rep_tree per leaf node
+class rep_tree():  # pairs a leaf symbol with its accumulated code string
+    def __init__(self, symb, val):
+        self.symb = symb  # the leaf's node.symbol
+        self.val = val  # concatenated `huff` values along the path to the leaf
 
 
 def printNodes(node, val=''):
 	# huffman code for current node
-	newVal = val + str(node.huff)
+    newVal = val + str(node.huff)
 
 	# if node is not an edge node
 	# then traverse inside it
-	if(node.left):
-		printNodes(node.left, newVal)
-	if(node.right):
-		printNodes(node.right, newVal)
+    if(node.left):
+        printNodes(node.left, newVal)   
+    if(node.right):
+        printNodes(node.right, newVal)
 
 		# if node is edge node then
 		# display its huffman code
-	if(not node.left and not node.right):
-		print(f"{node.symbol} -> {newVal}")
+    if(not node.left and not node.right):
+        rep_tree_arr.append(rep_tree(node.symbol, newVal))
 
 
-# characters for huffman tree
-chars = ['a', 'b', 'c', 'd', 'e', 'f']
 
-# frequency of characters
-freq = [ 5, 9, 12, 13, 16, 45]
 
 # list containing unused nodes
 nodes = []
 
-# converting characters and frequencies
-# into huffman tree nodes
-for x in range(len(chars)):
-	nodes.append(node(freq[x], chars[x]))
+for j in table:
+	nodes.append(node(j.freq, j.char))
 
 while len(nodes) > 1:
-	# sort all the nodes in ascending order
-	# based on theri frequency
-	nodes = sorted(nodes, key=lambda x: x.freq)
+	nodes = sorted(nodes, key=lambda x: x.freq)  # re-sort so the front holds the smallest nodes
 
 	# pick 2 smallest nodes
 	left = nodes[0]
@@ -75,3 +128,47 @@ while len(nodes) > 1:
 
 # Huffman Tree is ready!
 printNodes(nodes[0])
+
+
+# Build the symbol -> code lookup once instead of rescanning rep_tree_arr for
+# every character; reversed() keeps the first entry per symbol, as before.
+code_by_symbol = {entry.symb: entry.val for entry in reversed(rep_tree_arr)}
+
+# Encoded text as a string of '0'/'1' characters.
+bit_string = ''.join(code_by_symbol[ch] for ch in all_strings)
+
+
+
+
+# `rest` must always be bound (0 when already byte-aligned): the header writer reads it.
+rest = (8 - len(bit_string) % 8) % 8
+bit_string += '0' * rest
+
+print(bit_string)
+
+# One single-byte object per 8-bit group (named so the builtin `bytes` stays usable).
+packed_bytes = []
+
+for i in range(0 ,len(bit_string), 8):
+    num = int(bit_string[i:i+8], 2)
+    print(num)
+    packed_bytes.append(num.to_bytes(1,'little'))
+
+print(packed_bytes)
+num_of_bytes = len(packed_bytes)
+
+# Text header: "<symb> <val>" per code, then rest and num_of_bytes (no separators).
+with open('./trash/huffmann_test_file_comp', 'w', encoding='utf-8') as file:
+    for i in rep_tree_arr:
+        file.write(f'{i.symb} {i.val}')
+    file.write(f'{rest}')
+    file.write(f'{num_of_bytes}')
+
+# Binary body: the packed Huffman bytes followed by the raw LZ bytes.
+with open('./trash/huffmann_test_file_comp', 'ab') as file:
+    for i in packed_bytes:
+        file.write(i)
+    for i in all_lz_bytes:
+        file.write(i)
+
+
diff --git a/ov8/trash/test b/ov8/trash/test
index 1c5e7527bb2aa26ae3227c6445eb6aa5cbab321c..a4da33844876f640630e6ce15cf3b448ce581e32 100644
GIT binary patch
literal 4
LcmXpsGX4($0>c58

literal 6
NcmeZcNK8813IGO=0$Kn7

diff --git a/ov8/trash/test.py b/ov8/trash/test.py
index e769aee..1938684 100644
--- a/ov8/trash/test.py
+++ b/ov8/trash/test.py
@@ -107,7 +107,25 @@ for i in a:
     else:
         print("nor")'''
 
-
+'''
 with open('../opg8-2021.pdf') as file:
-    print(file.read())
+    print(file.read())'''
+
+import array 
+
+
+bits_array = array.array('B')
+bits_array.append(int('110001', 2))
+bits_array.append(int('110010', 2))
+bits_array.append(int('110011', 2)) 
+bits_array.append(int('11111111', 2))
+
+print(bits_array)
+with open('./test', 'w') as file:
+    file.write('')
+
+with open('./test', 'ab') as file:
+    file.write(bits_array)
 
+with open('./test', "rb") as file:
+    print(file.read())
\ No newline at end of file
-- 
GitLab