From 08b6e6550a2a6ade84a24a2ea37a09f2b834a603 Mon Sep 17 00:00:00 2001 From: Jacob Theisen <jacobth@stud.ntnu.no> Date: Tue, 9 Nov 2021 14:28:35 +0100 Subject: [PATCH] =?UTF-8?q?kj=C3=B8r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ov8/huffman.py | 133 +++++++++++++++++++++++++++++++++++++++------- ov8/trash/test | Bin 6 -> 4 bytes ov8/trash/test.py | 22 +++++++- 3 files changed, 135 insertions(+), 20 deletions(-) diff --git a/ov8/huffman.py b/ov8/huffman.py index a43bcb6..1c4096c 100644 --- a/ov8/huffman.py +++ b/ov8/huffman.py @@ -1,3 +1,59 @@ + + + +with open('./compressed_text','rb') as file: + tekst = file.read() + +class Table_object(object): + def __init__(self, char, freq): + self.char = char + self.freq = freq + +all_lz_bytes = [] +strings_array= [] +def dcomp(): + i = 0 + dcomp_string = '' + togle_not_bytes = False + while i < (len(tekst)): + if togle_not_bytes: + dcomp_string += tekst[i:i+num].decode('utf-8') + strings_array.append(tekst[i:i+num].decode('utf-8')) + togle_not_bytes = False + if i + num + 4 < len(tekst): + all_lz_bytes.append(tekst[i + num: i + num + 2]) + all_lz_bytes.append(tekst[i + num + 2: i + num + 4]) + i += num + 4 + else: + value = tekst[i:i+2] + value = int.from_bytes(value, "little") + togle_not_bytes = True + num = value + all_lz_bytes.append(tekst[i:i+2]) + i+=2 + + return dcomp_string + +all_strings = dcomp() +print(all_strings) +print(strings_array) +print(all_lz_bytes) +print(tekst ) + + + +table = [] + +for i in all_strings: + a = [a for a in table if a.char == i] + if a: + a[0].freq +=1 + else: + table.append(Table_object(i, 1)) + +table = sorted(table,key=lambda x: x.freq) + + # A Huffman Tree Node class node: def __init__(self, freq, symbol, left=None, right=None): @@ -17,43 +73,40 @@ class node: self.huff = '' +rep_tree_arr = [] +class rep_tree(): + def __init__(self, symb, val): + self.symb = symb + self.val = val def printNodes(node, val=''): # huffman code for current node - newVal = val + str(node.huff) + newVal = val + str(node.huff) # if node is not an edge node # then traverse inside it - if(node.left): - printNodes(node.left, newVal) - if(node.right): - printNodes(node.right, newVal) + if(node.left): + printNodes(node.left, newVal) + if(node.right): + printNodes(node.right, newVal) # if node is edge node then # display its huffman code - if(not node.left and not node.right): - print(f"{node.symbol} -> {newVal}") + if(not node.left and not node.right): + rep_tree_arr.append(rep_tree(node.symbol, newVal)) -# characters for huffman tree -chars = ['a', 'b', 'c', 'd', 'e', 'f'] -# frequency of characters -freq = [ 5, 9, 12, 13, 16, 45] # list containing unused nodes nodes = [] -# converting characters and frequencies -# into huffman tree nodes -for x in range(len(chars)): - nodes.append(node(freq[x], chars[x])) +for j in table: + nodes.append(node(j.freq, j.char)) while len(nodes) > 1: - # sort all the nodes in ascending order - # based on theri frequency - nodes = sorted(nodes, key=lambda x: x.freq) + # pick 2 smallest nodes left = nodes[0] @@ -75,3 +128,47 @@ while len(nodes) > 1: # Huffman Tree is ready! printNodes(nodes[0]) + + +bit_string = '' + + + +for i in all_strings: + a = [a for a in rep_tree_arr if a.symb == i] + bit_string += a[0].val + + + +if len(bit_string)%8 != 0: + rest = 8 - len(bit_string)%8 + bit_string += '0' * rest + +print(bit_string) + +bytes = [] + +for i in range(0 ,len(bit_string), 8): + num = int(bit_string[i:i+8], 2) + print(num) + bytes.append(num.to_bytes(1,'little')) + +print(bytes) +num_of_bytes = len(bytes) + +with open('./trash/huffmann_test_file_comp', 'w') as file: + for i in rep_tree_arr: + file.write(f'{i.symb} {i.val}') + file.write(f'{rest}') + file.write(f'{num_of_bytes}') + +with open('./trash/huffmann_test_file_comp', 'ab') as file: + for i in bytes: + file.write(i) + + + +with open('./trash/huffmann_test_file_comp', 'ab') as file: + for i in all_lz_bytes: + file.write(i) + diff --git a/ov8/trash/test b/ov8/trash/test index 1c5e7527bb2aa26ae3227c6445eb6aa5cbab321c..a4da33844876f640630e6ce15cf3b448ce581e32 100644 GIT binary patch literal 4 LcmXpsGX4($0>c58 literal 6 NcmeZcNK8813IGO=0$Kn7 diff --git a/ov8/trash/test.py b/ov8/trash/test.py index e769aee..1938684 100644 --- a/ov8/trash/test.py +++ b/ov8/trash/test.py @@ -107,7 +107,25 @@ for i in a: else: print("nor")''' - +''' with open('../opg8-2021.pdf') as file: - print(file.read()) + print(file.read())''' + +import array + + +bits_array = array.array('B') +bits_array.append(int('110001', 2)) +bits_array.append(int('110010', 2)) +bits_array.append(int('110011', 2)) +bits_array.append(int('11111111', 2)) + +print(bits_array) +with open('./test', 'w') as file: + file.write('') + +with open('./test', 'ab') as file: + file.write(bits_array) +with open('./test', "rb") as file: + print(file.read()) \ No newline at end of file -- GitLab