cleaning

6d464d7c · Jacob Theisen · 17d257d1 · 6d464d7c · 6d464d7c · 6d464d7c
Commit 6d464d7c authored 3 years ago by Jacob Theisen
--- a/.gitignore
+++ b/.gitignore
+/ov8/trash 
+/ov8/test_files/enwik8 
+/ov8/test_files/opg8-2021.pdf
--- a/ov8/comp.py
+++ b/ov8/comp.py
 import string 
-with open('./fil.txt') as file:
+with open('./test_files/fil.txt') as file:
    tekst = file.read()
@@ -73,10 +73,10 @@ def run_comp():
 #run func and print result
 a = run_comp()
-with open('./compresed', 'w') as file:
+with open('./compressed_text', 'w') as file:
    file.write('')
-print(a)
+#print(a)
 i = 0
 '''a = a.replace('å', 'a')
 a = a.replace('ø', 'o')
@@ -131,10 +131,10 @@ while True:
            if a[i+len_of_int] == ']':
                break
            len_of_int +=1
-        with open('./compresed', 'ab') as file:
+        with open('./compressed_text', 'ab') as file:
            int1 = int(a[i:i+len_of_int])
            file.write(int1.to_bytes(2,'little'))
-        with open('./compresed', 'a') as file:
+        with open('./compressed_text', 'a') as file:
            #print(a[i + len_of_int+1:i + len_of_int+1+int1])
            for j in a[i + len_of_int+1:i + len_of_int+1+int1]:
                if j not in string.printable and j not in extended_string:
@@ -163,7 +163,7 @@ while True:
                break
            len_of_backtrack +=1
        len_of_backtrack_value = int(a[i:len_of_backtrack])
-        with open('./compresed', 'ab') as file:
+        with open('./compressed_text', 'ab') as file:
            file.write(backtrack_value.to_bytes(2,'little'))
            file.write(len_of_backtrack_value.to_bytes(2,'little'))
        togle_append_clear_text = True
@@ -175,7 +175,7 @@ while True:
-with open('./compresed', 'rb') as file:
+#with open('./compressed_text', 'rb') as file:
-    print(file.read())
+#    print(file.read())
\ No newline at end of file
--- a/ov8/dcomp.py
+++ b/ov8/dcomp.py
-with open('./compresed', 'rb') as file:
+with open('./compressed_text', 'rb') as file:
    tekst = file.read()
@@ -34,7 +34,8 @@ def dcomp():
 string = dcomp()
-with open('./uncomp', 'w') as file:
+with open('./uncompressed_text', 'w') as file:
    file.write(string)
--- a/ov8/compresed
+++ b/ov8/compresed
--- a/ov8/halla.py
+++ b/ov8/halla.py
-import PyPDF2
-pdfFileObj = open('opg8-2021.pdf', 'rb')
-pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
-pages = pdfReader.numPages
-tekst = ''
-for i in range(pages):
-    page = pdfReader.getPage(i)
-    tekst += page.extractText()
-print(tekst)
--- a/ov8/huffman.py
+++ b/ov8/huffman.py
+# A Huffman Tree Node: either a leaf created for a single character, or a
+# parent node created by merging two existing nodes.
+class node:
+	def __init__(self, freq, symbol, left=None, right=None):
+		# frequency of symbol (a merged parent is created with the sum of
+		# its two children's frequencies)
+		self.freq = freq
+		# symbol name (character); a merged parent is created with its
+		# children's symbols concatenated
+		self.symbol = symbol
+		# node left of current node (None when not supplied)
+		self.left = left
+		# node right of current node (None when not supplied)
+		self.right = right
+		# tree direction (0/1); starts as '' and is overwritten with 0 or 1
+		# by the tree-building loop when this node is picked as a child
+		self.huff = ''
+# utility function to print huffman
+# codes for all symbols in the newly
+# created Huffman tree
+#
+# node: root of the (sub)tree to print; note that this parameter name
+#       shadows the class `node` inside the function
+# val:  code accumulated along the path from the root to this node's parent
+def printNodes(node, val=''):
+	# huffman code for current node: the path so far plus this node's own
+	# direction (str() because huff holds the int 0/1, or '' if never set)
+	newVal = val + str(node.huff)
+	# if node is not a leaf node
+	# then traverse inside it
+	if(node.left):
+		printNodes(node.left, newVal)
+	if(node.right):
+		printNodes(node.right, newVal)
+	# if node is a leaf node (it has no children) then
+	# display its huffman code
+	if(not node.left and not node.right):
+		print(f"{node.symbol} -> {newVal}")
+# characters for huffman tree
+chars = ['a', 'b', 'c', 'd', 'e', 'f']
+# frequency of characters (freq[x] belongs to chars[x])
+freq = [ 5, 9, 12, 13, 16, 45]
+# list containing unused nodes
+nodes = []
+# converting characters and frequencies
+# into huffman tree nodes
+for x in range(len(chars)):
+	nodes.append(node(freq[x], chars[x]))
+# keep merging until a single node (the root) is left
+while len(nodes) > 1:
+	# sort all the nodes in ascending order
+	# based on their frequency
+	nodes = sorted(nodes, key=lambda x: x.freq)
+	# pick 2 smallest nodes
+	left = nodes[0]
+	right = nodes[1]
+	# assign directional value to these nodes
+	left.huff = 0
+	right.huff = 1
+	# combine the 2 smallest nodes to create
+	# new node as their parent; its frequency is the sum of both
+	# frequencies and its symbol is both symbols concatenated
+	newNode = node(left.freq+right.freq, left.symbol+right.symbol, left, right)
+	# remove the 2 nodes and add their
+	# parent as new node among others
+	nodes.remove(left)
+	nodes.remove(right)
+	nodes.append(newNode)
+# Huffman Tree is ready! nodes[0] is the only node left, i.e. the root
+printNodes(nodes[0])
--- a/ov8/opg8-2021.pdf
+++ b/ov8/opg8-2021.pdf
--- a/ov8/fil.txt
+++ b/ov8/fil.txt
--- a/ov8/trash/test.py
+++ b/ov8/trash/test.py
@@ -92,12 +92,12 @@ for i in tekst:
    str += i[1]
 print(str)'''
-extended_string = '¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+'''extended_string = '¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
 print(string.printable)
 print(extended_string)
+'''
-a = "ø"
+'''a = "ø"
 for i in a:
    if i in string.printable:
@@ -105,4 +105,9 @@ for i in a:
    elif i in extended_string:
        print("not ascii")
    else:
-        print("nor")
+        print("nor")'''
\ No newline at end of file
+with open('../opg8-2021.pdf') as file:
+    print(file.read())
--- a/ov8/uncomp
+++ b/ov8/uncomp