Skip to content
Snippets Groups Projects
Commit 6d464d7c authored by Jacob Theisen's avatar Jacob Theisen
Browse files

cleaning

parent 17d257d1
No related branches found
No related tags found
No related merge requests found
/ov8/trash
/ov8/test_files/enwik8
/ov8/test_files/opg8-2021.pdf
import string import string
with open('./fil.txt') as file: with open('./test_files/fil.txt') as file:
tekst = file.read() tekst = file.read()
...@@ -73,10 +73,10 @@ def run_comp(): ...@@ -73,10 +73,10 @@ def run_comp():
#run func and print result #run func and print result
a = run_comp() a = run_comp()
with open('./compresed', 'w') as file: with open('./compressed_text', 'w') as file:
file.write('') file.write('')
print(a) #print(a)
i = 0 i = 0
'''a = a.replace('å', 'a') '''a = a.replace('å', 'a')
a = a.replace('ø', 'o') a = a.replace('ø', 'o')
...@@ -131,10 +131,10 @@ while True: ...@@ -131,10 +131,10 @@ while True:
if a[i+len_of_int] == ']': if a[i+len_of_int] == ']':
break break
len_of_int +=1 len_of_int +=1
with open('./compresed', 'ab') as file: with open('./compressed_text', 'ab') as file:
int1 = int(a[i:i+len_of_int]) int1 = int(a[i:i+len_of_int])
file.write(int1.to_bytes(2,'little')) file.write(int1.to_bytes(2,'little'))
with open('./compresed', 'a') as file: with open('./compressed_text', 'a') as file:
#print(a[i + len_of_int+1:i + len_of_int+1+int1]) #print(a[i + len_of_int+1:i + len_of_int+1+int1])
for j in a[i + len_of_int+1:i + len_of_int+1+int1]: for j in a[i + len_of_int+1:i + len_of_int+1+int1]:
if j not in string.printable and j not in extended_string: if j not in string.printable and j not in extended_string:
...@@ -163,7 +163,7 @@ while True: ...@@ -163,7 +163,7 @@ while True:
break break
len_of_backtrack +=1 len_of_backtrack +=1
len_of_backtrack_value = int(a[i:len_of_backtrack]) len_of_backtrack_value = int(a[i:len_of_backtrack])
with open('./compresed', 'ab') as file: with open('./compressed_text', 'ab') as file:
file.write(backtrack_value.to_bytes(2,'little')) file.write(backtrack_value.to_bytes(2,'little'))
file.write(len_of_backtrack_value.to_bytes(2,'little')) file.write(len_of_backtrack_value.to_bytes(2,'little'))
togle_append_clear_text = True togle_append_clear_text = True
...@@ -175,7 +175,7 @@ while True: ...@@ -175,7 +175,7 @@ while True:
with open('./compresed', 'rb') as file: #with open('./compressed_text', 'rb') as file:
print(file.read()) # print(file.read())
\ No newline at end of file
with open('./compresed', 'rb') as file: with open('./compressed_text', 'rb') as file:
tekst = file.read() tekst = file.read()
...@@ -34,7 +34,8 @@ def dcomp(): ...@@ -34,7 +34,8 @@ def dcomp():
string = dcomp() string = dcomp()
with open('./uncomp', 'w') as file: with open('./uncompressed_text', 'w') as file:
file.write(string) file.write(string)
File moved
# Extract the text of every page of 'opg8-2021.pdf' and print it.
import PyPDF2

# Open through a context manager so the file handle is closed even if
# parsing or text extraction raises.
with open('opg8-2021.pdf', 'rb') as pdfFileObj:
    pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
    pages = pdfReader.numPages
    # Extraction must happen while the file is still open, because the
    # reader pulls page data from the underlying handle.
    # Join the per-page text once instead of growing a string with +=.
    tekst = ''.join(
        pdfReader.getPage(i).extractText() for i in range(pages)
    )
print(tekst)
# A Huffman Tree Node
class node:
    """A single node of a Huffman tree.

    The constructor stores its arguments unchanged. ``huff`` starts out as
    an empty string and is meant to be assigned later by the code that
    links nodes into a tree.
    """

    def __init__(self, freq, symbol, left=None, right=None):
        """Store the node's frequency, symbol and optional children.

        Args:
            freq: Frequency of the symbol.
            symbol: Symbol name (character).
            left: Node to the left of this node, or None.
            right: Node to the right of this node, or None.
        """
        # frequency of symbol
        self.freq = freq
        # symbol name (character)
        self.symbol = symbol
        # node left of current node
        self.left = left
        # node right of current node
        self.right = right
        # tree direction (0/1); empty until the node is attached to a parent
        self.huff = ''
# utility function to print huffman
# codes for all symbols in the newly
# created Huffman tree
def printNodes(node, val=''):
    """Print ``symbol -> code`` for every leaf at or below ``node``.

    The code of a leaf is the concatenation of ``str(huff)`` for every node
    on the path from the starting node down to that leaf.

    Args:
        node: Node to start from; must expose ``huff``, ``left``, ``right``
            and ``symbol`` attributes.
        val: Code accumulated so far on the way down (empty at the root).
    """
    # huffman code for current node
    newVal = val + str(node.huff)

    # if node is not an edge node
    # then traverse inside it
    if node.left:
        printNodes(node.left, newVal)
    if node.right:
        printNodes(node.right, newVal)

    # if node is edge node then
    # display its huffman code
    if not node.left and not node.right:
        print(f"{node.symbol} -> {newVal}")
# characters for huffman tree
chars = ['a', 'b', 'c', 'd', 'e', 'f']

# frequency of characters
freq = [5, 9, 12, 13, 16, 45]

# list containing unused nodes:
# converting characters and frequencies
# into huffman tree nodes (one leaf per character)
nodes = [node(f, c) for f, c in zip(freq, chars)]

while len(nodes) > 1:
    # sort all the nodes in ascending order
    # based on their frequency
    nodes = sorted(nodes, key=lambda x: x.freq)

    # pick 2 smallest nodes
    left = nodes[0]
    right = nodes[1]

    # assign directional value to these nodes
    left.huff = 0
    right.huff = 1

    # combine the 2 smallest nodes to create
    # new node as their parent
    newNode = node(left.freq + right.freq, left.symbol + right.symbol, left, right)

    # remove the 2 nodes and add their
    # parent as new node among others
    # (after sorting they are exactly the first two entries)
    del nodes[:2]
    nodes.append(newNode)

# Huffman Tree is ready!
printNodes(nodes[0])
File deleted
File moved
...@@ -92,12 +92,12 @@ for i in tekst: ...@@ -92,12 +92,12 @@ for i in tekst:
str += i[1] str += i[1]
print(str)''' print(str)'''
extended_string = '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' '''extended_string = '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
print(string.printable) print(string.printable)
print(extended_string) print(extended_string)
'''
a = "ø" '''a = "ø"
for i in a: for i in a:
if i in string.printable: if i in string.printable:
...@@ -105,4 +105,9 @@ for i in a: ...@@ -105,4 +105,9 @@ for i in a:
elif i in extended_string: elif i in extended_string:
print("not ascii") print("not ascii")
else: else:
print("nor") print("nor")'''
\ No newline at end of file
# Dump the raw contents of the PDF.
# A PDF is binary data, so it is opened in 'rb' mode; text mode would try to
# decode the bytes with the platform's default encoding.
with open('../opg8-2021.pdf', 'rb') as file:
    print(file.read())
File moved
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment