-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhuffman.py
104 lines (86 loc) · 3.19 KB
/
huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
class Nodes:
def __init__(self, probability, symbol, left=None, right=None):
self.probability = probability
self.symbol = symbol
self.left = left
self.right = right
self.code = ''
def CalculateProbability(the_data):
the_symbols = dict()
for item in the_data:
if the_symbols.get(item) == None:
the_symbols[item] = 1
else:
the_symbols[item] += 1
return the_symbols
the_codes = dict()
def CalculateCodes(node, value=''):
newValue = value + str(node.code)
if (node.left):
CalculateCodes(node.left, newValue)
if (node.right):
CalculateCodes(node.right, newValue)
if (not node.left and not node.right):
the_codes[node.symbol] = newValue
return the_codes
def OutputEncoded(the_data, coding):
encodingOutput = []
for element in the_data:
# print(coding[element], end = '')
encodingOutput.append(coding[element])
the_string = ''.join([str(item) for item in encodingOutput])
return the_string
def TotalGain(the_data, coding):
beforeCompression = len(the_data) * 8
afterCompression = 0
the_symbols = coding.keys()
for symbol in the_symbols:
the_count = the_data.count(symbol)
afterCompression += the_count * len(coding[symbol])
print("Space usage before compression (in bits):", beforeCompression)
print("Space usage after compression (in bits):", afterCompression)
def HuffmanEncoding(the_data):
symbolWithProbs = CalculateProbability(the_data)
the_symbols = symbolWithProbs.keys()
the_probabilities = symbolWithProbs.values()
print("symbols: ", the_symbols)
print("probabilities: ", the_probabilities)
the_nodes = []
for symbol in the_symbols:
the_nodes.append(Nodes(symbolWithProbs.get(symbol), symbol))
while len(the_nodes) > 1:
the_nodes = sorted(the_nodes, key=lambda x: x.probability)
right = the_nodes[0]
left = the_nodes[1]
left.code = 0
right.code = 1
newNode = Nodes(left.probability + right.probability, left.symbol + right.symbol, left, right)
the_nodes.remove(left)
the_nodes.remove(right)
the_nodes.append(newNode)
huffmanEncoding = CalculateCodes(the_nodes[0])
print("symbols with codes", huffmanEncoding)
TotalGain(the_data, huffmanEncoding)
encodedOutput = OutputEncoded(the_data, huffmanEncoding)
return encodedOutput, the_nodes[0]
def HuffmanDecoding(encodedData, huffmanTree):
treeHead = huffmanTree
decodedOutput = []
for x in encodedData:
if x == '1':
huffmanTree = huffmanTree.right
elif x == '0':
huffmanTree = huffmanTree.left
try:
if huffmanTree.left.symbol == None and huffmanTree.right.symbol == None:
pass
except AttributeError:
decodedOutput.append(huffmanTree.symbol)
huffmanTree = treeHead
string = ''.join([str(item) for item in decodedOutput])
return string
the_data = "AAAAAAABBCCCCCCDDDEEEEEEEEE"
print(the_data)
encoding, the_tree = HuffmanEncoding(the_data)
print("Encoded output", encoding)
print("Decoded Output", HuffmanDecoding(encoding, the_tree))