-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_generator_using_simhash.py
77 lines (62 loc) · 2.67 KB
/
image_generator_using_simhash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np
from PIL import Image
import hash_generator
import os
class SimHashGenerator:
    """Bundle a list of opcodes with the hash function meant to hash them."""

    def __init__(self, opcodes, hash_function):
        """
        Create a generator that holds the given opcodes and hash function.

        Parameters:
        opcodes (list): Opcodes that are to be hashed.
        hash_function (callable): Hash function intended for producing the SimHash value.
        """
        # Both arguments are kept on the instance exactly as received.
        self.opcodes, self.hash_function = opcodes, hash_function
# Compute the SimHash fingerprint of an opcode sequence
def simhash(opcodes_seq, n=256):
    """
    Return the n-character SimHash bit string of an opcode sequence.

    Each opcode is hashed with hash_generator.sha256_hash. Position i of every
    hash casts a vote: +1 when it holds the character '1', -1 otherwise. A
    position of the result is '1' only when its vote total is strictly
    positive, so ties (and an empty sequence) produce '0'.

    Parameters:
    opcodes_seq (iterable): The opcodes to fingerprint.
    n (int): Number of hash positions to use; also the length of the result.

    Returns:
    str: A string of n '0'/'1' characters.
    """
    # NOTE(review): presumably sha256_hash returns a string of at least n
    # '0'/'1' characters -- confirm against hash_generator. A shorter value
    # makes the indexing below raise IndexError.
    votes = [0] * n
    for token in opcodes_seq:
        digest_bits = hash_generator.sha256_hash(token)
        for pos in range(n):
            votes[pos] += 1 if digest_bits[pos] == '1' else -1

    # Collapse the vote totals into the final bit string.
    bits = []
    for total in votes:
        bits.append('1' if total > 0 else '0')
    return ''.join(bits)
# Render a SimHash bit string as a grayscale image
def simhash_to_image(simhash_value, size=(16, 16)):
    """
    Turn a SimHash bit string into a black-and-white PIL image.

    Parameters:
    simhash_value (str): Sequence of characters; '1' becomes a white pixel
        (255) and any other character a black pixel (0).
    size (tuple): Shape passed to NumPy's reshape. The number of characters
        must equal the product of its entries, otherwise reshape raises
        ValueError.

    Returns:
    PIL.Image.Image: Image created from the pixel matrix in mode 'L'
        (grayscale).
    """
    intensities = []
    for ch in simhash_value:
        intensities.append(255 if ch == '1' else 0)

    # Lay the flat pixel list out as a matrix of the requested shape.
    grid = np.array(intensities).reshape(size)

    # Cast to unsigned bytes before handing the matrix to PIL.
    return Image.fromarray(np.uint8(grid), 'L')
# Load the opcode list stored in a comma-separated text file
def read_opcodes(filename):
    """
    Read a comma-separated opcode file and return its opcodes.

    Parameters:
    filename (str): Path of the text file to read.

    Returns:
    list: Opcode strings in file order, with surrounding whitespace removed
        and empty entries dropped. An empty or whitespace-only file yields [].
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(filename, 'r', encoding='utf-8') as file:
        content = file.read()

    # Strip every token so a trailing newline or a space after a comma does
    # not become part of an opcode (and therefore of its hash). Blank tokens
    # from empty files, doubled commas or a trailing comma are skipped.
    stripped_tokens = (token.strip() for token in content.split(','))
    return [token for token in stripped_tokens if token]
# Folder holding one comma-separated opcode file per sample, and the folder
# that receives one PNG per sample.
input_directory_path = "/home/kourosh/Source/Data/malware_detect2/code_section_opcodes/train/Mediyes"
output_directory_path = "/home/kourosh/Source/Data/malware_detect2/malware_images/train/Mediyes"


def _generate_images(input_dir, output_dir):
    """
    Write one SimHash image for every regular file found in input_dir.

    Each file is read with read_opcodes, fingerprinted with simhash, rendered
    with simhash_to_image and saved as "<file name>.png" inside output_dir.

    Parameters:
    input_dir (str): Directory containing the opcode files.
    output_dir (str): Directory receiving the PNG images; created if missing.
    """
    # Saving into a folder that does not exist yet would fail, so make sure
    # the destination is there before the first image is written.
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so the processing (and log) order is reproducible across runs.
    for file_name in sorted(os.listdir(input_dir)):
        file_path = os.path.join(input_dir, file_name)

        # os.listdir also returns sub-directories; open() cannot read those.
        if not os.path.isfile(file_path):
            continue

        opcodes_seq = read_opcodes(file_path)  # List of opcode strings
        simhash_value = simhash(opcodes_seq)
        print(f"Calculated SimHash Value for the file {file_name}.")

        image = simhash_to_image(simhash_value)
        output_image_path = os.path.join(output_dir, f"{file_name}.png")
        image.save(output_image_path)
        print(f"Saved the generated image of the file {file_name}.")

        # Optional post-processing, disabled in this script: the image can be
        # upscaled with bilinear interpolation, e.g.
        # image.resize((32, 32), Image.BILINEAR), before being shown or saved.


# NOTE(review): this still runs on import, exactly as the original top-level
# loop did. Consider moving the call under `if __name__ == "__main__":` once
# it is confirmed that nothing imports this module for the side effect.
_generate_images(input_directory_path, output_directory_path)