I am trying to produce bigrams from the hexdump of a malware file, which will help me relate different malware files to one another based on their bigrams. I am trying to use Counter, zip, and slice to get the result, but I am getting an error instead. I would be glad if someone could help me out.
import binascii
import re
import collections
try:
from itertools import izip as zip
except ImportError: # will be 3.x series
pass
try:
from itertools import islice as slice
except ImportError: # will be 3.x series
pass
# Attempt to count bigrams of the hex dump of a file.
# NOTE: the TypeError shown in the traceback below is raised by the print
# line: the closing parenthesis right after zip(s) ends the zip call early,
# so Counter receives two positional arguments -- zip(s) and
# slice(s, 1, None) -- instead of one iterable of pairs.
with open('path', 'rb') as f:
# iter() with a b'' sentinel keeps calling f.read() until it returns b'';
# f.read() is called without a size, so each call reads all remaining bytes.
for chunk in iter(lambda: f.read(), b''):
# Hex-encode the chunk that was just read.
s=binascii.hexlify(chunk)
print(collections.Counter(zip(s),slice(s,1,None)))
The result should look like: Counter({(4d5a):200,(5a76):120,(7635):1000...}), but instead I am getting this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-110-d99ed11a1260> in <module>
3 for chunk in iter(lambda: f.read(), b''):
4 s=binascii.hexlify(chunk)
----> 5 print(collections.Counter(zip(s),slice(s,1,None)))
6
~\Anaconda3\lib\collections\__init__.py in __init__(*args, **kwds)
562 self, *args = args
563 if len(args) > 1:
--> 564 raise TypeError('expected at most 1 arguments, got %d' % len(args))
565 super(Counter, self).__init__()
566 self.update(*args, **kwds)
TypeError: expected at most 1 arguments, got 2
import binascii
import collections
import pathlib
# Locate the sample under the current user's Desktop/Malware folder.
# Path.home() is a classmethod, so it is called on the class directly, and
# the "/" operator joins the path components.
malware = pathlib.Path.home() / 'Desktop' / 'Malware' / 'HWID_4_0_6YMBWX.exe'

# Read the whole file into memory as bytes. A missing file surfaces as
# FileNotFoundError from open(), so no separate exists() check is needed.
with malware.open('rb') as fh:
    data = fh.read()
def find_ngrams(data, n):
    """Return an iterator over the n-grams of the hex dump of *data*.

    *data* is hex-encoded with ``binascii.hexlify`` and decoded to text.
    The window then slides one hex character at a time, so each item is a
    tuple of ``n`` single-character strings, and windows that straddle a
    byte boundary are included. For ``b'\\x4d\\x5a'`` and ``n=2`` the items
    are ``('4', 'd')``, ``('d', '5')``, ``('5', 'a')``.

    The iterator is empty when the hex text is shorter than ``n`` characters
    or when ``n`` is not positive.
    """
    hex_text = binascii.hexlify(data).decode()
    # One copy of the text per offset 0..n-1; zipping them in lockstep
    # yields every run of n consecutive characters.
    shifted = []
    for offset in range(n):
        shifted.append(hex_text[offset:])
    return zip(*shifted)
# Bigrams of the hex dump, as tuples of single hex characters.
x = find_ngrams(data, 2)

# Count each bigram, keyed by its joined two-character string (e.g. '4d').
# Joining before counting gives the same keys, in the same first-seen order,
# as counting the tuples and re-keying afterwards.
counts = collections.Counter(''.join(ngram) for ngram in x)

# Plain-dict view of the counts: {bigram_string: occurrences}.
output = dict(counts)

# (bigram, count) pairs, most frequent first; most_common() keeps ties in
# first-seen order.
i = counts.most_common()
print(i)
Output (truncated):
[('00', 31198), ('ff', 14938), ('40', 11669), ('8b', 11537), ('06', 11360), ('20', 11340), ('08', 11144)......