Skip to content

Commit

Permalink
implement hash function (string-based MD5) (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
Adam Fekete committed Oct 22, 2019
1 parent 076823e commit 9608056
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 9 deletions.
83 changes: 74 additions & 9 deletions abcd/model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
import getpass
import logging
from hashlib import md5
from collections import Counter, UserDict
from ase.calculators.singlepoint import SinglePointCalculator

Expand All @@ -10,6 +11,45 @@
logger = logging.getLogger(__name__)


class Hasher(object):
    """Incrementally hash nested Python values into a single hex digest.

    Values are reduced to bytes and fed to an underlying hashlib-style
    object. Supported inputs are ``int``, ``float``, ``str``, ``bytes``,
    ``datetime.datetime``, ``tuple``/``list`` and ``dict``/``UserDict``
    (arbitrarily nested).

    Args:
        method: Object exposing ``update(bytes)`` and ``hexdigest()``.
            When omitted, a fresh ``md5()`` object is created for this
            instance.
    """

    def __init__(self, method=None):
        # A default of ``md5()`` in the signature would be evaluated only
        # once, so every Hasher() would share (and keep extending) the same
        # digest state. Create the default lazily, per instance, instead.
        self.method = md5() if method is None else method

    def update(self, value):
        """Feed ``value`` into the hash.

        Args:
            value: The value to hash; containers are traversed recursively.

        Raises:
            ValueError: If ``value`` (or a nested element) has an
                unsupported type.
        """
        if isinstance(value, bytes):
            self.method.update(value)

        elif isinstance(value, str):
            self.method.update(value.encode('utf-8'))

        elif isinstance(value, (int, float, datetime.datetime)):
            # Scalars are hashed through their ``str()`` representation.
            self.method.update(str(value).encode('utf-8'))

        elif isinstance(value, (tuple, list)):
            # Elements are hashed in order, with no separator between them.
            for element in value:
                self.update(element)

        elif isinstance(value, (dict, UserDict)):
            # Sort the keys so the digest does not depend on insertion order.
            for key in sorted(value.keys()):
                self.update(key.encode('utf-8'))
                self.update(value[key])

        else:
            raise ValueError(
                "The {} type cannot be hashed! (Value: {})".format(type(value), value)
            )

    def __call__(self):
        """Retrieve the digest of the hash."""
        return self.method.hexdigest()


class AbstractModel(UserDict):
reserved_keys = {'n_atoms', 'cell', 'pbc', 'calculator_name', 'calculator_parameters', 'derived'}

Expand Down Expand Up @@ -222,6 +262,23 @@ def pre_save(self):

self['modified'] = datetime.datetime.utcnow()

m = Hasher()

for key in ('numbers', 'positions', 'cell', 'pbc'):
m.update(self[key])

self.derived_keys.append('hash_structure')
self['hash_structure'] = m()

m = Hasher()
for key in self.arrays_keys:
m.update(self[key])
for key in self.info_keys:
m.update(self[key])

self.derived_keys.append('hash')
self['hash'] = m()


if __name__ == '__main__':
import io
Expand All @@ -231,17 +288,10 @@ def pre_save(self):
logging.basicConfig(level=logging.INFO)
# from ase.io import jsonio

xyz = io.StringIO("""
2
Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F"
Si 0.00000000 0.00000000 0.00000000
Si 0.00000000 0.00000000 0.00000000
""")

atoms = read(xyz, format='xyz')
atoms = read('test.xyz', format='xyz', index=0)
atoms.set_cell([1, 1, 1])

# print(atoms)
print(atoms)
# print(atoms.arrays)
# print(atoms.info)

Expand All @@ -253,5 +303,20 @@ def pre_save(self):

pprint(AbstractModel.from_atoms(atoms))

h = Hasher()
h.update(AbstractModel.from_atoms(atoms))
print(h())

model = AbstractModel.from_atoms(atoms)
print(model.to_ase())

# xyz = io.StringIO(
# """
# 2
# Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F"
# Si 0.00000000 0.00000000 0.00000000
# Si 0.00000000 0.00000000 0.00000000
#
# """)
#
# atoms = read(xyz, format='extxyz', index=0)
4 changes: 4 additions & 0 deletions abcd/test.xyz
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
2
Properties=species:S:1:pos:R:3 s="string value" _vtk_test="t e s t _ s t r" pbc="F F F"
Si 0.00000000 0.00000000 0.00000000
Si 0.00000000 0.00000000 0.00000000

0 comments on commit 9608056

Please sign in to comment.