diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 6566224c7..84d3b6427 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -25,7 +25,6 @@ dependencies: - sphinxcontrib-bibtex - mpiplus - pymbar - - pyyaml # Testing - nose diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index ab6112a73..affa7e9fc 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -761,7 +761,7 @@ part of a system. .. doctest:: - >>> # Prepare the T4 Lysozyme + p-xylene system for alchemical modification. + >>> # Prepare the host-guest system for alchemical modification. >>> guest_atoms = host_guest.mdtraj_topology.select('resname B2') >>> alchemical_region = alchemy.AlchemicalRegion(alchemical_atoms=guest_atoms) >>> factory = alchemy.AbsoluteAlchemicalFactory() @@ -786,8 +786,8 @@ Finally, let's mix Monte Carlo and dynamics for propagation. .. doctest:: >>> sequence_move = mcmc.SequenceMove(move_list=[ - ... mcmc.MCDisplacementMove(atom_subset=pxylene_atoms), - ... mcmc.MCRotationMove(atom_subset=pxylene_atoms), + ... mcmc.MCDisplacementMove(atom_subset=guest_atoms), + ... mcmc.MCRotationMove(atom_subset=guest_atoms), ... mcmc.LangevinSplittingDynamicsMove(timestep=2.0*unit.femtoseconds, n_steps=n_steps, ... reassign_velocities=True, n_restart_attempts=6) ... ]) diff --git a/docs/releasehistory.rst b/docs/releasehistory.rst index 1365a4be7..f9d429a57 100644 --- a/docs/releasehistory.rst +++ b/docs/releasehistory.rst @@ -1,12 +1,19 @@ Release History *************** -Current development -=================== +0.18.3 - Storage enhancements and bugfixes +========================================== Bugfixes -------- - Fixed a bug in ``multistateanalyzer.py`` where a function was imported from ``openmmtools.utils`` instead of ``openmmtools.multistate.utils`` (`#430 `_). +- Fixed a few imprecisions in the documentation (`#432 `_). + +Enhancements +------------ +- Writing on disk is much faster when the `checkpoint_interval` of multi-state samplers is large. This was due + to the dimension of the netcdf chunk size increasing with the checkpoint interval and surpassing the dimension + of the netcdf chunk cache. The chunk size of the iteration dimension is now always set to 1 (`#432 `_). 0.18.2 - Bugfix release ======================= diff --git a/docs/states.rst b/docs/states.rst index de73f6457..f9f2e3096 100644 --- a/docs/states.rst +++ b/docs/states.rst @@ -25,3 +25,15 @@ States IComposableState GlobalParameterFunction GlobalParameterState + +| + +Utility functions +----------------- + +.. currentmodule:: openmmtools.states +.. autosummary:: + :nosignatures: + :toctree: api/generated/ + + create_thermodynamic_state_protocol diff --git a/openmmtools/cache.py b/openmmtools/cache.py index 346ee2549..3bd0ffe94 100644 --- a/openmmtools/cache.py +++ b/openmmtools/cache.py @@ -650,12 +650,57 @@ class DummyContextCache(object): The OpenMM platform to use. If None, OpenMM tries to select the fastest one available. + Examples + -------- + Create a new ``Context`` object for alanine dipeptide in vacuum in NPT. + + >>> from simtk import openmm, unit + >>> from openmmtools import states, testsystems + >>> system = testsystems.AlanineDipeptideVacuum().system + >>> thermo_state = states.ThermodynamicState(system, temperature=300*unit.kelvin) + + >>> context_cache = DummyContextCache() + >>> integrator = openmm.VerletIntegrator(1.0*unit.femtosecond) + >>> context, context_integrator = context_cache.get_context(thermo_state, integrator) + + Or create a ``Context`` with an arbitrary integrator (when you only + need to compute energies, for example). + + >>> context, context_integrator = context_cache.get_context(thermo_state) + """ def __init__(self, platform=None): self.platform = platform - def get_context(self, thermodynamic_state, integrator): - """Create a new context in the given thermodynamic state.""" + def get_context(self, thermodynamic_state, integrator=None): + """Create a new context in the given thermodynamic state. + + Parameters + ---------- + thermodynamic_state : states.ThermodynamicState + The thermodynamic state of the system. + integrator : simtk.openmm.Integrator, optional + The integrator to bind to the new context. If ``None``, an arbitrary + integrator is used. Currently, this is a ``LangevinIntegrator`` with + "V R O R V" splitting, but this might change in the future. Default + is ``None``. + + Returns + ------- + context : simtk.openmm.Context + The new context in the given thermodynamic system. + context_integrator : simtk.openmm.Integrator + The integrator bound to the context that can be used for + propagation. This is identical to the ``integrator`` argument + if it was passed. + + """ + if integrator is None: + integrator = integrators.LangevinIntegrator( + timestep=1.0*unit.femtoseconds, + splitting="V R O R V", + temperature=thermodynamic_state.temperature + ) context = thermodynamic_state.create_context(integrator, self.platform) return context, integrator diff --git a/openmmtools/integrators.py b/openmmtools/integrators.py index 01b6d9598..87169d707 100644 --- a/openmmtools/integrators.py +++ b/openmmtools/integrators.py @@ -2125,22 +2125,29 @@ def __init__(self, *args, **kwargs): kwargs['splitting'] = "O { V R V } O" super(GHMCIntegrator, self).__init__(*args, **kwargs) + class FIREMinimizationIntegrator(mm.CustomIntegrator): """Fast Internal Relaxation Engine (FIRE) minimization. + Notes ----- This integrator is taken verbatim from Peter Eastman's example appearing in the CustomIntegrator header file documentation. + References ---------- Erik Bitzek, Pekka Koskinen, Franz Gaehler, Michael Moseler, and Peter Gumbsch. Structural Relaxation Made Simple. PRL 97:170201, 2006. http://dx.doi.org/10.1103/PhysRevLett.97.170201 + Examples -------- >>> from openmmtools import testsystems >>> from simtk import openmm >>> t = testsystems.AlanineDipeptideVacuum() >>> system, positions = t.system, t.positions + + Create a FIRE integrator with default parameters and minimize for 100 steps + >>> integrator = FIREMinimizationIntegrator() >>> context = openmm.Context(system, integrator) >>> context.setPositions(positions) diff --git a/openmmtools/mcmc.py b/openmmtools/mcmc.py index 1d73335c0..6f555c084 100644 --- a/openmmtools/mcmc.py +++ b/openmmtools/mcmc.py @@ -570,7 +570,7 @@ class BaseIntegratorMove(MCMCMove): reassign_velocities : bool, optional If True, the velocities will be reassigned from the Maxwell-Boltzmann distribution at the beginning of the move (default is False). - restart_attempts : int, optional + n_restart_attempts : int, optional When greater than 0, if after the integration there are NaNs in energies, the move will restart. When the integrator has a random component, this may help recovering. On the last attempt, the ``Context`` is @@ -583,7 +583,7 @@ class BaseIntegratorMove(MCMCMove): n_steps : int context_cache : openmmtools.cache.ContextCache reassign_velocities : bool - restart_attempts : int or None + n_restart_attempts : int or None Examples -------- diff --git a/openmmtools/multistate/multistatereporter.py b/openmmtools/multistate/multistatereporter.py index b1ee7d4af..44aa08705 100644 --- a/openmmtools/multistate/multistatereporter.py +++ b/openmmtools/multistate/multistatereporter.py @@ -733,12 +733,13 @@ def write_replica_thermodynamic_states(self, state_indices, iteration): self._ensure_dimension_exists('replica', n_replicas) # Create variables and attach units and description. - ncvar_states = self._storage_analysis.createVariable('states', 'i4', ('iteration', 'replica'), - zlib=False, - chunksizes=(self._checkpoint_interval, n_replicas)) - setattr(ncvar_states, 'units', 'none') - setattr(ncvar_states, "long_name", ("states[iteration][replica] is the thermodynamic state index " - "(0..n_states-1) of replica 'replica' of iteration 'iteration'.")) + ncvar_states = self._storage_analysis.createVariable( + 'states', 'i4', ('iteration', 'replica'), + zlib=False, chunksizes=(1, n_replicas) + ) + ncvar_states.units = 'none' + ncvar_states.long_name = ("states[iteration][replica] is the thermodynamic state index " + "(0..n_states-1) of replica 'replica' of iteration 'iteration'.") # Store thermodynamic states indices. self._storage_analysis.variables['states'][iteration, :] = state_indices[:] @@ -837,29 +838,23 @@ def write_energies(self, energy_thermodynamic_states, energy_neighborhoods, ener self._ensure_dimension_exists('replica', n_replicas) self._ensure_dimension_exists('state', n_states) if 'energies' not in self._storage_analysis.variables: - ncvar_energies = self._storage_analysis.createVariable('energies', - 'f8', - ('iteration', 'replica', 'state'), - zlib=False, - chunksizes=(self._checkpoint_interval, - n_replicas, - n_states)) + ncvar_energies = self._storage_analysis.createVariable( + 'energies', 'f8', ('iteration', 'replica', 'state'), + zlib=False, chunksizes=(1, n_replicas, n_states) + ) ncvar_energies.units = 'kT' ncvar_energies.long_name = ("energies[iteration][replica][state] is the reduced (unitless) " "energy of replica 'replica' from iteration 'iteration' evaluated " "at the thermodynamic state 'state'.") if 'neighborhoods' not in self._storage_analysis.variables: - ncvar_neighborhoods = self._storage_analysis.createVariable('neighborhoods', - 'i1', - ('iteration', 'replica', 'state'), - zlib=False, - fill_value=1, # old-style files will be upgraded to have all states - chunksizes=(self._checkpoint_interval, - n_replicas, - n_states)) - ncvar_neighborhoods.long_name = ("neighborhoods[iteration][replica][state] is 1 if this energy was computed " - "during this iteration.") + ncvar_neighborhoods = self._storage_analysis.createVariable( + 'neighborhoods', 'i1', ('iteration', 'replica', 'state'), + zlib=False, fill_value=1, # old-style files will be upgraded to have all states + chunksizes=(1, n_replicas, n_states) + ) + ncvar_neighborhoods.long_name = ("neighborhoods[iteration][replica][state] is 1 if " + "this energy was computed during this iteration.") if 'unsampled_energies' not in self._storage_analysis.variables: # Check if we have unsampled states. @@ -870,14 +865,10 @@ def write_energies(self, energy_thermodynamic_states, energy_neighborhoods, ener if 'unsampled_energies' not in self._storage_analysis.variables: # Create variable for thermodynamic state energies with units and descriptions. - ncvar_unsampled = self._storage_analysis.createVariable('unsampled_energies', - 'f8', - ('iteration', 'replica', 'unsampled'), - zlib=False, - chunksizes=(self._checkpoint_interval, - n_replicas, - n_unsampled_states) - ) + ncvar_unsampled = self._storage_analysis.createVariable( + 'unsampled_energies', 'f8', ('iteration', 'replica', 'unsampled'), + zlib=False, chunksizes=(1, n_replicas, n_unsampled_states) + ) ncvar_unsampled.units = 'kT' ncvar_unsampled.long_name = ("unsampled_energies[iteration][replica][state] is the reduced " "(unitless) energy of replica 'replica' from iteration 'iteration' " @@ -940,28 +931,20 @@ def write_mixing_statistics(self, n_accepted_matrix, n_proposed_matrix, iteratio self._ensure_dimension_exists('state', n_states) # Create variables with units and descriptions. - ncvar_accepted = self._storage_analysis.createVariable('accepted', - 'i4', - ('iteration', 'state', 'state'), - zlib=False, - chunksizes=(self._checkpoint_interval, - n_states, - n_states) - ) - ncvar_proposed = self._storage_analysis.createVariable('proposed', - 'i4', - ('iteration', 'state', 'state'), - zlib=False, - chunksizes=(self._checkpoint_interval, - n_states, - n_states) - ) - setattr(ncvar_accepted, 'units', 'none') - setattr(ncvar_proposed, 'units', 'none') - setattr(ncvar_accepted, 'long_name', ("accepted[iteration][i][j] is the number of proposed transitions " - "between states i and j from iteration 'iteration-1'.")) - setattr(ncvar_proposed, 'long_name', ("proposed[iteration][i][j] is the number of proposed transitions " - "between states i and j from iteration 'iteration-1'.")) + ncvar_accepted = self._storage_analysis.createVariable( + 'accepted', 'i4', ('iteration', 'state', 'state'), + zlib=False, chunksizes=(1, n_states, n_states) + ) + ncvar_proposed = self._storage_analysis.createVariable( + 'proposed', 'i4', ('iteration', 'state', 'state'), + zlib=False, chunksizes=(1, n_states, n_states) + ) + ncvar_accepted.units = 'none' + ncvar_proposed.units = 'none' + ncvar_accepted.long_name = ("accepted[iteration][i][j] is the number of proposed transitions " + "between states i and j from iteration 'iteration-1'.") + ncvar_proposed.long_name = ("proposed[iteration][i][j] is the number of proposed transitions " + "between states i and j from iteration 'iteration-1'.") # Store statistics. self._storage_analysis.variables['accepted'][iteration, :, :] = n_accepted_matrix[:, :] @@ -1000,9 +983,8 @@ def write_timestamp(self, iteration: int): # Create variable if needed. for storage_key, storage in self._storage_dict.items(): if 'timestamp' not in storage.variables: - timestamp = storage.createVariable('timestamp', str, ('iteration',), - zlib=False, - chunksizes=(self._storage_chunks[storage_key],)) + storage.createVariable('timestamp', str, ('iteration',), + zlib=False, chunksizes=(1,)) timestamp = time.ctime() self._storage_analysis.variables['timestamp'][iteration] = timestamp @@ -1331,10 +1313,10 @@ def _write_1d_online_data(self, iteration, variable, data, storage): if variable not in storage.variables: variable_parameters = self._determine_netcdf_variable_parameters(iteration, data, storage) logger.debug('Creating new NetCDF variable %s with parameters: %s' % (variable, variable_parameters)) # DEBUG - nc_var = storage.createVariable(variable, variable_parameters['dtype'], - dimensions=variable_parameters['dims'], - chunksizes=variable_parameters['chunksizes'], - zlib=False) + storage.createVariable(variable, variable_parameters['dtype'], + dimensions=variable_parameters['dims'], + chunksizes=variable_parameters['chunksizes'], + zlib=False) # Get the variable nc_var = storage[variable] @@ -1401,7 +1383,7 @@ def _determine_netcdf_variable_parameters(self, iteration, data, storage): if iteration is not None: dims = ("iteration", data_dim) - chunks = (self._checkpoint_interval, size) + chunks = (1, size) else: dims = (data_dim,) chunks = (size,) @@ -1536,7 +1518,7 @@ def _ensure_group_exists_and_get(self, group_name, storage=None): return storage.groups[group_name] @staticmethod - def _initialize_sampler_variables_on_file(dataset, n_atoms, n_replicas, is_periodic, iteration_chunk=1): + def _initialize_sampler_variables_on_file(dataset, n_atoms, n_replicas, is_periodic): """ Initialize the NetCDF variables on the storage file needed to store sampler states. Does nothing if file already initialized @@ -1551,8 +1533,6 @@ def _initialize_sampler_variables_on_file(dataset, n_atoms, n_replicas, is_perio Number of Sampler states which will be written is_periodic : bool True if system is periodic; False otherwise. - iteration_chunk : int, Optional, Default: 1 - What chunksize to use for the iteration dimension """ if 'positions' not in dataset.variables: @@ -1561,31 +1541,30 @@ def _initialize_sampler_variables_on_file(dataset, n_atoms, n_replicas, is_perio if 'replica' not in dataset.dimensions: dataset.createDimension('replica', n_replicas) - # Define position variables + # Define position variables. ncvar_positions = dataset.createVariable('positions', 'f4', ('iteration', 'replica', 'atom', 'spatial'), - zlib=True, chunksizes=(iteration_chunk, n_replicas, n_atoms, 3)) - setattr(ncvar_positions, 'units', 'nm') - setattr(ncvar_positions, "long_name", ("positions[iteration][replica][atom][spatial] is position of " - "coordinate 'spatial' of atom 'atom' from replica 'replica' for " - "iteration 'iteration'.")) + zlib=True, chunksizes=(1, n_replicas, n_atoms, 3)) + ncvar_positions.units = 'nm' + ncvar_positions.long_name = ("positions[iteration][replica][atom][spatial] is position of " + "coordinate 'spatial' of atom 'atom' from replica 'replica' for " + "iteration 'iteration'.") # Define variables for periodic systems if is_periodic: ncvar_box_vectors = dataset.createVariable('box_vectors', 'f4', ('iteration', 'replica', 'spatial', 'spatial'), - zlib=False, chunksizes=(iteration_chunk, n_replicas, 3, 3)) + zlib=False, chunksizes=(1, n_replicas, 3, 3)) ncvar_volumes = dataset.createVariable('volumes', 'f8', ('iteration', 'replica'), - zlib=False, chunksizes=(iteration_chunk, n_replicas)) + zlib=False, chunksizes=(1, n_replicas)) - setattr(ncvar_box_vectors, 'units', 'nm') - setattr(ncvar_volumes, 'units', 'nm**3') - - setattr(ncvar_box_vectors, "long_name", ("box_vectors[iteration][replica][i][j] is dimension j of box " - "vector i for replica 'replica' from iteration " - "'iteration-1'.")) - setattr(ncvar_volumes, "long_name", ("volume[iteration][replica] is the box volume for replica " - "'replica' from iteration 'iteration-1'.")) + ncvar_box_vectors.units = 'nm' + ncvar_volumes.units = 'nm**3' + ncvar_box_vectors.long_name = ("box_vectors[iteration][replica][i][j] is dimension j of box " + "vector i for replica 'replica' from iteration " + "'iteration-1'.") + ncvar_volumes.long_name = ("volume[iteration][replica] is the box volume for replica " + "'replica' from iteration 'iteration-1'.") def _write_sampler_states_to_given_file(self, sampler_states: list, iteration: int, storage_file='checkpoint', obey_checkpoint_interval=True): @@ -1612,8 +1591,7 @@ def _write_sampler_states_to_given_file(self, sampler_states: list, iteration: i n_particles = sampler_states[0].n_particles n_replicas = len(sampler_states) self._initialize_sampler_variables_on_file(storage, n_particles, - n_replicas, is_periodic, - iteration_chunk=self._storage_chunks[storage_file]) + n_replicas, is_periodic) if obey_checkpoint_interval: write_iteration = self._calculate_checkpoint_iteration(iteration) else: @@ -1769,11 +1747,6 @@ def _write_dict(self, path, data, storage_name='analysis', packed_data[0] = data_str nc_variable[:] = packed_data - @ property - def _storage_chunks(self): - """Known NetCDF storage chunk sizes""" - return {'checkpoint': 1, 'analysis': self._checkpoint_interval} - # ============================================================================== # MODULE INTERNAL USAGE UTILITIES