Source code for mdpow.equil

# equil.py
# Copyright (c) 2010-2011 Oliver Beckstein

"""
:mod:`mdpow.equil` --- Setting up and running equilibrium MD
============================================================

The :mod:`mdpow.equil` module facilitates the setup of equilibrium
molecular dynamics simulations of a compound molecule in a simulation
box of water or other solvent such as octanol.

It requires as input

 - the itp file for the compound
 - a coordinate (structure) file (in pdb or gro format)

By default it uses the *OPLS/AA* forcefield and the *TIP4P* water
model.

.. warning:: Other forcefields than OPLS/AA are currently not
   officially supported; it is not hard to do but requires tedious
   changes to a few paths in template scripts.

.. autoclass:: Simulation
   :members:
.. autoclass:: WaterSimulation
.. autoclass:: OctanolSimulation

.. autodata:: DIST
"""
import pickle
import re

import os, errno
import shutil
from pathlib import Path
from string import Template
from typing import Union

import MDAnalysis as mda

try:
    import gromacs.setup, gromacs.cbook
except (ImportError, OSError):
    raise ImportError("Gromacs installation not found, source GMXRC?")
from gromacs.utilities import in_dir, realpath, asiterable, AttributeDict
import gromacs.utilities

from . import config
from . import forcefields
from .restart import Journalled

import logging

logger = logging.getLogger("mdpow.equil")

# ITP <-- forcefields.get_solvent_model(id).itp
# BOX <-- forcefields.get_solvent_model(id).coordinates


# TODO: change to water distance 1.2 in the future (1.0 for
#       compatibility with our SAMPL5 runs)
#: minimum distance between solute and box surface (in nm)
DIST = {
    "water": 1.0,
    "octanol": 1.5,
    "cyclohexane": 1.5,
    "wetoctanol": 1.5,
    "toluene": 1.5,
}


[docs]class Simulation(Journalled): """Simple MD simulation of a single compound molecule in water. Typical use :: S = Simulation(molecule='DRUG') S.topology(itp='drug.itp') S.solvate(struct='DRUG-H.pdb') S.energy_minimize() S.MD_relaxed() S.MD() .. Note:: The OPLS/AA force field and the TIP4P water molecule is the default; changing this is possible but will require provision of customized itp, mdp and template top files at various stages. """ #: Keyword arguments to pre-set some file names; they are keys in :attr:`Simulation.files`. filekeys = ( "topology", "processed_topology", "structure", "solvated", "ndx", "energy_minimized", "MD_relaxed", "MD_restrained", "MD_NPT", ) topdir_default = "Equilibrium" dirname_default = os.path.curdir solvent_default = "water" #: Coordinate files of the full system in increasing order of advancement of #: the protocol; the later the better. The values are keys into :attr:`Simulation.files`. coordinate_structures = ( "solvated", "energy_minimized", "MD_relaxed", "MD_restrained", "MD_NPT", ) checkpoints = ( "solvated", "energy_minimized", "MD_relaxed", "MD_restrained", "MD_NPT", ) #: Check list of all methods that can be run as an independent protocol; see also #: :meth:`Simulation.get_protocol` and :class:`restart.Journal` protocols = ( "MD_NPT", "MD_NPT_run", # *_run as dummies for the ... "MD_relaxed", "MD_relaxed_run", # ...checkpointing logic "MD_restrained", "MD_restrained_run", "energy_minimize", "solvate", "topology", ) #: Default Gromacs *MDP* run parameter files for the different stages. #: (All are part of the package and are found with :func:`mdpow.config.get_template`.) mdp_defaults = { "MD_relaxed": "NPT_opls.mdp", "MD_restrained": "NPT_opls.mdp", "MD_NPT": "NPT_opls.mdp", "energy_minimize": "em_opls.mdp", } def __init__( self, molecule=None, forcefield: Union[forcefields.Forcefield, str] = "OPLS-AA", **kwargs, ): """Set up Simulation instance. The *molecule* of the compound molecule should be supplied. Existing files (which have been generated in previous runs) can also be supplied. :Keywords: *molecule* Identifier for the compound molecule. This is the same as the entry in the ``[ molecule ]`` section of the itp file. ["DRUG"] *filename* If provided and *molecule* is ``None`` then load the instance from the pickle file *filename*, which was generated with :meth:`~mdpow.equil.Simulation.save`. *dirname* base directory; all other directories are created under it *forcefield* A :class:`~forcefields.Forcefield`, or the string name of a packaged forcefield: 'OPLS-AA', 'CHARMM' or 'AMBER'. *solvent* 'water' or 'octanol' or 'cyclohexane' or 'wetoctanol' or 'toluene' *solventmodel* ``None`` chooses the default (e.g, the :class:`mdpow.forcefields.Forcefield` default water model for ``solvent == "water"``. Other options are the models defined in :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment, there are no alternative parameterizations included for other solvents. *mdp* dict with keys corresponding to the stages ``energy_minimize``, ``MD_restrained``, ``MD_relaxed``, ``MD_NPT`` and values *mdp* file names (if no entry then the package defaults are used) *distance* minimum distance between solute and closest box face *kwargs* advanced keywords for short-circuiting; see :data:`mdpow.equil.Simulation.filekeys`. .. versionchanged:: 0.9.0 `forcefield` may now be either a :class:`~forcefields.Forcefield` or the string name of a builtin forcefield. """ self.__cache = {} filename = kwargs.pop("filename", None) dirname = kwargs.pop("dirname", self.dirname_default) forcefield = forcefields.get_forcefield(forcefield) solvent = kwargs.pop("solvent", self.solvent_default) # mdp files --- should get values from default runinput.cfg # None values in the kwarg mdp dict are ignored # self.mdp: key = stage, value = path to MDP file # 'water' will choose the default ('tip4p'), other choices are # 'tip3p', 'spc', 'spce', 'm24', for water; no choices # available for 'cyclohexane' and 'octanol' solventmodel = kwargs.pop("solventmodel", None) mdp_kw = kwargs.pop("mdp", {}) self.mdp = dict( (stage, config.get_template(fn)) for stage, fn in self.mdp_defaults.items() ) self.mdp.update( dict( (stage, config.get_template(fn)) for stage, fn in mdp_kw.items() if fn is not None ) ) if molecule is None and filename is not None: # load from pickle file self.load(filename) self.filename = filename kwargs = {} # for super else: self.molecule = molecule or "DRUG" self.dirs = AttributeDict( basedir=realpath(dirname), # .../Equilibrium/<solvent> includes=list(asiterable(kwargs.pop("includes", []))) + [config.includedir], ) # pre-set filenames: keyword == variable name self.files = AttributeDict( [(k, kwargs.pop(k, None)) for k in self.filekeys] ) self.deffnm = kwargs.pop("deffnm", "md") if self.files.topology: # assume that a user-supplied topology lives in a 'standard' top dir # that includes the necessary itp file(s) self.dirs.topology = realpath(os.path.dirname(self.files.topology)) self.dirs.includes.append(self.dirs.topology) self.forcefield = forcefield self.solvent_type = solvent self.solventmodel_identifier = forcefields.get_solvent_identifier( solvent, model=solventmodel, forcefield=forcefield, ) if self.solventmodel_identifier is None: msg = "No parameters for solvent {0} and solventmodel {1} available.".format( solvent, solventmodel ) logger.error(msg) raise ValueError(msg) self.solventmodel = forcefields.get_solvent_model( self.solventmodel_identifier, forcefield=forcefield, ) distance = kwargs.pop("distance", None) distance = distance if distance is not None else DIST[solvent] self.solvent = AttributeDict( itp=self.solventmodel.itp, box=self.solventmodel.coordinates, distance=distance, ) self.filename = filename or self.solvent_type + ".simulation" super(Simulation, self).__init__(**kwargs) def BASEDIR(self, *args): return os.path.join(self.dirs.basedir, *args)
[docs] def save(self, filename=None): """Save instance to a pickle file. The default filename is the name of the file that was last loaded from or saved to. """ if filename is None: if self.filename is None: self.filename = filename or self.solvent_type + ".simulation" logger.warning( "No filename known, saving instance under name %r", self.filename ) filename = self.filename else: self.filename = filename with open(filename, "wb") as f: pickle.dump(self, f) logger.debug("Instance pickled to %(filename)r" % vars())
[docs] def load(self, filename=None): """Re-instantiate class from pickled file.""" if filename is None: if self.filename is None: self.filename = self.molecule.lower() + ".pickle" logger.warning("No filename known, trying name %r", self.filename) filename = self.filename with open(filename, "rb") as f: instance = pickle.load(f) self.__dict__.update(instance.__dict__) logger.debug("Instance loaded from %(filename)r" % vars())
[docs] def make_paths_relative(self, prefix=os.path.curdir): """Hack to be able to copy directories around: prune basedir from paths. .. Warning:: This is not guaranteed to work for all paths. In particular, check :attr:`mdpow.equil.Simulation.dirs.includes` and adjust manually if necessary. """ def assinglet(m): if len(m) == 1: return m[0] elif len(m) == 0: return None return m basedir = self.dirs.basedir for key, fn in self.files.items(): try: self.files[key] = fn.replace(basedir, prefix) except AttributeError: pass for key, val in self.dirs.items(): fns = asiterable(val) # treat them all as lists try: self.dirs[key] = assinglet([fn.replace(basedir, prefix) for fn in fns]) except AttributeError: pass for key, fn in self.mdp.items(): try: self.mdp[key] = fn.replace(basedir, prefix) except AttributeError: pass logger.warning( "make_paths_relative(): check/manually adjust %s.dirs.includes = %r !", self.__class__.__name__, self.dirs.includes, )
[docs] def topology(self, itp="drug.itp", prm=None, **kwargs): """Generate a topology for compound *molecule*. :Keywords: *itp* Gromacs itp file; will be copied to topology dir and included in topology *prm* Gromacs prm file; if given, will be copied to topology dir and included in topology *dirname* name of the topology directory ["top"] *kwargs* see source for *top_template*, *topol* """ self.journal.start("topology") dirname = kwargs.pop("dirname", self.BASEDIR("top")) self.dirs.topology = realpath(dirname) template = forcefields.get_top_template(self.solvent_type) top_template = config.get_template(kwargs.pop("top_template", template)) default_top_path = Path(top_template) if ".top" in default_top_path.suffixes: # Include all suffixes up to that one default_top_name = default_top_path.name default_top_name = ( default_top_name[: default_top_name.index(".top")] + ".top" ) else: default_top_name = default_top_path.with_suffix(".top").name topol = kwargs.pop("topol", default_top_name) self.top_template = top_template itp = os.path.realpath(itp) _itp = os.path.basename(itp) if prm is None: prm_kw = "" else: prm = os.path.realpath(prm) _prm = os.path.basename(prm) prm_kw = '#include "{}"'.format(_prm) with in_dir(dirname): with open(top_template, "r") as f: top_string_template = Template(f.read()) top_string_formatted = top_string_template.substitute( forcefield_itp=self.forcefield.forcefield_dir / "forcefield.itp", prm_line=f'#include "{prm_kw}"' if prm_kw else "", compound_itp=_itp, solvent_itp=self.forcefield.forcefield_dir / self.solvent.itp, ions_itp=self.forcefield.ions_itp, water_itp=self.forcefield.default_water_itp, compound_name=self.molecule, solvent=self.solvent_type, ) with open(topol, "w") as f: f.write(top_string_formatted) shutil.copy(itp, _itp) if prm is not None: shutil.copy(prm, _prm) logger.info( "[%(dirname)s] Created topology %(topol)r that includes %(_itp)r", vars() ) # update known files and dirs self.files.topology = realpath(dirname, topol) if not self.dirs.topology in self.dirs.includes: self.dirs.includes.append(self.dirs.topology) self.journal.completed("topology") return {"dirname": dirname, "topol": topol}
@staticmethod def _setup_solvate(**kwargs): """Solvate structure in a single solvent box.""" return gromacs.setup.solvate(**kwargs)
[docs] def solvate(self, struct=None, **kwargs): """Solvate structure *struct* in a box of solvent. The solvent is determined with the *solvent* keyword to the constructor. :Keywords: *struct* pdb or gro coordinate file (if not supplied, the value is used that was supplied to the constructor of :class:`~mdpow.equil.Simulation`) *distance* minimum distance between solute and the closes box face; the default depends on the solvent but can be set explicitly here, too. *bt* any box type understood by :func:`gromacs.editconf` (``-bt``): * "triclinic" is a triclinic box, * "cubic" is a rectangular box with all sides equal; * "dodecahedron" represents a rhombic dodecahedron; * "octahedron" is a truncated octahedron. The default is "dodecahedron". *kwargs* All other arguments are passed on to :func:`gromacs.setup.solvate`, but set to sensible default values. *top* and *water* are always fixed. """ self.journal.start("solvate") self.dirs.solvation = realpath( kwargs.setdefault("dirname", self.BASEDIR("solvation")) ) kwargs["struct"] = self._checknotempty(struct or self.files.structure, "struct") kwargs["top"] = self._checknotempty(self.files.topology, "top") kwargs["water"] = self.solvent.box kwargs.setdefault( "mainselection", '"%s"' % self.molecule ) # quotes are needed for make_ndx kwargs.setdefault("distance", self.solvent.distance) boxtype = kwargs.pop("bt", None) boxtype = boxtype if boxtype is not None else "dodecahedron" if boxtype not in ("dodecahedron", "triclinic", "cubic", "octahedron"): msg = "Invalid boxtype '{0}', not suitable for 'gmx editconf'.".format( boxtype ) logger.error(msg) raise ValueError(msg) kwargs["bt"] = boxtype kwargs["includes"] = asiterable(kwargs.pop("includes", [])) + self.dirs.includes params = self._setup_solvate(**kwargs) self.files.structure = kwargs["struct"] self.files.solvated = params["struct"] self.files.ndx = params["ndx"] # we can also make a processed topology right now self.processed_topology(**kwargs) self.journal.completed("solvate") return params
[docs] def processed_topology(self, **kwargs): """Create a portable topology file from the topology and the solvated system.""" if self.files.solvated is None or not os.path.exists(self.files.solvated): self.solvate(**kwargs) kwargs["topol"] = self.files.topology kwargs["struct"] = self.files.solvated kwargs["includes"] = self.dirs.includes self.files.processed_topology = gromacs.cbook.create_portable_topology(**kwargs) return self.files.processed_topology
[docs] def energy_minimize(self, **kwargs): """Energy minimize the solvated structure on the local machine. *kwargs* are passed to :func:`gromacs.setup.energ_minimize` but if :meth:`~mdpow.equil.Simulation.solvate` step has been carried out previously all the defaults should just work. """ self.journal.start("energy_minimize") self.dirs.energy_minimization = realpath( kwargs.setdefault("dirname", self.BASEDIR("em")) ) kwargs["top"] = self.files.topology kwargs.setdefault("struct", self.files.solvated) kwargs.setdefault("mdp", self.mdp["energy_minimize"]) kwargs["mainselection"] = None kwargs["includes"] = asiterable(kwargs.pop("includes", [])) + self.dirs.includes params = gromacs.setup.energy_minimize(**kwargs) self.files.energy_minimized = params["struct"] self.journal.completed("energy_minimize") return params
def _MD(self, protocol, **kwargs): """Basic MD driver for this Simulation. Do not call directly.""" self.journal.start(protocol) kwargs.setdefault("dirname", self.BASEDIR(protocol)) kwargs.setdefault("deffnm", self.deffnm) kwargs.setdefault("mdp", config.get_template("NPT_opls.mdp")) self.dirs[protocol] = realpath(kwargs["dirname"]) setupMD = kwargs.pop("MDfunc", gromacs.setup.MD) kwargs["top"] = self.files.topology kwargs["includes"] = asiterable(kwargs.pop("includes", [])) + self.dirs.includes kwargs["ndx"] = self.files.ndx kwargs[ "mainselection" ] = None # important for SD (use custom mdp and ndx!, gromacs.setup._MD) self._checknotempty(kwargs["struct"], "struct") if not os.path.exists(kwargs["struct"]): # struct is not reliable as it depends on qscript so now we just try everything... struct = gromacs.utilities.find_first( kwargs["struct"], suffices=["pdb", "gro"] ) if struct is None: logger.error( "Starting structure %(struct)r does not exist (yet)" % kwargs ) raise IOError( errno.ENOENT, "Starting structure not found", kwargs["struct"] ) else: logger.info( "Found starting structure %r (instead of %r).", struct, kwargs["struct"], ) kwargs["struct"] = struct # now setup the whole simulation (this is typically gromacs.setup.MD() ) params = setupMD(**kwargs) # params['struct'] is md.gro but could also be md.pdb --- depends entirely on qscript self.files[protocol] = params["struct"] # Gromacs 4.5.x 'mdrun -c PDB' fails if it cannot find 'residuetypes.dat' # so instead of fuffing with GMXLIB we just dump it into the directory try: shutil.copy(config.topfiles["residuetypes.dat"], self.dirs[protocol]) except IOError: logger.warning( "Failed to copy 'residuetypes.dat': mdrun will likely fail to write a final structure" ) self.journal.completed(protocol) return params
[docs] def MD_relaxed(self, **kwargs): """Short MD simulation with *timestep* = 0.1 fs to relax strain. Energy minimization does not always remove all problems and LINCS constraint errors occur. A very short *runtime* = 5 ps MD with very short integration time step *dt* tends to solve these problems. .. See Also:: :func:`gromacs.setup.MD` :Keywords: *struct* starting coordinates (typically guessed) *mdp* MDP run parameter file for Gromacs *qscript* list of queuing system submission scripts; probably a good idea to always include the default "local.sh" even if you have your own ["local.sh"] *qname* name of the job as shown in the queuing system *startdir* **advanced uses**: path of the directory on a remote system, which will be hard-coded into the queuing system script(s); see :func:`gromacs.setup.MD` and :class:`gromacs.manager.Manager` """ # user structure or restrained or solvated kwargs.setdefault("struct", self.files.energy_minimized) kwargs.setdefault("dt", 0.0001) # ps kwargs.setdefault("runtime", 5) # ps kwargs.setdefault("mdp", self.mdp["MD_relaxed"]) return self._MD("MD_relaxed", **kwargs)
[docs] def MD_restrained(self, **kwargs): """Short MD simulation with position restraints on compound. See documentation of :func:`gromacs.setup.MD_restrained` for details. The following keywords can not be changed: top, mdp, ndx, mainselection .. Note:: Position restraints are activated with ``-DPOSRES`` directives for :func:`gromacs.grompp`. Hence this will only work if the compound itp file does indeed contain a ``[ posres ]`` section that is protected by a ``#ifdef POSRES`` clause. .. See Also:: :func:`gromacs.setup.MD_restrained` :Keywords: *struct* starting coordinates (leave empty for inspired guess of file name) *mdp* MDP run parameter file for Gromacs *qscript* list of queuing system submission scripts; probably a good idea to always include the default "local.sh" even if you have your own ["local.sh"] *qname* name of the job as shown in the queuing system *startdir* **advanced uses**: path of the directory on a remote system, which will be hard-coded into the queuing system script(s); see :func:`gromacs.setup.MD` and :class:`gromacs.manager.Manager` """ kwargs.setdefault( "struct", self._lastnotempty([self.files.energy_minimized, self.files.MD_relaxed]), ) kwargs.setdefault("mdp", self.mdp["MD_restrained"]) kwargs["MDfunc"] = gromacs.setup.MD_restrained return self._MD("MD_restrained", **kwargs)
[docs] def MD_NPT(self, **kwargs): """Short NPT MD simulation. See documentation of :func:`gromacs.setup.MD` for details such as *runtime* or specific queuing system options. The following keywords can not be changed: *top*, *mdp*, *ndx*, *mainselection*. .. Note:: If the system crashes (with LINCS errors), try initial equilibration with timestep *dt* = 0.0001 ps (0.1 fs instead of 2 fs) and *runtime* = 5 ps as done in :meth:`~Simulation.MD_relaxed` .. See Also:: :func:`gromacs.setup.MD` and :meth:`Simulation.MD_relaxed` :Keywords: *struct* starting conformation; by default, the *struct* is the last frame from the position restraints run, or, if this file cannot be found (e.g. because :meth:`Simulation.MD_restrained` was not run) it falls back to the relaxed and then the solvated system. *mdp* MDP run parameter file for Gromacs *runtime* total run time in ps *qscript* list of queuing system scripts to prepare; available values are in :data:`gromacs.config.templates` or you can provide your own filename(s) in the current directory (see :mod:`gromacs.qsub` for the format of the templates) *qname* name of the job as shown in the queuing system *startdir* **advanced uses**: path of the directory on a remote system, which will be hard-coded into the queuing system script(s); see :func:`gromacs.setup.MD` and :class:`gromacs.manager.Manager` """ # user structure or relaxed or restrained or solvated kwargs.setdefault("struct", self.get_last_structure()) kwargs.setdefault( "t", self.get_last_checkpoint() ) # Pass checkpoint file from md_relaxed kwargs.setdefault("mdp", self.mdp["MD_NPT"]) return self._MD("MD_NPT", **kwargs)
# for convenience and compatibility MD = MD_NPT @staticmethod def _checknotempty(value, name): if value is None or value == "": raise ValueError("Parameter %s cannot be empty." % name) return value @staticmethod def _lastnotempty(l): """Return the last non-empty value in list *l* (or None :-p)""" nonempty = [None] + [x for x in l if not (x is None or x == "" or x == [])] return nonempty[-1]
[docs] def get_last_structure(self): """Returns the coordinates of the most advanced step in the protocol.""" return self._lastnotempty( [self.files[name] for name in self.coordinate_structures] )
[docs] def get_last_checkpoint(self): """Returns the checkpoint of the most advanced step in the protocol. Relies on md.gro being present from previous simulation, assumes that checkpoint is then present. """ return self._lastnotempty( [self.files[name] for name in self.checkpoints] ).replace(".gro", ".cpt")
[docs]class WaterSimulation(Simulation): """Equilibrium MD of a solute in a box of water.""" solvent_default = "water" dirname_default = os.path.join(Simulation.topdir_default, solvent_default)
class CyclohexaneSimulation(Simulation): """Equilibrium MD of a solute in a box of cyclohexane.""" solvent_default = "cyclohexane" dirname_default = os.path.join(Simulation.topdir_default, solvent_default)
[docs]class OctanolSimulation(Simulation): """Equilibrium MD of a solute in a box of octanol.""" solvent_default = "octanol" dirname_default = os.path.join(Simulation.topdir_default, solvent_default)
class WetOctanolSimulation(Simulation): """Equilibrium MD of a solute in a box of wet octanol.""" solvent_default = "wetoctanol" dirname_default = os.path.join(Simulation.topdir_default, solvent_default) def _setup_solvate(self, **kwargs): sol = gromacs.setup.solvate_sol(**kwargs) with in_dir(self.dirs.solvation, create=False): u = mda.Universe("solvated.gro") octanol = u.select_atoms("resname OcOH") n = octanol.n_residues with in_dir(self.dirs.topology, create=False): gromacs.cbook.edit_txt( self.files.topology, [("OcOH 1", "1", n)] ) ionkwargs = kwargs ionkwargs["struct"] = sol["struct"] params = gromacs.setup.solvate_ion(**ionkwargs) return params class TolueneSimulation(Simulation): """Equilibrium MD of a solute in a box of toluene.""" solvent_default = "toluene" dirname_default = os.path.join(Simulation.topdir_default, solvent_default)