# equil.py
# Copyright (c) 2010-2011 Oliver Beckstein
"""
:mod:`mdpow.run` --- Performing complete simulation protocols
=============================================================
The module provides building blocks for complete simulation protocols
(or pipelines). Each protocol is written as a function that takes a
run input file and the solvent type as input.
:ref:`mdpow-scripts-label` make use of the building blocks.
Typically, *journalling* is enabled, i.e. the tasks remember which
stages have already been completed and can be restarted directly from
the last completed stage. (Restarts are only implemeneted at the level
of individual steps in a MDPOW protocol, not at the level of
continuing interrupted simulations using the Gromacs restart files.)
Input is read from the run input *cfg* file.
.. SeeAlso:: :mod:`mdpow.restart` for the journalling required for restarts.
Protocols
---------
.. autofunction:: equilibrium_simulation
.. autofunction:: fep_simulation
Support
-------
.. autofunction:: setupMD
.. autofunction:: get_mdp_files
.. autofunction:: runMD_or_exit
"""
import sys
import os
import errno
import gromacs.run
import gromacs.exceptions
from .config import get_configuration, set_gromacsoutput, NoSectionError
from . import equil
from . import fep
from .restart import checkpoint
import logging
logger = logging.getLogger("mdpow.run")
[docs]def setupMD(S, protocol, cfg):
"""setup MD simulation *protocol* using the runinput file *cfg*"""
try:
maxwarn = cfg.getint(protocol, "maxwarn")
except KeyError:
maxwarn = 0
simulation_protocol = S.get_protocol(protocol)
params = simulation_protocol(
runtime=cfg.getfloat(protocol, "runtime"),
qscript=cfg.getlist(protocol, "qscript"),
maxwarn=maxwarn,
)
return params
[docs]def get_mdp_files(cfg, protocols):
"""Get file names of MDP files from *cfg* for all *protocols*"""
mdpfiles = {}
for protocol in protocols:
try:
mdp = cfg.findfile(protocol, "mdp")
except NoSectionError:
# skip anything for which we do not define sections, such as
# the dummy run protocols
mdp = None
except ValueError:
# But it is a problem if we can't find a file!
logger.critical(
"Failed to find custom MDP file %r for protocol [%s]",
cfg.get(protocol, "mdp"),
protocol,
)
raise
else:
if mdp is None:
# Should not happen... let's continue and wait for hard-coded defaults
logger.warning(
"No 'mdp' config file entry for protocol [%s]---check input files! "
"Using package defaults.",
protocol,
)
if mdp:
mdpfiles[protocol] = mdp
logger.debug(
"%(protocol)s: Using MDP file %(mdp)r from config file", vars()
)
return mdpfiles
[docs]def runMD_or_exit(S, protocol, params, cfg, exit_on_error=True, **kwargs):
"""run simulation
Can launch :program:`mdrun` itself (:class:`gromacs.run.MDrunner`) or exit so
that the user can run the simulation independently. Checks if the
simulation has completed and sets the return value to ``True`` if this is
the case.
- Configuration parameters are taken from the section *protocol* of the
runinput configuration *cfg*.
- The directory in which the simulation input files reside can be provided
as keyword argument *dirname* or taken from `S.dirs[protocol]`.
- The `exit_on_error` kwarg determines if :func:`sys.exit` is called if
mdrun fails to complete (``True``, default) or if instead we raise an
:exc:`gromacs.exceptions.GromacsError` (``False``).
- Other *kwargs* are interpreted as options for
:class:`~gromacs.tools.Mdrun`.
.. versionchanged:: 0.9.0
New kwarg `exit_on_error`.
"""
dirname = kwargs.pop("dirname", None)
if dirname is None:
try:
dirname = S.dirs[protocol]
except KeyError:
raise ValueError("S.dirs does not have protocol %r" % protocol)
except AttributeError:
raise ValueError("supply dirname as a keyword argument")
simulation_done = False
if cfg.getboolean(protocol, "runlocal"):
logger.info("Running %s (%s.log) ... stand by.", protocol, params["deffnm"])
logger.info("Run directory: %(dirname)s", vars())
mdrun = gromacs.run.MDrunner(
dirname=dirname,
deffnm=params["deffnm"],
v=cfg.getboolean("mdrun", "verbose"),
stepout=cfg.getint("mdrun", "stepout"),
nice=cfg.getint("mdrun", "nice"),
nt=cfg.get("mdrun", "maxthreads"),
cpi=True,
)
simulation_done = mdrun.run_check()
if not simulation_done:
# should probably stop
logger.critical("Failed %(protocol)s, investigate manually.", vars())
if exit_on_error:
sys.exit(1)
else:
raise gromacs.exceptions.GromacsError(
f"Failed {protocol}, investigate manually."
)
else:
# must check if the simulation was run externally
logfile = os.path.join(dirname, params["deffnm"] + os.extsep + "log")
logger.debug("Checking logfile %r if simulation has been completed.", logfile)
simulation_done = gromacs.run.check_mdrun_success(logfile) ### broken??
if simulation_done is None:
logger.info("Now go and run %(protocol)s in directory %(dirname)r.", vars())
if exit_on_error:
sys.exit(0)
else:
return simulation_done
elif simulation_done is False:
logger.warning(
"Simulation %(protocol)s in directory %(dirname)r is incomplete (log=%(logfile)s).",
vars(),
)
if exit_on_error:
sys.exit(1)
else:
raise gromacs.exceptions.MissingDataError(
f"Simulation {protocol} in directory {dirname} is incomplete (log={logfile})."
)
logger.info("Simulation %(protocol)s seems complete (log=%(logfile)s)", vars())
return simulation_done
[docs]def equilibrium_simulation(cfg, solvent, **kwargs):
"""Set up and run equilibrium simulation.
See tutorial for the individual steps.
.. Note:: Depending on the settings in the run input file
(``runlocal``), :program:`mdrun` is executed at various stages,
and hence this process can take a while.
"""
deffnm = kwargs.pop("deffnm", "md")
Simulations = {
"water": equil.WaterSimulation,
"octanol": equil.OctanolSimulation,
"wetoctanol": equil.WetOctanolSimulation,
"cyclohexane": equil.CyclohexaneSimulation,
"toluene": equil.TolueneSimulation,
}
try:
Simulation = Simulations[solvent]
except KeyError:
raise ValueError(
"solvent must be one of {0}".format(", ".join(Simulations.keys()))
)
# generate a canonical path under dirname
topdir = kwargs.get("dirname", None)
if topdir is None:
topdir = cfg.get("setup", "name")
dirname = os.path.join(topdir, Simulation.dirname_default)
savefilename = os.path.join(topdir, "%(solvent)s.simulation" % vars())
# output to screen or hidden?
set_gromacsoutput(cfg)
# start pipeline
if os.path.exists(savefilename):
S = Simulation(filename=savefilename)
else:
# custom mdp files
mdpfiles = get_mdp_files(cfg, Simulation.protocols)
try:
distance = cfg.get("setup", "distance")
except KeyError:
distance = None # if no distance is specified, None = default
try:
boxtype = cfg.get("setup", "boxtype")
except KeyError:
boxtype = None # if no distance is specified, None = default
solventmodel = None
try:
solventmodel = cfg.get("setup", "solventmodel")
logger.info("Selected solvent model: {0}".format(solventmodel))
except KeyError:
solventmodel = None
logger.info("Using default water model")
# for other solvents we currently only have a single OPLS-AA
# parameterization included and hence there is no mechanism to
# choose between different models.
S = Simulation(
molecule=cfg.get("setup", "molecule"),
forcefield=cfg.get("setup", "forcefield"),
dirname=dirname,
deffnm=deffnm,
mdp=mdpfiles,
distance=distance,
solventmodel=solventmodel,
)
if S.journal.has_not_completed("energy_minimize"):
maxwarn = cfg.getint("setup", "maxwarn") or None
prm = cfg.get("setup", "prm") or None
maxthreads = cfg.get("mdrun", "maxthreads") or None
S.topology(itp=cfg.getpath("setup", "itp"), prm=prm)
S.solvate(
struct=cfg.getpath("setup", "structure"),
bt=cfg.get("setup", "boxtype"),
maxwarn=maxwarn,
)
S.energy_minimize(maxwarn=maxwarn, mdrun_args={"nt": maxthreads})
checkpoint("energy_minize", S, savefilename)
else:
logger.info("Fast-forwarding: setup + energy_minimize done")
if S.journal.has_not_completed("MD_relaxed"):
params = setupMD(S, "MD_relaxed", cfg)
checkpoint("MD_relaxed", S, savefilename)
else:
params = {"deffnm": deffnm}
logger.info("Fast-forwarding: MD_relaxed (setup) done")
if S.journal.has_not_completed("MD_relaxed_run"):
wrapper = S.get_protocol("MD_relaxed_run")
success = wrapper(
runMD_or_exit, S, "MD_relaxed", params, cfg
) # note: MD_relaxed!
checkpoint("MD_relaxed_run", S, savefilename)
else:
logger.info("Fast-forwarding: MD_relaxed (run) done")
if S.journal.has_not_completed("MD_NPT"):
params = setupMD(S, "MD_NPT", cfg)
checkpoint("MD_NPT", S, savefilename)
else:
logger.info("Fast-forwarding: MD_NPT (setup) done")
if S.journal.has_not_completed("MD_NPT_run"):
wrapper = S.get_protocol("MD_NPT_run")
success = wrapper(runMD_or_exit, S, "MD_NPT", params, cfg) # note: MD_NPT
checkpoint("MD_NPT_run", S, savefilename)
else:
logger.info("Fast-forwarding: MD_NPT (run) done")
logger.info(
"Equilibrium simulation phase complete, use %(savefilename)r to continue.",
vars(),
)
return savefilename
[docs]def fep_simulation(cfg, solvent, **kwargs):
"""Set up and run FEP simulation.
See tutorial for the individual steps.
.. Note:: Depending on the settings in the run input file
(``runlocal``), :program:`mdrun` is executed sequentially for
all windows and hence this can take a long time. It is
recommended to use ``runlocal = False`` in the run input file
and submit all window simulations to a cluster.
"""
exit_on_error = kwargs.pop("exit_on_error", True)
deffnm = kwargs.pop("deffnm", "md")
EquilSimulations = {
"water": equil.WaterSimulation,
"octanol": equil.OctanolSimulation,
"wetoctanol": equil.WetOctanolSimulation,
"cyclohexane": equil.CyclohexaneSimulation,
"toluene": equil.TolueneSimulation,
}
Simulations = {
"water": fep.Ghyd,
"octanol": fep.Goct,
"wetoctanol": fep.Gwoct,
"cyclohexane": fep.Gcyclo,
"toluene": fep.Gtol,
}
try:
EquilSimulation = EquilSimulations[solvent]
Simulation = Simulations[solvent]
except KeyError:
raise ValueError(
"solvent must be 'water', 'octanol', 'wetoctanol', 'cyclohexane' or 'toluene'"
)
# generate a canonical path under dirname
topdir = kwargs.get("dirname", None)
if topdir is None:
topdir = cfg.get("setup", "name")
dirname = os.path.join(topdir, Simulation.dirname_default)
# XXX nasty ... use same recipe to construct default save file name as in
# Gsolv ... should be a static method or something else I can use before
# the class is instantiated. Note that the pickle files live under dirname
# and NOT topdir (bit of an historic inconsistency)
savefilename = os.path.join(dirname, Simulation.__name__ + os.extsep + "fep")
# need pickle files for the equilibrium simulation ... another nasty guess:
equil_savefilename = os.path.join(topdir, "%(solvent)s.simulation" % vars())
try:
equil_S = EquilSimulation(filename=equil_savefilename)
except IOError as err:
if err.errno == errno.ENOENT:
logger.critical(
"Missing the equilibrium simulation %(equil_savefilename)r.", vars()
)
logger.critical(
"Run `mdpow-equilibrium -S %s %s' first!", solvent, "RUNINPUT.cfg"
)
raise
# output to screen or hidden?
set_gromacsoutput(cfg)
# start pipeline
if os.path.exists(savefilename):
S = Simulation(filename=savefilename, basedir=topdir)
logger.info("Running FEP with saved settings from %r", savefilename)
logger.info("Note: all configuration file options are ignored!")
else:
# method to be used "TI"/"BAR"
method = cfg.get("FEP", "method")
logger.info("Running FEP with the %r implementation in Gromacs", method)
# custom mdp file
mdp = cfg.findfile("FEP", "mdp")
logger.debug("Using [FEP] MDP file %r from config file", mdp)
# lambda schedules can be read from [FEP_schedule_*] sections
schedules = {
"coulomb": fep.FEPschedule.load(cfg, "FEP_schedule_Coulomb"),
"vdw": fep.FEPschedule.load(cfg, "FEP_schedule_VDW"),
}
logger.debug("Loaded FEP schedules %r from config file", schedules.keys())
# Note that we set basedir=topdir (and *not* dirname=dirname!)...FEP is a bit convoluted
S = Simulation(
simulation=equil_S,
runtime=cfg.getfloat("FEP", "runtime"),
basedir=topdir,
deffnm=deffnm,
mdp=mdp,
schedules=schedules,
method=method,
)
if S.journal.has_not_completed("setup"):
params = S.setup(
qscript=cfg.getlist("FEP", "qscript"), maxwarn=cfg.getint("FEP", "maxwarn")
)
checkpoint("setup", S, savefilename)
else:
params = {"deffnm": deffnm}
logger.info("Fast-forwarding: FEP setup done")
if S.journal.has_not_completed("fep_run"):
def run_all_FEPS():
for wdir in S.fep_dirs():
runMD_or_exit(S, "FEP", params, cfg, dirname=wdir, dgdl=True)
wrapper = S.get_protocol("fep_run")
wrapper(run_all_FEPS)
checkpoint("fep_run", S, savefilename)
else:
logger.info("Fast-forwarding: fep (run) done")
logger.info(
"FEP simulation phase complete, use %(savefilename)r to continue.", vars()
)
return savefilename