Source code for mdpow.run

# equil.py
# Copyright (c) 2010-2011 Oliver Beckstein

"""
:mod:`mdpow.run` --- Performing complete simulation protocols
=============================================================

The module provides building blocks for complete simulation protocols
(or pipelines). Each protocol is written as a function that takes a
run input file and the solvent type as input.

:ref:`mdpow-scripts-label` make use of the building blocks.

Typically, *journalling* is enabled, i.e. the tasks remember which
stages have already been completed and can be restarted directly from
the last completed stage. (Restarts are only implemeneted at the level
of individual steps in a MDPOW protocol, not at the level of
continuing interrupted simulations using the Gromacs restart files.)

Input is read from the run input *cfg* file.

.. SeeAlso:: :mod:`mdpow.restart` for the journalling required for restarts.


Protocols
---------

.. autofunction:: equilibrium_simulation
.. autofunction:: fep_simulation

Support
-------

.. autofunction:: setupMD
.. autofunction:: get_mdp_files
.. autofunction:: runMD_or_exit

"""

import sys
import os
import errno

import gromacs.run
import gromacs.exceptions

from .config import get_configuration, set_gromacsoutput, NoSectionError
from . import equil
from . import fep
from .restart import checkpoint

import logging

logger = logging.getLogger("mdpow.run")


[docs]def setupMD(S, protocol, cfg):
    """setup MD simulation *protocol* using the runinput file *cfg*"""

    try:
        maxwarn = cfg.getint(protocol, "maxwarn")
    except KeyError:
        maxwarn = 0

    simulation_protocol = S.get_protocol(protocol)
    params = simulation_protocol(
        runtime=cfg.getfloat(protocol, "runtime"),
        qscript=cfg.getlist(protocol, "qscript"),
        maxwarn=maxwarn,
    )
    return params


[docs]def get_mdp_files(cfg, protocols):
    """Get file names of MDP files from *cfg* for all *protocols*"""
    mdpfiles = {}
    for protocol in protocols:
        try:
            mdp = cfg.findfile(protocol, "mdp")
        except NoSectionError:
            # skip anything for which we do not define sections, such as
            # the dummy run protocols
            mdp = None
        except ValueError:
            # But it is a problem if we can't find a file!
            logger.critical(
                "Failed to find custom MDP file %r for protocol [%s]",
                cfg.get(protocol, "mdp"),
                protocol,
            )
            raise
        else:
            if mdp is None:
                # Should not happen... let's continue and wait for hard-coded defaults
                logger.warning(
                    "No 'mdp' config file entry for protocol [%s]---check input files! "
                    "Using package defaults.",
                    protocol,
                )
        if mdp:
            mdpfiles[protocol] = mdp
            logger.debug(
                "%(protocol)s: Using MDP file %(mdp)r from config file", vars()
            )
    return mdpfiles


[docs]def runMD_or_exit(S, protocol, params, cfg, exit_on_error=True, **kwargs):
    """run simulation

    Can launch :program:`mdrun` itself (:class:`gromacs.run.MDrunner`) or exit so
    that the user can run the simulation independently. Checks if the
    simulation has completed and sets the return value to ``True`` if this is
    the case.

    - Configuration parameters are taken from the section *protocol* of the
      runinput configuration *cfg*.

    - The directory in which the simulation input files reside can be provided
      as keyword argument *dirname* or taken from `S.dirs[protocol]`.

    - The `exit_on_error` kwarg determines if :func:`sys.exit` is called if
      mdrun fails to complete (``True``, default) or if instead we raise an
      :exc:`gromacs.exceptions.GromacsError` (``False``).

    - Other *kwargs* are interpreted as options for
      :class:`~gromacs.tools.Mdrun`.

    .. versionchanged:: 0.9.0
       New kwarg `exit_on_error`.

    """
    dirname = kwargs.pop("dirname", None)
    if dirname is None:
        try:
            dirname = S.dirs[protocol]
        except KeyError:
            raise ValueError("S.dirs does not have protocol %r" % protocol)
        except AttributeError:
            raise ValueError("supply dirname as a keyword argument")
    simulation_done = False
    if cfg.getboolean(protocol, "runlocal"):
        logger.info("Running %s (%s.log) ... stand by.", protocol, params["deffnm"])
        logger.info("Run directory: %(dirname)s", vars())
        mdrun = gromacs.run.MDrunner(
            dirname=dirname,
            deffnm=params["deffnm"],
            v=cfg.getboolean("mdrun", "verbose"),
            stepout=cfg.getint("mdrun", "stepout"),
            nice=cfg.getint("mdrun", "nice"),
            nt=cfg.get("mdrun", "maxthreads"),
            cpi=True,
        )
        simulation_done = mdrun.run_check()
        if not simulation_done:
            # should probably stop
            logger.critical("Failed %(protocol)s, investigate manually.", vars())
            if exit_on_error:
                sys.exit(1)
            else:
                raise gromacs.exceptions.GromacsError(
                    f"Failed {protocol}, investigate manually."
                )
    else:
        # must check if the simulation was run externally
        logfile = os.path.join(dirname, params["deffnm"] + os.extsep + "log")
        logger.debug("Checking logfile %r if simulation has been completed.", logfile)
        simulation_done = gromacs.run.check_mdrun_success(logfile)  ### broken??
        if simulation_done is None:
            logger.info("Now go and run %(protocol)s in directory %(dirname)r.", vars())
            if exit_on_error:
                sys.exit(0)
            else:
                return simulation_done
        elif simulation_done is False:
            logger.warning(
                "Simulation %(protocol)s in directory %(dirname)r is incomplete (log=%(logfile)s).",
                vars(),
            )
            if exit_on_error:
                sys.exit(1)
            else:
                raise gromacs.exceptions.MissingDataError(
                    f"Simulation {protocol} in directory {dirname} is incomplete (log={logfile})."
                )
        logger.info("Simulation %(protocol)s seems complete (log=%(logfile)s)", vars())
    return simulation_done


[docs]def equilibrium_simulation(cfg, solvent, **kwargs):
    """Set up and run equilibrium simulation.

    See tutorial for the individual steps.

    .. Note:: Depending on the settings in the run input file
       (``runlocal``), :program:`mdrun` is executed at various stages,
       and hence this process can take a while.
    """
    deffnm = kwargs.pop("deffnm", "md")
    Simulations = {
        "water": equil.WaterSimulation,
        "octanol": equil.OctanolSimulation,
        "wetoctanol": equil.WetOctanolSimulation,
        "cyclohexane": equil.CyclohexaneSimulation,
        "toluene": equil.TolueneSimulation,
    }
    try:
        Simulation = Simulations[solvent]
    except KeyError:
        raise ValueError(
            "solvent must be one of {0}".format(", ".join(Simulations.keys()))
        )

    # generate a canonical path under dirname
    topdir = kwargs.get("dirname", None)
    if topdir is None:
        topdir = cfg.get("setup", "name")
    dirname = os.path.join(topdir, Simulation.dirname_default)
    savefilename = os.path.join(topdir, "%(solvent)s.simulation" % vars())
    # output to screen or hidden?
    set_gromacsoutput(cfg)

    # start pipeline
    if os.path.exists(savefilename):
        S = Simulation(filename=savefilename)
    else:
        # custom mdp files
        mdpfiles = get_mdp_files(cfg, Simulation.protocols)
        try:
            distance = cfg.get("setup", "distance")
        except KeyError:
            distance = None  # if no distance is specified, None = default
        try:
            boxtype = cfg.get("setup", "boxtype")
        except KeyError:
            boxtype = None  # if no distance is specified, None = default

        solventmodel = None
        try:
            solventmodel = cfg.get("setup", "solventmodel")
            logger.info("Selected solvent model: {0}".format(solventmodel))
        except KeyError:
            solventmodel = None
            logger.info("Using default water model")
        # for other solvents we currently only have a single OPLS-AA
        # parameterization included and hence there is no mechanism to
        # choose between different models.

        S = Simulation(
            molecule=cfg.get("setup", "molecule"),
            forcefield=cfg.get("setup", "forcefield"),
            dirname=dirname,
            deffnm=deffnm,
            mdp=mdpfiles,
            distance=distance,
            solventmodel=solventmodel,
        )

    if S.journal.has_not_completed("energy_minimize"):
        maxwarn = cfg.getint("setup", "maxwarn") or None
        prm = cfg.get("setup", "prm") or None
        maxthreads = cfg.get("mdrun", "maxthreads") or None
        S.topology(itp=cfg.getpath("setup", "itp"), prm=prm)
        S.solvate(
            struct=cfg.getpath("setup", "structure"),
            bt=cfg.get("setup", "boxtype"),
            maxwarn=maxwarn,
        )
        S.energy_minimize(maxwarn=maxwarn, mdrun_args={"nt": maxthreads})
        checkpoint("energy_minize", S, savefilename)
    else:
        logger.info("Fast-forwarding: setup + energy_minimize done")

    if S.journal.has_not_completed("MD_relaxed"):
        params = setupMD(S, "MD_relaxed", cfg)
        checkpoint("MD_relaxed", S, savefilename)
    else:
        params = {"deffnm": deffnm}
        logger.info("Fast-forwarding: MD_relaxed (setup) done")

    if S.journal.has_not_completed("MD_relaxed_run"):
        wrapper = S.get_protocol("MD_relaxed_run")
        success = wrapper(
            runMD_or_exit, S, "MD_relaxed", params, cfg
        )  # note: MD_relaxed!
        checkpoint("MD_relaxed_run", S, savefilename)
    else:
        logger.info("Fast-forwarding: MD_relaxed (run) done")

    if S.journal.has_not_completed("MD_NPT"):
        params = setupMD(S, "MD_NPT", cfg)
        checkpoint("MD_NPT", S, savefilename)
    else:
        logger.info("Fast-forwarding: MD_NPT (setup) done")

    if S.journal.has_not_completed("MD_NPT_run"):
        wrapper = S.get_protocol("MD_NPT_run")
        success = wrapper(runMD_or_exit, S, "MD_NPT", params, cfg)  # note: MD_NPT
        checkpoint("MD_NPT_run", S, savefilename)
    else:
        logger.info("Fast-forwarding: MD_NPT (run) done")

    logger.info(
        "Equilibrium simulation phase complete, use %(savefilename)r to continue.",
        vars(),
    )
    return savefilename


[docs]def fep_simulation(cfg, solvent, **kwargs):
    """Set up and run FEP simulation.

    See tutorial for the individual steps.

    .. Note:: Depending on the settings in the run input file
       (``runlocal``), :program:`mdrun` is executed sequentially for
       all windows and hence this can take a long time. It is
       recommended to use ``runlocal = False`` in the run input file
       and submit all window simulations to a cluster.
    """
    exit_on_error = kwargs.pop("exit_on_error", True)
    deffnm = kwargs.pop("deffnm", "md")
    EquilSimulations = {
        "water": equil.WaterSimulation,
        "octanol": equil.OctanolSimulation,
        "wetoctanol": equil.WetOctanolSimulation,
        "cyclohexane": equil.CyclohexaneSimulation,
        "toluene": equil.TolueneSimulation,
    }
    Simulations = {
        "water": fep.Ghyd,
        "octanol": fep.Goct,
        "wetoctanol": fep.Gwoct,
        "cyclohexane": fep.Gcyclo,
        "toluene": fep.Gtol,
    }
    try:
        EquilSimulation = EquilSimulations[solvent]
        Simulation = Simulations[solvent]
    except KeyError:
        raise ValueError(
            "solvent must be 'water', 'octanol', 'wetoctanol', 'cyclohexane' or 'toluene'"
        )
    # generate a canonical path under dirname
    topdir = kwargs.get("dirname", None)
    if topdir is None:
        topdir = cfg.get("setup", "name")
    dirname = os.path.join(topdir, Simulation.dirname_default)
    # XXX nasty ... use same recipe to construct default save file name as in
    # Gsolv ... should be a static method or something else I can use before
    # the class is instantiated. Note that the pickle files live under dirname
    # and NOT topdir (bit of an historic inconsistency)
    savefilename = os.path.join(dirname, Simulation.__name__ + os.extsep + "fep")
    # need pickle files for the equilibrium simulation ... another nasty guess:
    equil_savefilename = os.path.join(topdir, "%(solvent)s.simulation" % vars())
    try:
        equil_S = EquilSimulation(filename=equil_savefilename)
    except IOError as err:
        if err.errno == errno.ENOENT:
            logger.critical(
                "Missing the equilibrium simulation %(equil_savefilename)r.", vars()
            )
            logger.critical(
                "Run `mdpow-equilibrium -S %s %s'  first!", solvent, "RUNINPUT.cfg"
            )
        raise

    # output to screen or hidden?
    set_gromacsoutput(cfg)
    # start pipeline
    if os.path.exists(savefilename):
        S = Simulation(filename=savefilename, basedir=topdir)
        logger.info("Running FEP with saved settings from %r", savefilename)
        logger.info("Note: all configuration file options are ignored!")
    else:
        # method to be used "TI"/"BAR"
        method = cfg.get("FEP", "method")
        logger.info("Running FEP with the %r implementation in Gromacs", method)
        # custom mdp file
        mdp = cfg.findfile("FEP", "mdp")
        logger.debug("Using [FEP] MDP file %r from config file", mdp)

        # lambda schedules can be read from [FEP_schedule_*] sections
        schedules = {
            "coulomb": fep.FEPschedule.load(cfg, "FEP_schedule_Coulomb"),
            "vdw": fep.FEPschedule.load(cfg, "FEP_schedule_VDW"),
        }
        logger.debug("Loaded FEP schedules %r from config file", schedules.keys())

        # Note that we set basedir=topdir (and *not* dirname=dirname!)...FEP is a bit convoluted
        S = Simulation(
            simulation=equil_S,
            runtime=cfg.getfloat("FEP", "runtime"),
            basedir=topdir,
            deffnm=deffnm,
            mdp=mdp,
            schedules=schedules,
            method=method,
        )
    if S.journal.has_not_completed("setup"):
        params = S.setup(
            qscript=cfg.getlist("FEP", "qscript"), maxwarn=cfg.getint("FEP", "maxwarn")
        )
        checkpoint("setup", S, savefilename)
    else:
        params = {"deffnm": deffnm}
        logger.info("Fast-forwarding: FEP setup done")

    if S.journal.has_not_completed("fep_run"):

        def run_all_FEPS():
            for wdir in S.fep_dirs():
                runMD_or_exit(S, "FEP", params, cfg, dirname=wdir, dgdl=True)

        wrapper = S.get_protocol("fep_run")
        wrapper(run_all_FEPS)
        checkpoint("fep_run", S, savefilename)
    else:
        logger.info("Fast-forwarding: fep (run) done")

    logger.info(
        "FEP simulation phase complete, use %(savefilename)r to continue.", vars()
    )
    return savefilename