Source code for mdpow.fep

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# mdpow: fep.py
# Copyright (c) 2010 Oliver Beckstein

r"""
:mod:`mdpow.fep` -- Calculate free energy of solvation
======================================================

Set up and run free energy perturbation (FEP) calculations to compute the
free energy of solvation of a solute in a solvent box. The protocol follows
the work of D. Mobley (`Free Energy Tutorial`_) and M. Shirts and uses
Gromacs 4.0.x.

Required Input:

 - topology
 - equilibrated structure of the solvated molecule

See the docs for :class:`Gsolv` for details on what is calculated.


.. _Free Energy Tutorial:
   http://www.dillgroup.ucsf.edu/group/wiki/index.php?title=Free_Energy:_Tutorial

Differences to published protocols
----------------------------------

Some notes on how the approach encoded here differs from what others
(notably Mobley) did:

- We use Gromacs 4.x and its new decoupling feature ::

    couple-intramol = no

  which means that the intra-molecular interactions of the solute remain
  switched on while it is decoupled from its environment. As I understand it,
  this allows us to compute the free energy change in solution alone, without
  an extra calculation in vacuo (see the MDP sketch after this list and
  http://www.mail-archive.com/gmx-users@gromacs.org/msg18803.html).

  Mobley does an extra discharging calculation in vacuo and calculates

  .. math::

      \Delta A = \Delta A_{\mathrm{coul}}(\mathrm{vac}) - (\Delta A_{\mathrm{coul}}(\mathrm{sol}) + \Delta A_{\mathrm{vdw}}(\mathrm{sol}))

  (but also annihilates the interactions on the solute, corresponding to
  ``couple-intramol = yes``) whereas we do

  .. math::

      \Delta A = - (\Delta A_{\mathrm{coul}}(\mathrm{sol}) + \Delta A_{\mathrm{vdw}}(\mathrm{sol}))

- Simulations are run in the NPT ensemble (but make sure to use Langevin
  dynamics for temperature control).
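
For orientation, the decoupling of a single window boils down to a handful of
MDP free-energy keywords. The fragment below is only an illustrative sketch
(values correspond to the default Coulomb schedule; the actual MDP files are
generated by :meth:`Gsolv.setup` and the molecule name is a placeholder)::

    ; free energy decoupling, Coulomb leg, one window (illustrative)
    free-energy          = yes
    couple-moltype       = SOLUTE        ; name of the solute moleculetype
    couple-lambda0       = vdw-q
    couple-lambda1       = vdw
    couple-intramol      = no
    sc-alpha             = 0             ; linear scaling for Coulomb
    init-lambda-state    = 2             ; index into fep-lambdas
    fep-lambdas          = 0 0.25 0.5 0.75 1.0
    separate-dhdl-file   = no            ; dH/dl is written to the .edr file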


Example
-------

See :mod:`mdpow` for the complete workflow; a brief sketch is shown below.
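
A minimal sketch of a hydration free energy calculation, assuming an
equilibrated and saved :class:`mdpow.equil.Simulation` (file names are
placeholders)::

    import mdpow.equil
    import mdpow.fep

    S = mdpow.equil.Simulation(filename="water.simulation")  # finished NPT equilibrium run
    G = mdpow.fep.Ghyd(simulation=S, runtime=5000)            # set up FEP windows from it
    G.setup(qscript=["local.sh"])                             # write MDP/TPR files and run scripts
    # ... run the generated jobs, e.g. G.qsub() on a cluster ...
    G.analyze()                                               # TI with error estimates
    G.plot()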


User reference
--------------

Simulation setup and analysis of all FEP simulations is encapsulated
by a :class:`mdpow.fep.Gsolv` object. For the hydration free energy
there is a special class :class:`~mdpow.fep.Ghyd` and for the
solvation free energy in octanol there is
:class:`~mdpow.fep.Goct`. See the description of
:class:`~mdpow.fep.Gsolv` for methods common to both.

.. autoclass:: Gsolv
   :members:
   :inherited-members:

.. autoclass:: Ghyd
   :members:
   :inherited-members:

.. autoclass:: Goct
   :members:
   :inherited-members:

.. autoclass:: Gcyclo
   :members:
   :inherited-members:

.. autoclass:: Gtol
   :members:
   :inherited-members:

.. autofunction:: pOW

.. autofunction:: pCW

.. autofunction:: pTW

Developer notes
---------------

A user really only needs to access classes derived from
:class:`mdpow.fep.Gsolv`; all other classes and functions are auxiliary and
only of interest to developers.

The following additional objects support :class:`mdpow.fep.Gsolv`:

.. autoclass:: FEPschedule
   :members:
.. autofunction:: molar_to_nm3
.. autofunction:: kcal_to_kJ
.. autofunction:: kJ_to_kcal
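
For quick unit sanity checks the conversion helpers can be called directly
(values are rounded)::

    from mdpow.fep import molar_to_nm3, kcal_to_kJ, kJ_to_kcal

    molar_to_nm3(55.345)   # ~33.3 nm^-3 (number density of water)
    kcal_to_kJ(1.0)        # 4.184 kJ
    kJ_to_kcal(4.184)      # 1.0 kcal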

.. data:: N_AVOGADRO

          Avogadro's constant |NA| in mol\ :sup:`-1` (`NA NIST value`_).

.. data:: kBOLTZ

          Boltzmann's constant |kB| in kJ mol\ :sup:`-1` K\ :sup:`-1` (`kB NIST value`_).

.. |NA| replace:: *N*\ :sub:`A`
.. |kB| replace:: *k*\ :sub:`B`
.. _NA NIST value: http://physics.nist.gov/cgi-bin/cuu/Value?na
.. _kB NIST value: http://physics.nist.gov/cgi-bin/cuu/Value?k
.. |^-1| replace:: \ :sup:`-1`
.. |^-3| replace:: \ :sup:`-3`
.. |^3|  replace:: \ :sup:`3`

TODO
~~~~

- run minimization, NVT-equil, NPT-equil prior to production (probably
  use preprocessed topology from ``grompp -pp`` for portability)

  See `Free Energy Tutorial`_.

"""

import os
import errno
import copy
from subprocess import call
import warnings
from glob import glob
from configparser import NoOptionError

import numpy as np
import pandas as pd

import scipy.integrate
from scipy import constants

import numkit.integration
import numkit.timeseries
from numkit.observables import QuantityWithError

from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk
from alchemlyb.estimators import TI, BAR, MBAR
from alchemlyb.parsing.gmx import _extract_dataframe
from alchemlyb.preprocessing.subsampling import statistical_inefficiency

import gromacs
import gromacs.utilities

try:
    import gromacs.setup
except (ImportError, OSError):
    raise ImportError("Gromacs installation not found, source GMXRC?")
from gromacs.utilities import asiterable, AttributeDict, in_dir, openany

import logging

logger = logging.getLogger("mdpow.fep")

from . import config
from .restart import Journalled
from . import kBOLTZ, N_AVOGADRO


def molar_to_nm3(c):
    """Convert a concentration in Molar to nm|^-3|."""
    return c * N_AVOGADRO * 1e-24


def bar_to_kJmolnm3(p):
    """Convert pressure in bar to kJ mol|^-1| nm|^-3|.

    1 bar = 1e5 J m|^-3|
    """
    return p * N_AVOGADRO * 1e-25


def kcal_to_kJ(x):
    """Convert an energy in kcal to kJ."""
    return 4.184 * x


def kJ_to_kcal(x):
    """Convert an energy in kJ to kcal."""
    return x / 4.184


def kBT_to_kJ(x, T):
    """Convert an energy in kBT to kJ/mol."""
    return x * constants.N_A * constants.k * T * 1e-3
class FEPschedule(AttributeDict):
    """Describe mdp parameter choices as key - value pairs.

    The FEP schedule can be loaded from a configuration parser with the
    static method :meth:`FEPschedule.load`.

    See the example runinput file for an example. It contains the sections::

       [FEP_schedule_Coulomb]
       name = Coulomb
       description = dis-charging vdw+q --> vdw
       label = Coul
       couple_lambda0 = vdw-q
       couple_lambda1 = vdw
       # soft core alpha: linear scaling for coulomb
       sc_alpha = 0
       lambdas = 0, 0.25, 0.5, 0.75, 1.0

       [FEP_schedule_VDW]
       name = vdw
       description = decoupling vdw --> none
       label = VDW
       couple_lambda0 = vdw
       couple_lambda1 = none
       # recommended values for soft cores (Mobley, Shirts et al)
       sc_alpha = 0.5
       sc_power = 1
       sc_sigma = 0.3
       lambdas = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1
    """

    mdp_keywords = dict(
        (
            ("sc_alpha", float),
            ("sc_power", int),
            ("sc_sigma", float),
            ("couple_lambda0", str),
            ("couple_lambda1", str),
        )
    )
    meta_keywords = dict((("name", str), ("description", str), ("label", str)))
    other_keywords = dict((("lambdas", list),))

    @property
    def mdp_dict(self):
        """Dict of key-values that can be set in a mdp file."""
        return dict(((k, v) for k, v in self.items() if k in self.mdp_keywords))

    @staticmethod
    def load(cfg, section):
        """Initialize a :class:`FEPschedule` from the *section* in the configuration *cfg*"""
        keys = {}
        keys.update(FEPschedule.mdp_keywords)
        keys.update(FEPschedule.meta_keywords)
        keys.update(FEPschedule.other_keywords)

        cfg_get = {
            float: cfg.getfloat,
            int: cfg.getint,
            str: cfg.getstr,  # literal strings, no conversion of None (which we need for the MDP!)
            # CHECK: THIS IS LIKELY NOT GUARANTEED ANYMORE since getstr == get
            list: cfg.getarray,  # numpy float array from list
        }

        def getter(type, section, key):
            value = cfg_get[type](section, key)
            try:
                # for empty list [] and array array([])
                if len(value) == 0:
                    value = None
            except TypeError:
                pass
            return value

        # skip any None values
        return FEPschedule(
            (key, getter(keytype, section, key))
            for key, keytype in keys.items()
            if getter(keytype, section, key) is not None
        )

    def __deepcopy__(self, memo):
        x = FEPschedule()
        for k, v in self.items():
            x[k] = copy.deepcopy(v)
        return x
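

# Example (sketch, not executed): load a schedule from a run-input configuration.
# Assumes a ``cfg`` object as used elsewhere in mdpow (see :mod:`mdpow.config`)
# with the section names shown in the class docstring above.
#
#   coulomb = FEPschedule.load(cfg, "FEP_schedule_Coulomb")
#   coulomb.label      # "Coul"
#   coulomb.lambdas    # array([0., 0.25, 0.5, 0.75, 1.])
#   coulomb.mdp_dict   # only the keys that are written to the MDP file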
[docs]class Gsolv(Journalled): r"""Simulations to calculate and analyze the solvation free energy. :math:`\Delta A` is computed from the decharging and the decoupling step. With our choice of ``lambda=0`` being the fully interacting and ``lambda=1`` the non-interacting state, it is computed as .. math:: \Delta A = -(\Delta A_{\mathrm{coul}} + \Delta A_{\mathrm{vdw}}) With this protocol, the concentration in the liquid and in the gas phase is the same. (Under the assumption of ideal solution/ideal gas behaviour this directly relates to the Ben-Naim 1M/1M standard state.) (We neglect the negligible correction :math:`-kT \ln V_x/V_{\mathrm{sim}} = -kT \ln(1 - v_s/V_{\mathrm{sim}})` where :math:`V_x` is the volume of the system without the solute but the same number of water molecules as in the fully interacting case [see Michael Shirts' Thesis, p82].) Typical work flow:: G = Gsolv(simulation='drug.simulation') # continue from :mod:`mdpow.equil` G.setup(qscript=['my_template.sge', 'local.sh']) # my_template.sge is user supplied G.qsub() # run SGE job arrays as generated from my_template.sge G.analyze() G.plot() See :mod:`gromacs.qsub` for notes on how to write templates for queuing system scripts (in particular `queuing system templates`_). .. _queuing system templates: http://gromacswrapper.readthedocs.io/en/latest/gromacs/blocks/qsub.html#queuing-system-templates """ topdir_default = "FEP" dirname_default = os.path.curdir solvent_default = "water" #: Check list of all methods that can be run as an independent protocol; see also #: :meth:`Simulation.get_protocol` and :class:`restart.Journal` protocols = ["setup", "fep_run"] #: Estimators in alchemlyb estimators = { "TI": {"extract": extract_dHdl, "estimator": TI}, "BAR": {"extract": extract_u_nk, "estimator": BAR}, "MBAR": {"extract": extract_u_nk, "estimator": MBAR}, } # TODO: initialize from default cfg schedules_default = { "coulomb": FEPschedule( name="coulomb", description="dis-charging vdw+q --> vdw", label="Coul", couple_lambda0="vdw-q", couple_lambda1="vdw", sc_alpha=0, # linear scaling for coulomb lambdas=np.array([0.0, 0.25, 0.5, 0.75, 1.0]), # default values ), "vdw": FEPschedule( name="vdw", description="decoupling vdw --> none", label="VDW", couple_lambda0="vdw", couple_lambda1="none", sc_alpha=0.5, sc_power=1, sc_sigma=0.3, # recommended values lambdas=np.array( [ 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1, ] ), # defaults ), } #: Default Gromacs *MDP* run parameter file for FEP. #: (The file is part of the package and is found with :func:`mdpow.config.get_template`.) mdp_default = "bar_opls.mdp" def __init__(self, molecule=None, top=None, struct=None, method="BAR", **kwargs): """Set up Gsolv from input files or a equilibrium simulation. :Arguments: *molecule* name of the molecule for which the hydration free energy is to be computed (as in the gromacs topology) [REQUIRED] *top* topology [REQUIRED] *struct* solvated and equilibrated input structure [REQUIRED] *ndx* index file *dirname* directory to work under ['FEP/*solvent*'] *solvent* name of the solvent (only used for path names); will not affect the simulations because the input coordinates and topologies are determined by either *simulation* or *molecule*, *top*, and *struct*. If *solvent* is not provided then it is set to :attr:`solvent_default`. 
*lambda_coulomb* list of lambdas for discharging: q+vdw --> vdw *lambda_vdw* list of lambdas for decoupling: vdw --> none *runtime* simulation time per window in ps [5000] *temperature* temperature in Kelvin of the simulation [300.0] *qscript* template or list of templates for queuing system scripts (see :data:`gromacs.config.templates` for details) [local.sh] *includes* include directories *simulation* Instead of providing the required arguments, obtain the input files from a :class:`mdpow.equil.Simulation` instance. *method* "TI" for thermodynamic integration or "BAR" for Bennett acceptance ratio; using "BAR" in Gromacs also writes TI data so this is the default. *mdp* MDP file name (if no entry then the package defaults are used) *filename* Instead of providing the required arguments, load from pickle file. If either *simulation* or *molecule*, *top*, and *struct* are provided then simply set the attribute :attr:`filename` of the default checkpoint file to *filename*. *basedir* Prepend *basedir* to all filenames; ``None`` disables [.] *permissive* Set to ``True`` if you want to read past corrupt data in output xvg files (see :class:`gromacs.formats.XVG` for details); note that *permissive*=``True`` can lead to **wrong results**. Overrides the value set in a loaded pickle file. [``False``] *stride* collect every *stride* data line, see :meth:`Gsolv.collect` [1] *start* Start frame of data analyzed in every fep window. *stop* Stop frame of data analyzed in every fep window. *SI* Set to ``True`` if you want to perform statistical inefficiency to preprocess the data. *kwargs* other undocumented arguments (see source for the moment) .. Note:: Only a subset of the FEP parameters can currently be set with keywords (lambdas); other parameters such as the soft cores are currently fixed. For advanced use: pass a dict with keys "coulomb" and "vdw" and values of :class:`FEPschedule` in the keyword *schedules*. The *lambdas* will override these settings (which typically come from a cfg). This method allows setting all parameters. """ required_args = ("molecule", "top", "struct") # should this be below somewhere? if not method in ("TI", "BAR", "MBAR"): raise ValueError("method can only be TI, BAR, or MBAR") self.method = method filename = kwargs.pop("filename", None) basedir = kwargs.pop( "basedir", os.path.curdir ) # all other dirs relative to basedir simulation = kwargs.pop("simulation", None) solvent = kwargs.pop("solvent", self.solvent_default) if ( None in (molecule, top, struct) and simulation is None ) and filename is not None: # load from pickle file self.load(filename) self.filename = filename kwargs = {} # for super else: if simulation is not None: # load data from Simulation instance self.molecule = simulation.molecule self.top = ( simulation.files.processed_topology or simulation.files.topology ) self.struct = simulation.files.MD_NPT self.ndx = simulation.files.ndx if simulation.solvent_type == solvent: self.solvent_type = simulation.solvent_type else: errmsg = ( "Solvent mismatch: simulation was run for %s but Gsolv is set up for %s" % (simulation.solvent_type, solvent) ) logger.error(errmsg) raise ValueError(errmsg) else: self.molecule = molecule # should check that this is in top (?) self.top = top self.struct = struct self.ndx = kwargs.pop("ndx", None) self.solvent_type = solvent for attr in required_args: if self.__getattribute__(attr) is None: raise ValueError("A value is required for %(attr)r." 
% vars()) # fix struct (issue with qscripts being independent from rest of code) if not os.path.exists(self.struct): # struct is not reliable as it depends on qscript so now we just try everything... struct = gromacs.utilities.find_first( self.struct, suffices=["pdb", "gro"] ) if struct is None: logger.error("Starting structure %r does not exist.", self.struct) raise IOError( errno.ENOENT, "Starting structure not found", self.struct ) else: logger.info( "Found starting structure %r (instead of %r).", struct, self.struct, ) self.struct = struct self.Temperature = kwargs.pop("temperature", 300.0) self.qscript = kwargs.pop("qscript", ["local.sh"]) self.deffnm = kwargs.pop("deffnm", "md") self.mdp = kwargs.pop("mdp", config.get_template(self.mdp_default)) # schedules (deepcopy because we might modify) # For some reason 2.7 tests failed with deepcopy in 2.7 so used merge_dict instead self.schedules = config.merge_dicts(self.schedules_default, {}) schedules = kwargs.pop("schedules", {}) self.schedules.update(schedules) self.lambdas = { "coulomb": kwargs.pop( "lambda_coulomb", self.schedules["coulomb"].lambdas ), "vdw": kwargs.pop("lambda_vdw", self.schedules["vdw"].lambdas), } self.runtime = kwargs.pop("runtime", 5000.0) # ps self.dirname = kwargs.pop("dirname", self.dirname_default) self.includes = list(asiterable(kwargs.pop("includes", []))) + [ config.includedir ] self.component_dirs = { "coulomb": os.path.join(self.dirname, "Coulomb"), "vdw": os.path.join(self.dirname, "VDW"), } # for analysis self.stride = kwargs.pop("stride", 1) self.start = kwargs.pop("start", 0) self.stop = kwargs.pop("stop", None) self.SI = kwargs.pop("SI", True) # other variables #: Results from the analysis self.results = AttributeDict( xvg=AttributeDict(), dvdl=AttributeDict(), DeltaA=AttributeDict(), # contains QuantityWithError ) #: Generated run scripts self.scripts = AttributeDict() # sanity checks if os.path.exists(self.dirname): wmsg = ( "Directory %(dirname)r already exists --- will overwrite " "existing files." % vars(self) ) warnings.warn(wmsg) logger.warning(wmsg) # overrides pickle file so that we can run from elsewhere if not basedir is None: self.basedir = os.path.realpath(basedir) else: self.basedir = None try: self.filename = os.path.abspath(self.filename) except (AttributeError, TypeError): # default filename if none was provided self.filename = self.frombase( self.dirname, self.__class__.__name__ + os.extsep + "fep" ) # override pickle file for this dangerous option: must be set # on a case-by-case basis self.permissive = kwargs.pop("permissive", False) logger.info( "Solvation free energy calculation for molecule " "%(molecule)s in solvent %(solvent_type)s.", vars(self), ) logger.info("Base directory is %(basedir)r", vars(self)) logger.info( "Using setup directories under %(dirname)r: %(component_dirs)r", vars(self) ) logger.info("Default checkpoint file is %(filename)r", vars(self)) logger.debug("Coulomb lambdas = %(coulomb)r", self.lambdas) logger.debug("VDW lambdas = %(vdw)r", self.lambdas) super(Gsolv, self).__init__(**kwargs)
    def frombase(self, *args):
        """Return path with :attr:`Gsolv.basedir` prefixed."""
        # wrap paths with frombase() and hopefully this allows us fairly
        # flexible use of the class, especially for analysis
        if self.basedir is None:
            return os.path.join(*args)
        return os.path.join(self.basedir, *args)

    def wname(self, component, lmbda):
        """Return name of the window directory itself.

        Typically something like ``VDW/0000``, ``VDW/0500``, ..., ``Coulomb/1000``
        """
        return os.path.join(self.component_dirs[component], "%04d" % (1000 * lmbda))

    def wdir(self, component, lmbda):
        """Return rooted path to the work directory for *component* and *lmbda*.

        (Constructed from :meth:`frombase` and :meth:`wname`.)
        """
        return self.frombase(self.wname(component, lmbda))

    def label(self, component):
        """Simple label for component, e.g. for use in filenames."""
        return self.schedules[component].label

    def tasklabel(self, component, lmbda):
        """Batch submission script name for a single task job."""
        return (
            self.molecule[:3]
            + "_"
            + self.schedules[component].label
            + "%04d" % (1000 * lmbda)
        )

    def arraylabel(self, component):
        """Batch submission script name for a job array."""
        return self.molecule[:3] + "_" + self.schedules[component].label

    def fep_dirs(self):
        """Generator for all simulation sub directories"""
        for component, lambdas in self.lambdas.items():
            for l in lambdas:
                yield self.wdir(component, l)
[docs] def setup(self, **kwargs): """Prepare the input files for all Gromacs runs. :Keywords: *qscript* (List of) template(s) for batch submission scripts; if not set then the templates are used that were supplied to the constructor. *kwargs* Most kwargs are passed on to :func:`gromacs.setup.MD` although some are set to values that are required for the FEP functionality. *mdrun_opts* list of options to :program:`mdrun`; ``-dhdl`` is always added to this list as it is required for the thermodynamic integration calculations *includes* list of directories where Gromacs input files can be found The following keywords cannot be changed: *dirname*, *jobname*, *prefix*, *runtime*, *deffnm*. The last two can be specified when constructing :class:`Gsolv`. .. SeeAlso:: :func:`gromacs.setup.MD` and :func:`gromacs.qsub.generate_submit_scripts` .. versionchanged:: 0.6.0 Gromacs now uses option ``-dhdl`` instead of ``-dgdl``. """ self.journal.start("setup") # -dgdl for FEP output (although that seems to have been changed to -dHdl in Gromacs 4.5.3) # NOW use -dhdl kwargs["mdrun_opts"] = " ".join([kwargs.pop("mdrun_opts", ""), "-dhdl"]) kwargs["includes"] = asiterable(kwargs.pop("includes", [])) + self.includes kwargs["deffnm"] = self.deffnm kwargs.setdefault("maxwarn", 1) qsubargs = kwargs.copy() qsubargs["dirname"] = self.frombase(self.dirname) # handle templates separately (necessary for array jobs) qscripts = qsubargs.pop("sge", None) or self.qscript qscripts.extend(qsubargs.pop("qscript", [])) # also allow canonical 'templates' # make sure that the individual batch scripts are also written kwargs.setdefault("qscript", qscripts) for component, lambdas in self.lambdas.items(): for l in lambdas: params = self._setup(component, l, foreign_lambdas=lambdas, **kwargs) # generate queuing system script for array job directories = [self.wdir(component, l) for l in lambdas] qsubargs["jobname"] = self.arraylabel(component) qsubargs["prefix"] = self.label(component) + "_" self.scripts[component] = gromacs.qsub.generate_submit_array( qscripts, directories, **qsubargs ) logger.info( "[%s] Wrote array job scripts %r", component, self.scripts[component] ) self.journal.completed("setup") self.save(self.filename) logger.info( "Saved state information to %r; reload later with G = %r.", self.filename, self, ) logger.info("Finished setting up all individual simulations. Now run them...") params.pop("struct", None) # scrub window-specific params return params
def _setup(self, component, lmbda, foreign_lambdas, **kwargs): """Prepare the input files for an individual Gromacs runs.""" # note that all arguments pertinent to the submission scripts should be in kwargs # and have been set in setup() so that it is easy to generate array job scripts logger.info("Preparing %(component)s for lambda=%(lmbda)g" % vars()) wdir = self.wdir(component, lmbda) kwargs.setdefault("couple-intramol", "no") ### XXX Issue 20: if an entry is None then the dict will not be updated: ### I *must* keep "none" as a legal string value kwargs.update( self.schedules[component].mdp_dict ) # sets soft core & lambda0/1 state if kwargs.pop("edr", True): logger.info("Setting dhdl file to edr format") kwargs.setdefault("separate-dhdl-file", "no") else: logger.info("Setting dhdl file to xvg format") kwargs.setdefault("separate-dhdl-file", "yes") foreign_lambdas = np.asarray(foreign_lambdas) lambda_index = np.where(foreign_lambdas == lmbda)[0][0] kwargs.update( dirname=wdir, struct=self.struct, top=self.top, mdp=self.mdp, ndx=self.ndx, mainselection=None, runtime=self.runtime, ref_t=self.Temperature, # TODO: maybe not working yet, check _setup() gen_temp=self.Temperature, # needed until gromacs.setup() is smarter qname=self.tasklabel(component, lmbda), free_energy="yes", couple_moltype=self.molecule, init_lambda_state=lambda_index, fep_lambdas=foreign_lambdas, calc_lambda_neighbors=-1, ) return gromacs.setup.MD(**kwargs)
    def dgdl_xvg(self, *args):
        """Return filename of the dgdl XVG file.

        Recognizes uncompressed, gzipped (gz), and bzip2ed (bz2) files.

        :Arguments:
           *args*
              joins the arguments into a path and adds the default filename for
              the dvdl file

        :Returns: path to the first existing file, checking the uncompressed
                  name first and then the compressed variants

        :Raises: :exc:`IOError` with error code ENOENT if no file could be found
        """
        EXTENSIONS = ("", os.path.extsep + "bz2", os.path.extsep + "gz")
        root = os.path.join(*args + (self.deffnm + ".xvg",))
        for ext in EXTENSIONS:
            fn = root + ext
            if os.path.exists(fn):
                return fn
        logger.error("Missing dgdl.xvg file %(root)r.", vars())
        raise IOError(errno.ENOENT, "Missing dgdl.xvg file", root)

    def dgdl_edr(self, *args):
        """Return filenames of the dgdl EDR files.

        :Arguments:
           *args*
              joins the arguments into a path and adds the default filename for
              the dvdl file

        :Returns: list of paths to EDR files

        :Raises: :exc:`IOError` with error code ENOENT if no file could be found
        """
        pattern = os.path.join(*args + (self.deffnm + "*.edr",))
        edrs = glob(pattern)
        if not edrs:
            logger.error("Missing dgdl.edr file %(pattern)r.", vars())
            raise IOError(errno.ENOENT, "Missing dgdl.edr file", pattern)
        return [os.path.abspath(i) for i in edrs]

    def dgdl_tpr(self, *args):
        """Return filename of the dgdl TPR file.

        :Arguments:
           *args*
              joins the arguments into a path and adds the default filename for
              the dvdl file

        :Returns: path to TPR

        :Raises: :exc:`IOError` with error code ENOENT if no file could be found
        """
        fn = os.path.join(*args + (self.deffnm + ".tpr",))
        if not os.path.exists(fn):
            logger.error("Missing TPR file %(fn)r.", vars())
            raise IOError(errno.ENOENT, "Missing TPR file", fn)
        return fn

    def dgdl_total_edr(self, *args, **kwargs):
        """Return filename of the combined dgdl EDR file.

        :Arguments:
           *args*
              joins the arguments into a path and adds the default filename for
              the dvdl file

        :Keywords:
           *total_edr_name*
              Name of the user defined total edr file.

        :Returns: path to total EDR
        """
        total_edr_name = kwargs.get("total_edr_name", "total.edr")
        fn = os.path.join(*args + (total_edr_name,))
        return fn

    def convert_edr(self):
        """Convert EDR files to compressed XVG files."""
        logger.info("[%(dirname)s] Converting EDR -> XVG.bz2" % vars(self))
        for component, lambdas in self.lambdas.items():
            edr_files = [self.dgdl_edr(self.wdir(component, l)) for l in lambdas]
            tpr_files = [self.dgdl_tpr(self.wdir(component, l)) for l in lambdas]
            for tpr, edr in zip(tpr_files, edr_files):
                dirct = os.path.abspath(os.path.dirname(tpr))
                if len(edr) == 1:
                    total_edr = edr[0]
                else:
                    total_edr = self.dgdl_total_edr(dirct)
                    logger.info("  {0} --> {1}".format("edrs", total_edr))
                    gromacs.eneconv(f=edr, o=total_edr)
                xvgfile = os.path.join(dirct, self.deffnm + ".xvg")  # hack
                logger.info("  {0} --> {1}".format(total_edr, xvgfile))
                gromacs.g_energy(s=tpr, f=total_edr, odh=xvgfile)
    def collect(self, stride=None, autosave=True, autocompress=True):
        """Collect dV/dl from output.

        :Keywords:
           *stride*
              read data every *stride* lines, ``None`` uses the class default
           *autosave*
              immediately save the class pickle fep file
           *autocompress*
              compress the xvg file with bzip2 (saves >70% of space)
        """
        from gromacs.formats import XVG

        if not stride is None:
            self.stride = stride

        if autocompress:
            # must be done before adding to results.xvg or we will not find the file later
            self.compress_dgdl_xvg()

        logger.info(
            "[%(dirname)s] Finding dgdl xvg files, reading with "
            "stride=%(stride)d permissive=%(permissive)r." % vars(self)
        )
        for component, lambdas in self.lambdas.items():
            xvg_files = [self.dgdl_xvg(self.wdir(component, l)) for l in lambdas]
            self.results.xvg[component] = (
                np.array(lambdas),
                [
                    XVG(xvg, permissive=self.permissive, stride=self.stride)
                    for xvg in xvg_files
                ],
            )
        if autosave:
            self.save()

    def compress_dgdl_xvg(self):
        """Compress *all* dgdl xvg files with bzip2.

        .. Note:: After running this method you might want to run
           :meth:`collect` to ensure that the results in :attr:`results.xvg`
           point to the *compressed* files. Otherwise an :exc:`IOError` may be
           raised because the uncompressed ``md.xvg`` file can no longer be
           found.
        """
        for component, lambdas in self.lambdas.items():
            xvg_files = [self.dgdl_xvg(self.wdir(component, l)) for l in lambdas]
            for xvg in xvg_files:
                root, ext = os.path.splitext(xvg)
                if ext == os.path.extsep + "xvg":
                    fnbz2 = xvg + os.path.extsep + "bz2"
                    logger.info(
                        "[%s] Compressing dgdl file %r with bzip2", self.dirname, xvg
                    )
                    # speed is similar to 'bzip2 -9 FILE' (using a 1 Mio buffer)
                    # (Since GW 0.8, openany() does not take kwargs anymore so the write buffer cannot be
                    # set anymore (buffering=1048576) so the performance might be lower in MDPOW >= 0.7.0)
                    with open(xvg, "rb", buffering=1048576) as source:
                        with openany(fnbz2, "wb") as target:
                            target.writelines(source)
                    if os.path.exists(fnbz2) and os.path.exists(xvg):
                        os.unlink(xvg)
                    if not os.path.exists(fnbz2):
                        logger.error(
                            "[%s] Failed to compress %r --- mysterious!",
                            self.dirname,
                            fnbz2,
                        )
                    else:
                        logger.info(
                            "[%s] Compression complete: %r", self.dirname, fnbz2
                        )

    def contains_corrupted_xvgs(self):
        """Check if any of the source datafiles had reported corrupted lines.

        :Returns: ``True`` if any of the xvg dgdl files were produced with the
                  permissive=True flag and had skipped lines.

        For debugging purposes, the number of corrupted lines are stored in
        :attr:`Gsolv._corrupted` as dicts of dicts with the component as
        primary and the lambda as secondary key.
        """

        def _lencorrupted(xvg):
            try:
                return len(xvg.corrupted_lineno)
            except AttributeError:  # backwards compatible (pre gw 0.1.10 are always ok)
                return 0
            except TypeError:  # len(None): XVG.parse() has not been run yet
                return 0  # ... so we cannot conclude that it does contain bad ones

        corrupted = {}
        self._corrupted = {}  # debugging ...
        for component, (lambdas, xvgs) in self.results.xvg.items():
            corrupted[component] = np.any([(_lencorrupted(xvg) > 0) for xvg in xvgs])
            self._corrupted[component] = dict(
                ((l, _lencorrupted(xvg)) for l, xvg in zip(lambdas, xvgs))
            )
        return np.any([x for x in corrupted.values()])
[docs] def analyze(self, force=False, stride=None, autosave=True, ncorrel=25000): r"""Extract dV/dl from output and calculate dG by TI. Thermodynamic integration (TI) is performed on the individual component window calculation (typically the Coulomb and the VDW part, :math:`\Delta A_{\mathrm{coul}}` and :math:`\Delta A_{\mathrm{vdW}}`). :math:`\Delta A_{\mathrm{coul}}` is the free energy component of discharging the molecule and :math:`\Delta A_{\mathrm{vdW}}` of decoupling (switching off LJ interactions with the environment). The free energy components must be interpreted in this way because we defined ``lambda=0`` as interaction switched on and ``lambda=1`` as switched off. .. math:: \Delta A^{*} = -(\Delta A_{\mathrm{coul}} + \Delta A_{\mathrm{vdw}}) Data are stored in :attr:`Gsolv.results`. The dV/dlambda graphs are integrated with the composite Simpson's rule (and if the number of datapoints are even, the first interval is evaluated with the trapezoidal rule); see :func:`scipy.integrate.simps` for details). Note that this implementation of Simpson's rule does not require equidistant spacing on the lambda axis. For the Coulomb part using Simpson's rule has been shown to produce more accurate results than the trapezoidal rule [Jorge2010]_. Errors are estimated from the errors of the individual <dV/dlambda>: 1. The error of the mean <dV/dlambda> is calculated via the decay time of the fluctuation around the mean. ``ncorrel`` is the max number of samples that is going to be used to calculate the autocorrelation time via a FFT. See :func:`numkit.timeseries.tcorrel`. 2. The error on the integral is calculated analytically via propagation of errors through Simpson's rule (with the approximation that all spacings are assumed to be equal; taken as the average over all spacings as implemented in :func:`numkit.integration.simps_error`). .. Note:: For the Coulomb part, which typically only contains about 5 lambdas, it is recommended to have a odd number of lambda values to fully benefit from the higher accuracy of the integration scheme. .. [Jorge2010] M. Jorge, N.M. Garrido, A.J. Queimada, I.G. Economou, and E.A. Macedo. Effect of the integration method on the accuracy and computational efficiency of free energy calculations using thermodynamic integration. Journal of Chemical Theory and Computation, 6 (4):1018--1027, 2010. 10.1021/ct900661c. :Keywords: *force* reload raw data even though it is already loaded *stride* read data every *stride* lines, ``None`` uses the class default *autosave* save to the pickle file when results have been computed *ncorrel* aim for <= 25,000 samples for t_correl .. rubric:: Notes The error on the mean of the data :math:`\epsilon_y`, taking the correlation time into account, is calculated according to [FrenkelSmit2002]_ `p526`_: .. math:: \epsilon_y = \sqrt{2 \tau_c \mathrm{acf}(0)/T} where :math:`\mathrm{acf}()` is the autocorrelation function of the fluctuations around the mean, :math:`y - \langle y \rangle`, :math:`\tau_c` is the correlation time, and :math:`T` the total length of the simulation .. [FrenkelSmit2002] D. Frenkel and B. Smit, Understanding Molecular Simulation. Academic Press, San Diego 2002 .. 
_p526: http://books.google.co.uk/books?id=XmyO2oRUg0cC&pg=PA526 """ stride = stride or self.stride logger.info("Analysis stride is %s.", stride) if force or not self.has_dVdl(): try: self.collect(stride=stride, autosave=False) except IOError as err: if err.errno == errno.ENOENT: self.convert_edr() self.collect(stride=stride, autosave=False) else: logger.exception(err) raise else: logger.info("Analyzing stored data.") # total free energy difference at const P (all simulations are done in NPT) GibbsFreeEnergy = QuantityWithError(0, 0) for component, (lambdas, xvgs) in self.results.xvg.items(): logger.info( "[%s %s] Computing averages <dV/dl> and errors for %d lambda values.", self.molecule, component, len(lambdas), ) # for TI just get the average dv/dl value (in array column 1; col 0 is the time) # (This can take a while if the XVG is now reading the array from disk first time) # Use XVG class properties: first data in column 0! Y = np.array([x.mean[0] for x in xvgs]) stdY = np.array([x.std[0] for x in xvgs]) # compute auto correlation time and error estimate for independent samples # (this can take a while). x.array[0] == time, x.array[1] == dHdl # nstep is calculated to give ncorrel samples (or all samples if less than ncorrel are # available) tc_data = [ numkit.timeseries.tcorrel( x.array[0], x.array[1], nstep=int(np.ceil(len(x.array[0]) / float(ncorrel))), ) for x in xvgs ] DY = np.array([tc["sigma"] for tc in tc_data]) tc = np.array([tc["tc"] for tc in tc_data]) self.results.dvdl[component] = { "lambdas": lambdas, "mean": Y, "error": DY, "stddev": stdY, "tcorrel": tc, } # Combined Simpson rule integration: # even="last" because dV/dl is smoother at the beginning so using trapezoidal # integration there makes less of an error (one hopes...) a = scipy.integrate.simps(Y, x=lambdas, even="last") da = numkit.integration.simps_error(DY, x=lambdas, even="last") self.results.DeltaA[component] = QuantityWithError(a, da) GibbsFreeEnergy += self.results.DeltaA[ component ] # error propagation is automagic! # hydration free energy Delta A = -(Delta A_coul + Delta A_vdw) GibbsFreeEnergy *= -1 self.results.DeltaA.Gibbs = GibbsFreeEnergy if autosave: self.save() self.logger_DeltaA0() return self.results.DeltaA.Gibbs
[docs] def collect_alchemlyb( self, SI=True, start=0, stop=None, stride=None, autosave=True, autocompress=True ): """Collect the data files using alchemlyb. Unlike :meth:`collect`, this method can subsample with the statistical inefficiency (parameter `SI`). """ extract = self.estimators[self.method]["extract"] if autocompress: # must be done before adding to results.xvg or we will not find the file later self.compress_dgdl_xvg() logger.info( "[%(dirname)s] Finding dgdl xvg files, reading with " "stride=%(stride)d permissive=%(permissive)r." % vars(self) ) for component, lambdas in self.lambdas.items(): val = [] for l in lambdas: xvg_file = self.dgdl_xvg(self.wdir(component, l)) xvg_df = extract(xvg_file, T=self.Temperature).iloc[start:stop:stride] if SI: logger.info( "Performing statistical inefficiency analysis for window %s %04d" % (component, 1000 * l) ) ts = _extract_dataframe(xvg_file).iloc[start:stop:stride] ts = pd.DataFrame({"time": ts.iloc[:, 0], "dhdl": ts.iloc[:, 1]}) ts = ts.set_index("time") # use the statistical_inefficiency function to subsample the data xvg_df = statistical_inefficiency(xvg_df, ts, conservative=True) val.append(xvg_df) self.results.xvg[component] = (np.array(lambdas), pd.concat(val)) if autosave: self.save()
[docs] def analyze_alchemlyb( self, SI=True, start=0, stop=None, stride=None, force=False, autosave=True ): """Compute the free energy from the simulation data with alchemlyb. Unlike :meth:`analyze`, the MBAR estimator is available (in addition to TI). Note that SI should be enabled for meaningful error estimates. """ stride = stride or self.stride start = start or self.start stop = stop or self.stop logger.info("Analysis stride is %s.", stride) logger.info("Analysis starts from frame %s.", start) logger.info("Analysis stops at frame %s.", stop) if self.method in ["TI", "BAR", "MBAR"]: estimator = self.estimators[self.method]["estimator"] else: errmsg = "The method is not supported." logger.error(errmsg) raise ValueError(errmsg) if force or not self.has_dVdl(): try: self.collect_alchemlyb(SI, start, stop, stride, autosave=False) except IOError as err: if err.errno == errno.ENOENT: self.convert_edr() self.collect_alchemlyb(SI, start, stop, stride, autosave=False) else: logger.exception(err) raise else: logger.info("Analyzing stored data.") # total free energy difference at const P (all simulations are done in NPT) GibbsFreeEnergy = QuantityWithError(0, 0) for component, (lambdas, xvgs) in self.results.xvg.items(): result = estimator().fit(xvgs) if self.method == "BAR": DeltaA = QuantityWithError(0, 0) a_s = np.diagonal(result.delta_f_, offset=1) da_s = np.diagonal(result.d_delta_f_, offset=1) for a, da in zip(a_s, da_s): DeltaA += QuantityWithError(a, da) self.results.DeltaA[component] = kBT_to_kJ(DeltaA, self.Temperature) else: a = result.delta_f_.loc[0.00, 1.00] da = result.d_delta_f_.loc[0.00, 1.00] self.results.DeltaA[component] = kBT_to_kJ( QuantityWithError(a, da), self.Temperature ) GibbsFreeEnergy += self.results.DeltaA[ component ] # error propagation is automagic! # hydration free energy Delta A = -(Delta A_coul + Delta A_vdw) GibbsFreeEnergy *= -1 self.results.DeltaA.Gibbs = GibbsFreeEnergy if autosave: self.save() self.logger_DeltaA0() return self.results.DeltaA.Gibbs if autosave: self.save()
    def write_DeltaA0(self, filename, mode="w"):
        """Write free energy components to a file.

        :Arguments:
           *filename*
              name of the text file
           *mode*
              'w' for overwrite or 'a' for append ['w']

        Format::

          .                 ----- kJ/mol ------
          molecule solvent  total  coulomb  vdw
        """
        with open(filename, mode) as tab:
            tab.write(self.summary() + "\n")

    def summary(self):
        """Return a string that summarizes the energetics.

        Each energy component is followed by its error.

        Format::

          .                 ------ kJ/mol -----
          molecule solvent  total  coulomb  vdw
        """
        fmt = "%-10s %-14s %+8.2f %8.2f %+8.2f %8.2f %+8.2f %8.2f"
        d = self.results.DeltaA
        return fmt % (
            (self.molecule, self.solvent_type)
            + d.Gibbs.astuple()
            + d.coulomb.astuple()
            + d.vdw.astuple()
        )

    def logger_DeltaA0(self):
        """Print the free energy contributions (errors in parentheses)."""
        if not "DeltaA" in self.results or len(self.results.DeltaA) == 0:
            logger.info("No DeltaA free energies computed yet.")
            return
        logger.info("DeltaG0 = -(DeltaG_coul + DeltaG_vdw)")
        for component, energy in self.results.DeltaA.items():
            logger.info(
                "[%s] %s solvation free energy (%s) %g (%.2f) kJ/mol",
                self.molecule,
                self.solvent_type.capitalize(),
                component,
                energy.value,
                energy.error,
            )

    def has_dVdl(self):
        """Check if dV/dl data have already been collected.

        :Returns: ``True`` if the dV/dl data have been acquired
                  (:meth:`Gsolv.collect`) for all FEP simulations.
        """
        try:
            if len(self.results.xvg) == 0:
                return False
        except AttributeError:
            return False
        return np.all(
            np.array([len(xvgs) for (lambdas, xvgs) in self.results.xvg.values()]) > 0
        )
[docs] def plot(self, **kwargs): """Plot the TI data with error bars. Run :meth:`mdpow.fep.Gsolv.analyze` first. All *kwargs* are passed on to :func:`pylab.errorbar`. :Returns: The axes of the subplot. """ import matplotlib import matplotlib.pyplot as plt kwargs.setdefault("color", "black") kwargs.setdefault("capsize", 0) kwargs.setdefault("elinewidth", 2) try: if self.results.DeltaA.Gibbs is None or len(self.results.dvdl) == 0: raise KeyError except KeyError: logger.info("Data were not analyzed yet -- doing that now... patience.") self.analyze() dvdl = self.results.dvdl nplots = len(dvdl) fig, axs = plt.subplots(nrows=1, ncols=nplots) for i, component in enumerate(np.sort(dvdl.keys())): # stable plot order x, y, dy = [dvdl[component][k] for k in ("lambdas", "mean", "error")] iplot = i ax = axs[i] energy = self.results.DeltaA[component] label = r"$\Delta A^{\rm{%s}}_{\rm{%s}} = %.2f\pm%.2f$ kJ/mol" % ( component, self.solvent_type, energy.value, energy.error, ) ax.errorbar(x, y, yerr=dy, label=label, **kwargs) ax.set_xlabel(r"$\lambda$") ax.legend(loc="best") ax.set_xlim(-0.05, 1.05) axs[0].set_ylabel(r"$dV/d\lambda$ in kJ/mol") fig.suptitle( r"Free energy difference $\Delta A^{0}_{\rm{%s}}$ for %s: $%.2f\pm%.2f$ kJ/mol" % ( ( self.solvent_type, self.molecule, ) + self.results.DeltaA.Gibbs.astuple() ) ) fig.savefig("DeltaA.png") plt.close() return fig
[docs] def qsub(self, script=None): """Submit a batch script locally. If *script* == ``None`` then take the first script (works well if only one template was provided). """ from gromacs.qsub import relpath def choose_script_from(scripts): if script is None: s = scripts[0] elif script in scripts: s = script else: errmsg = "No script matching %(script)r in %(scripts)r" % vars() logger.error(errmsg) raise ValueError(errmsg) return s with in_dir(self.dirname, create=False): for component, scripts in self.scripts.items(): s = relpath( choose_script_from(scripts), self.dirname ) # relative to dirname cmd = ["qsub", s] logger.debug("[%s] submitting locally: %s", " ".join(cmd), component) rc = call(cmd) if rc != 0: errmsg = "submitting job %(s)r failed with rc=%(rc)d" % vars() logger.error(errmsg) raise OSError(errmsg) logger.info( "[%r] Submitted jobs locally for %r", self.dirname, self.scripts.keys() )
    def __repr__(self):
        return "%s(filename=%r)" % (self.__class__.__name__, self.filename)


class Ghyd(Gsolv):
    """Sets up and analyses MD to obtain the hydration free energy of a solute."""

    solvent_default = "water"
    dirname_default = os.path.join(Gsolv.topdir_default, solvent_default)


class Gcyclo(Gsolv):
    """Sets up and analyses MD to obtain the solvation free energy of a solute in cyclohexane."""

    solvent_default = "cyclohexane"
    dirname_default = os.path.join(Gsolv.topdir_default, solvent_default)
class Goct(Gsolv):
    """Sets up and analyses MD to obtain the solvation free energy of a solute in octanol.

    The *coulomb* lambda schedule is enhanced compared to water as the initial
    part of the dV/dl curve is quite sensitive. By adding two additional points
    we hope to reduce the overall error on the dis-charging free energy.
    """

    solvent_default = "octanol"
    dirname_default = os.path.join(Gsolv.topdir_default, solvent_default)

    schedules = {
        "coulomb": FEPschedule(
            name="coulomb",
            description="dis-charging vdw+q --> vdw",
            label="Coul",
            couple_lambda0="vdw-q",
            couple_lambda1="vdw",
            sc_alpha=0,  # linear scaling for coulomb
            # lambdas=[0, 0.25, 0.5, 0.75, 1.0],  # default
            lambdas=[0, 0.125, 0.25, 0.375, 0.5, 0.75, 1.0],  # +0.125, 0.375 enhanced
        ),
        "vdw": FEPschedule(
            name="vdw",
            description="decoupling vdw --> none",
            label="VDW",
            couple_lambda0="vdw",
            couple_lambda1="none",
            sc_alpha=0.5,
            sc_power=1,
            sc_sigma=0.3,  # recommended values
            lambdas=[
                0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6,  # defaults
                0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1,
            ],
        ),
    }
class Gwoct(Goct):
    """Sets up and analyses MD to obtain the solvation free energy of a solute in wet octanol.

    The *coulomb* lambda schedule is enhanced compared to water as the initial
    part of the dV/dl curve is quite sensitive. By adding two additional points
    we hope to reduce the overall error on the dis-charging free energy.
    """

    solvent_default = "wetoctanol"
    dirname_default = os.path.join(Gsolv.topdir_default, solvent_default)


class Gtol(Gsolv):
    """Sets up and analyses MD to obtain the solvation free energy of a solute in toluene."""

    solvent_default = "toluene"
    dirname_default = os.path.join(Gsolv.topdir_default, solvent_default)
def p_transfer(G1, G2, **kwargs): """Compute partition coefficient from two :class:`Gsolv` objects. The order determines the direction of transfer: G1 --> G2. E.g.: ``G1 = Gwat`` and ``G2 = Goct`` will compute a water-octanol transfer free energy and a water-octanol partition coefficient. transfer free energy from water into octanol:: DeltaDeltaG0 = DeltaG0_oct - DeltaG0_water water-octanol partition coefficient:: log P_oct/wat = log [X]_oct/[X]_wat .. note:: The partition coefficient corresponds to a transfer in the *opposite* direction compared to the free energy difference computed here. For instance, ``log P_oct/water`` is related to the free energy of transfer from octanol to water, ``Delta G_water - Delta G_oct``. ``log P_oct/water < 0`` means that the water phase is preferred, i.e. the probability to find the solute in octanol is *smaller* than the probability to find it in the water phase, or in other words, the hydration free energy ``Delta G_water`` is smaller (more negative) than the octanol solvation free energy ``Delta G_oct``. :Arguments: *G1*, *G2* *G1* and *G2* should be two :class:`Gsolv` instances, order matters. *force* force rereading of data files even if some data were already stored [False] *stride* analyze every *stride*-th datapoint in the dV/dlambda files *start* Start frame of data analyzed in every fep window. *stop* Stop frame of data analyzed in every fep window. *SI* Set to ``True`` if you want to perform statistical inefficiency to preprocess the data. *estimator* Set to ``alchemlyb`` if you want to use alchemlyb estimators, or ``mdpow`` if you want the default TI method. *method* Use `TI`, `BAR` or `MBAR` method in `alchemlyb`, or `TI` in `mdpow`. :Returns: (transfer free energy, log10 of the water-octanol partition coefficient = log Pow) """ kwargs.setdefault("force", False) estimator = kwargs.pop("estimator", "alchemlyb") if not estimator in ("mdpow", "alchemlyb"): errmsg = ( "estimator = %r is not supported, must be 'mdpow' or 'alchemlyb'" % estimator ) logger.error(errmsg) raise ValueError(errmsg) if G1.molecule != G2.molecule: raise ValueError("The two simulations were done for different molecules.") if G1.Temperature != G2.Temperature: raise ValueError("The two simulations were done at different temperatures.") logger.info( "[%s] transfer free energy %s --> %s calculation", G1.molecule, G1.solvent_type, G2.solvent_type, ) for G in (G1, G2): G_kwargs = kwargs.copy() # for fep files generated with old code which doesn't have these attributes if not hasattr(G, "start"): G.start = G_kwargs.pop("start", 0) if not hasattr(G, "stop"): G.stop = G_kwargs.pop("stop", None) if not hasattr(G, "SI"): G_kwargs.setdefault("SI", True) else: G_kwargs.setdefault("SI", G.SI) # for this version. use the method given instead of the one in the input cfg file G.method = G_kwargs.pop("method", "MBAR") if estimator == "mdpow": if G.method != "TI": errmsg = ( "Method %s is not implemented in MDPOW, use estimator='alchemlyb'" % G.method ) logger.error(errmsg) raise ValueError(errmsg) logger.info("The solvent is %s .", G.solvent_type) if kwargs["force"] or "Gibbs" not in G.results.DeltaA: # write out the settings when the analysis is performed logger.info("Estimator is %s.", estimator) logger.info("Free energy calculation method is %s.", G.method) if estimator == "mdpow": G_kwargs.pop("SI", None) # G.analyze() does not know SI G.analyze(**G_kwargs) elif estimator == "alchemlyb": logger.info( "Statistical inefficiency analysis will %s be performed." 
% ("" if G_kwargs["SI"] else "not") ) G.analyze_alchemlyb(**G_kwargs) else: logger.info("Using already calculated free energy DeltaA") # x.Gibbs are QuantityWithError so they do error propagation transferFE = G2.results.DeltaA.Gibbs - G1.results.DeltaA.Gibbs # note minus sign, with our convention of the free energy difference to be # opposite to the partition coefficient. logPow = -transferFE / (kBOLTZ * G1.Temperature) * np.log10(np.e) molecule = G1.molecule # lower case initials, in reverse order of transfer, e.g. # water -> octanol: P_ow # water -> cyclohexane: P_cw # water -> toluene: P_tw coefficient = "P_{0}{1}".format( G2.solvent_type.lower()[0], G1.solvent_type.lower()[0] ) logger.info("[%s] Values at T = %g K", molecule, G1.Temperature) logger.info( "[%s] Free energy of transfer %s --> %s: %.3f (%.3f) kJ/mol", molecule, G1.solvent_type, G2.solvent_type, transferFE.value, transferFE.error, ) logger.info( "[%s] log %s: %.3f (%.3f)", molecule, coefficient, logPow.value, logPow.error ) return transferFE, logPow
[docs]def pOW(G1, G2, **kwargs): """Compute water-octanol partition coefficient from two :class:`Gsolv` objects. transfer free energy from water into octanol:: DeltaDeltaG0 = DeltaG0_oct - DeltaG0_water octanol/water partition coefficient:: log P_oct/wat = log [X]_oct/[X]_wat :Arguments: *G1*, *G2* *G1* and *G2* should be a :class:`Ghyd` and a :class:`Goct` instance, but order does not matter *force* force rereading of data files even if some data were already stored [False] *stride* analyze every *stride*-th datapoint in the dV/dlambda files *start* Start frame of data analyzed in every fep window. *stop* Stop frame of data analyzed in every fep window. *SI* Set to ``True`` if you want to perform statistical inefficiency to preprocess the data. *estimator* Set to ``alchemlyb`` if you want to use alchemlyb estimators, or ``mdpow`` if you want the default TI method. *method* Use `TI`, `BAR` or `MBAR` method in `alchemlyb`, or `TI` in `mdpow`. :Returns: (transfer free energy, log10 of the octanol/water partition coefficient = log Pow) """ if G1.solvent_type == "water" and G2.solvent_type == "octanol": args = (G1, G2) elif G1.solvent_type == "octanol" and G2.solvent_type == "water": args = (G2, G1) else: msg = "For pOW need water and octanol simulations but instead got {0} and {1}".format( G1.solvent_type, G2.solvent_type ) logger.error(msg) raise ValueError(msg) return p_transfer(*args, **kwargs)
[docs]def pCW(G1, G2, **kwargs): """Compute water-cyclohexane partition coefficient from two :class:`Gsolv` objects. transfer free energy from water into cyclohexane:: DeltaDeltaG0 = DeltaG0_cyclohexane - DeltaG0_water cyclohexane/water partition coefficient:: log P_CW = log [X]_cyclohexane/[X]_water :Arguments: *G1*, *G2* *G1* and *G2* should be a :class:`Ghyd` and a :class:`Gcyclo` instance, but order does not matter *force* force rereading of data files even if some data were already stored [False] *stride* analyze every *stride*-th datapoint in the dV/dlambda files *start* Start frame of data analyzed in every fep window. *stop* Stop frame of data analyzed in every fep window. *SI* Set to ``True`` if you want to perform statistical inefficiency to preprocess the data. *estimator* Set to ``alchemlyb`` if you want to use alchemlyb estimators, or ``mdpow`` if you want the default TI method. *method* Use `TI`, `BAR` or `MBAR` method in `alchemlyb`, or `TI` in `mdpow`. :Returns: (transfer free energy, log10 of the cyclohexane/water partition coefficient = log Pcw) """ if G1.solvent_type == "water" and G2.solvent_type == "cyclohexane": args = (G1, G2) elif G1.solvent_type == "cyclohexane" and G2.solvent_type == "water": args = (G2, G1) else: msg = "For pCW need water and cyclohexane simulations but instead got {0} and {1}".format( G1.solvent_type, G2.solvent_type ) logger.error(msg) raise ValueError(msg) return p_transfer(*args, **kwargs)
[docs]def pTW(G1, G2, **kwargs): """Compute water-toluene partition coefficient from two :class:`Gsolv` objects. transfer free energy from water into toluene:: DeltaDeltaG0 = DeltaG0_tol - DeltaG0_water toluene/water partition coefficient:: log P_tol/wat = log [X]_tol/[X]_wat :Arguments: *G1*, *G2* *G1* and *G2* should be a :class:`Ghyd` and a :class:`Gtol` instance, but order does not matter *force* force rereading of data files even if some data were already stored [False] *stride* analyze every *stride*-th datapoint in the dV/dlambda files *start* Start frame of data analyzed in every fep window. *stop* Stop frame of data analyzed in every fep window. *SI* Set to ``True`` if you want to perform statistical inefficiency to preprocess the data. *estimator* Set to ``alchemlyb`` if you want to use alchemlyb estimators, or ``mdpow`` if you want the default TI method. *method* Use `TI`, `BAR` or `MBAR` method in `alchemlyb`, or `TI` in `mdpow`. :Returns: (transfer free energy, log10 of the toluene/water partition coefficient = log Ptw) """ if G1.solvent_type == "water" and G2.solvent_type == "toluene": args = (G1, G2) elif G1.solvent_type == "toluene" and G2.solvent_type == "water": args = (G2, G1) else: msg = "For pTW need water and toluene simulations but instead got {0} and {1}".format( G1.solvent_type, G2.solvent_type ) logger.error(msg) raise ValueError(msg) return p_transfer(*args, **kwargs)
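

# Example (sketch, not executed): octanol/water partition coefficient from two
# finished FEP calculations; the pickle file names below are placeholders that
# correspond to the default checkpoint locations.
#
#   gwat = Ghyd(filename="FEP/water/Ghyd.fep")
#   goct = Goct(filename="FEP/octanol/Goct.fep")
#   transferFE, logPow = pOW(gwat, goct)   # DeltaDeltaG0 in kJ/mol and log P_ow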