# MDPOW: ensemble.py
# 2021 Alia Lescoulie
import os
import errno
from typing import Optional, List
import numpy as np
import MDAnalysis as mda
from MDAnalysis.lib.log import ProgressBar
from MDAnalysis.exceptions import (
FileFormatWarning,
NoDataError,
MissingDataWarning,
SelectionError,
)
from gromacs.utilities import in_dir
import logging
logger = logging.getLogger("mdpow._ensemble")
[docs]class Ensemble(object):
"""Collection of related :class:`MDAnalysis.Universe <MDAnalysis.core.groups.universe.Universe>`
objects.
Stores systems produced by running mdpow-fep organized
by solvent, interaction, and lambda.
Given a mdpow simulation directory will load the MD
simulation files with the directory structure as keys.
:Keywords:
*dirname*
Molecule Simulation directory. Loads simulation files present in
lambda directories into the new instance. With this method for
generating an :class:`~mdpow.analysis.ensemble.Ensemble` the
lambda directories are explored and
:meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir`
searches for .gro, .gro.b2z, .gro.gz, and .tpr files for topology,
and .xtc files for trajectory. It will default to using the tpr file
available.
*solvents*
Solvents from directory given to the new instance. Default
:code:`solvents=('water', 'octanol')`
*topology_paths*
Specifies topologies used in loading simulated systems. Given
with a dictionary with keys-value pair for each solvent and
its respective topology path.
*interactions*
Interactions from directory given to the instance. Default
:code:`interactions=('Coulomb', 'VDW')`
*universe_kwargs*
`Keywords arguments <https://docs.mdanalysis.org/stable/documentation_pages/core/universe>`_
for loading :class:`MDAnalysis.Universe <MDAnalysis.core.groups.universe.Universe>`
objects from MDPOW files in :code:`dirname` argument directory when creating an
:class:`~mdpow.analysis.ensemble.Ensemble` .
.. rubric:: Examples
Typical workflow for MDPOW directory::
ens = Ensemble(dirname='molecule')
Typical workflow for adding universes individually::
ens = Ensemble()
u = mda.Universe(md.gro', 'md.xtc')
ens.add_system(u)
Topology paths can be specified when defining the _ensemble
by giving the paths to each solvent topology in a dictionary
with the topology_paths argument::
ens = Ensemble(dirname='molecule', topology_paths={'water': water_path,
'octanol': octanol_path}
Interactions can also be specified when initializing the with
a list using the interactions argument::
ens = Ensemble(dirname='molecule', interactions=['Coulomb']
.. versionadded:: 0.8.0
"""
def __init__(
self,
dirname=None,
solvents=("octanol", "water"),
topology_paths=None,
interactions=("Coulomb", "VDW"),
**universe_kwargs,
):
self.top_dict = topology_paths
self._num_systems = 0
self._ensemble = {}
self._keys = []
self._solvents = solvents
self._interactions = interactions
self.unv_kwargs = universe_kwargs
if dirname is None:
return
if not os.path.exists(dirname):
logger.error(f"Directory {dirname} does not exist")
raise FileNotFoundError(errno.ENOENT, "Directory does not" "exist", dirname)
self._ensemble_dir = dirname
self._build_ensemble()
def __repr__(self):
return f"<Ensemble Containing {self._num_systems} System>"
def __len__(self):
return self._num_systems
def __getitem__(self, index):
"""Allows dictionary like indexing"""
return self._ensemble[index]
[docs] @staticmethod
def _load_universe_from_dir(
solv_dir=None, **universe_kwargs
) -> Optional[mda.Universe]:
"""Loads system simulation files in directory into an
:class:`MDAnalysis.Universe <MDAnalysis.core.groups.universe.Universe>`
If multiple topologies are found it will default to using
the .tpr file. If more than one trajectory is present they
will be sorted alphabetically and passed into the
:class:`MDAnalysis.Universe <MDAnalysis.core.groups.universe.Universe>`
This method is run automatically by :meth:`~mdpow/analysis/ensemble.Ensemble._build_ensemble`
when initializing the class using the :code:`dirname` argument.
"""
def _sort_trajectories(trajectories: list) -> list:
"""Sorts list of trajectory files alphabetically and makes paths
absolute"""
sorted(trajectories)
return [os.path.abspath(t) for t in trajectories]
def _sort_topologies(topologies: list) -> list:
"""sorts list of trajectory files with .tpr first"""
tops = []
logger.info(
"If more than one topology is present the tpr will be the one used"
)
for i in range(len(topologies)):
f = topologies[i]
if f.endswith(".tpr"):
topologies.pop(i)
tops = [f] + topologies
break
return tops
cur_dir = os.listdir(os.curdir)
trj = []
top = []
if solv_dir is not None:
# if top is specified in kwargs, saved to list
top = [solv_dir]
for file in cur_dir:
if file.endswith(".xtc"):
# Saving trajectory directories
trj.append(file)
elif (
file.endswith("gro")
or file.endswith(".tpr")
or file.endswith("gro.bz2")
or file.endswith("gro.gz")
) and solv_dir is None:
# Saving topology directories
top.append(file)
if len(top) == 0 or len(trj) == 0:
logger.warning("No MD files detected in %s", os.curdir)
return
trj = _sort_trajectories(trj)
if len(top) > 1:
top = _sort_topologies(top)
try:
return mda.Universe(os.path.abspath(top[0]), trj, **universe_kwargs)
except (
ValueError,
FileFormatWarning,
NoDataError,
MissingDataWarning,
OSError,
) as err:
logger.error(f"{err} raised while loading {top[0]} {trj} in dir {cur_dir}")
raise NoDataError
[docs] def keys(self):
"""Returns list of system keys"""
return self._keys
[docs] def _build_ensemble(self):
"""Finds simulation files genderated by MDPOW and attempts to build
:class:`MDAnalysis.Universe <MDAnalysis.core.groups.universe.Universe>`
in the lambda directories.
Run if :code:`dirname` argument is given when initializing the class.
First enters FEP directory, then traverses solvent and interaction
directories to search lambda directories for system files."""
fep_dir = os.path.join(self._ensemble_dir, "FEP")
solv_top_path = None
for (
solvent
) in self._solvents: # Ugly set of loops, may have to find way to clean up
if self.top_dict is not None:
solv_top_path = self.top_dict[solvent]
for dirs in self._interactions: # Attribute folder names
int_dir = os.path.join(fep_dir, solvent, dirs)
with in_dir(int_dir, create=False): # Entering attribute folders
logger.info("Searching %s directory for systems", os.curdir)
files = os.listdir(os.curdir)
for file in sorted(files): # Traversing lambda directories
if os.path.isdir(file):
with in_dir(file, create=False):
u = self._load_universe_from_dir(
solv_dir=solv_top_path, **self.unv_kwargs
)
if u is None:
logger.warning(f"No system loaded in {file}")
else:
self.add_system((solvent, dirs, file), u)
[docs] def add_system(self, key, universe: mda.Universe):
"""Adds system from universe object for trajectory and topology files
Existing mda.Universe object or trajectory and topology path. Ensure
that paths are set to absolute when creating the universe."""
self._ensemble[key] = universe
self._keys.append(key)
self._num_systems += 1
[docs] def pop(self, key):
"""Removes and returns system at specified key.
Logs if KeyError is raised."""
system = self._ensemble.pop(key)
self._num_systems -= 1
return system
[docs] def select_atoms(self, *args, **kwargs):
"""Returns :class:`~mdpow.analysis.ensemble.EnsembleAtomGroup` containing selections
from the :class:`~mdpow.analysis.ensemble.Ensemble`
Uses the same
`selection commands <https://docs.mdanalysis.org/stable/documentation_pages/selections.html>`_
as MDAnalysis, and has the same keys as the :class:`~mdpow.analysis.ensemble.Ensemble`
"""
selections = {}
for key in self.keys():
try:
ag = self[key].select_atoms(*args, **kwargs)
except SelectionError as err:
logger.error(
"%r on system %r with selection settings %r %r",
err,
key,
args,
kwargs,
)
raise
else:
selections[key] = ag
return EnsembleAtomGroup(selections, ensemble=self)
[docs] def select_systems(
self,
keys=None,
solvents=None,
interactions=None,
lambdas=None,
lambda_range=None,
):
"""
Select specific subset of systems and returns them in an Ensemble.
This can be accomplished in two ways, by specific keys, or by
specifying the desired system attributes solvents, interactions and
lambdas. All arguments are stored in list form.
:keywords:
*keys*
System keys from :class:`~mdpow.analysis.ensemble.Ensemble`
to be returned.
*solvents*
Solvents from :class:`~mdpow.analysis.ensemble.Ensemble`
to be returned.
*interactions*
Interactions from :class:`~mdpow.analysis.ensemble.Ensemble`
to be returned
*lambdas*
Specific lambdas to be returned
*lambda_range*
Range of lambda to be returned
.. rubric:: Examples
Specific key workflow example::
Ens = Ensemble(dirname='Mol')
w_v_0_25 = Ens.select_systems(keys=[('water', 'VDW', '0000'),
('water', 'VDW', '0025')]
For the system attributes workflow there are two ways of selecting lambdas,
the :param lambdas: keyword saves specific lambdas or the :param lambda_range:
which saves the lambdas that fall within the given range.
Specific lambdas example::
Ens = Ensemble(dirname='Mol')
w_v_0_25 = Ens.select_systems(solvents=['water'], interactions=['VDW'],
lambdas=['0000', '0025'])
Range of lambdas example::
Ens = Ensemble(dirname='Mol')
w_v = Ens.select_systems(solvents=['water'], interactions=['VDW'],
lambda_range=[0, 1])
"""
new_ens = Ensemble()
new_key = []
if keys is not None:
# Selection by giving keys
new_key = keys
elif solvents is not None:
# Selection by attributes
for s in solvents:
if interactions is not None:
for i in interactions:
if lambdas is not None:
# Selecting specific lambdas
for l in lambdas:
new_key.append((s, i, l))
elif lambda_range is not None:
# Selecting range of lambdas
for k in self.keys():
if (
lambda_range[0]
<= int(k[2]) / 1000
<= lambda_range[1]
):
new_key.append((s, i, k[2]))
for k in new_key:
logger.info("adding system %r to ensemble", k)
new_ens.add_system(k, universe=self[k])
new_ens._ensemble_dir = self._ensemble_dir
return new_ens
[docs]class EnsembleAtomGroup(object):
"""Group for storing selections from :class:`~mdpow.analysis.ensemble.Ensemble`
objects made using the :meth:`~mdpow.analysis.ensemble.Ensemble.select_atoms` method.
:class:`~mdpow.analysis.ensemble.EnsembleAtomGroup` is not set up for manual initialization,
they should be obtained by selecting atoms from an existing object.
"""
def __init__(self, group_dict: dict, ensemble: Ensemble):
self._groups = group_dict
self._ensemble = ensemble
self._keys = group_dict.keys()
def __getitem__(self, index):
return self._groups[index]
def __eq__(self, other):
if self.keys() == other.keys():
return all(self[k] == other[k] for k in self.keys())
return False
def __len__(self):
return len(self.keys())
[docs] def keys(self):
"""List of keys to specific atom groups in the system"""
return self._keys
[docs] def positions(self, keys=None):
"""Returns the positions of the keys of the selected atoms.
If no keys are specified positions for all keys are returned"""
positions = {}
if not keys is None:
for k in keys:
positions[k] = self._groups[k].positions
else:
for k in self.keys():
positions[k] = self[k].positions
return positions
[docs] def select_atoms(self, *args, **kwargs):
"""Returns :class:`~mdpow.analysis.ensemble.EnsembleAtomGroup` containing selections
from the :class:`~mdpow.analysis.ensemble.EnsembleAtomGroup`
Uses the same
`selection commands <https://docs.mdanalysis.org/stable/documentation_pages/selections.html>`_
as MDAnalysis, and has the same keys as :class:`~mdpow.analysis.ensemble.EnsembleAtomGroup`
"""
selections = {}
for key in self.keys():
try:
ag = self[key].select_atoms(*args, **kwargs)
except SelectionError as err:
logger.error(
"%r on system %r with selection settings %r %r",
err,
key,
args,
kwargs,
)
raise
else:
selections[key] = ag
return EnsembleAtomGroup(selections, ensemble=self._ensemble)
@property
def ensemble(self):
"""Returns the ensemble of the EnsembleAtomGroup"""
return self._ensemble
[docs]class EnsembleAnalysis(object):
"""Base class for running multi-system analyses
The class is designed based on the `AnalysisBase
class in MDAnalysis <https://docs.mdanalysis.org/stable/documentation_pages/analysis/base.html>`_
and is a template for creating multi-universe multi-frame analyses using the
:class:`~mdpow.analysis.ensemble.Ensemble` object
:Keywords:
*ensemble*
The :class:`~mdpow.analysis.ensemble.Ensemble` being analyzed in the class
.. rubric:: Example Analysis
Dihedral Analysis Demonstration::
class DihedralAnalysis(mdpow.ensemble.EnsembleAnalysis):
def __init__(self, DihedralEnsembleGroup):
super(DihedralAnalysis, self).__init__(DihedralEnsembleGroup.ensemble)
self._sel = DihedralEnsembleGroup
def _prepare_ensemble(self):
self.result_dict = {}
for s in ['water', 'octanol']:
self.result_dict[s] = {'Coulomb': {},
'VDW': {}}
for key in self._sel.group_keys():
self.result_dict[key[0]][key[1]][key[2]] = None
def _prepare_universe(self):
self.angle_dict = {'angle': None,
'time': None}
self.angles = []
def _single_frame(self):
angle = calc_dihedrals(self._sel[self._key].positions[0], self._sel[self._key].positions[1],
self._sel[self._key].positions[2], self._sel[self._key].positions[3])
self.angles.append(angle)
def _conclude_universe(self):
self.angle_dict['time'] = self.times
self.angle_dict['angle'] = self.angles
self.result_dict[self._key[0]][self._key[1]][self._key[2]] = self.angle_dict
def _conclude_ensemble(self):
self.results = pd.DataFrame(data=self.result_dict)
D = DihedralAnalysis.run(start=0 stop=10, step=1)
"""
def __init__(self, ensemble=None):
self._ensemble = ensemble
def _setup_system(self, key, start=None, stop=None, step=None):
self._system = self._ensemble[key]
self._key = key
self.start = start
self.stop = stop
self.step = step
self._setup_frames(self._system.trajectory)
def _setup_frames(self, trajectory):
self._trajectory = trajectory
start, stop, step = trajectory.check_slice_indices(
self.start, self.stop, self.step
)
self.n_frames = len(range(start, stop, step))
self.frames = np.zeros(self.n_frames, dtype=int)
self.times = np.zeros(self.n_frames)
[docs] def _single_universe(self):
"""Calculations on a single :class:`MDAnalysis.Universe
<MDAnalysis.core.groups.universe.Universe>` object.
Run on each :class:`MDAnalysis.Universe
<MDAnalysis.core.groups.universe.Universe>`
in the :class:`~mdpow.analysis.ensemble.Ensemble`
during when :meth:`run` in called.
:exc:`NotImplementedError` will detect whether
:meth:`~EnsembleAnalysis._single_universe`
or :meth:`~EnsembleAnalysis._single_frame`
should be implemented, based on which is defined
in the :class:`~mdpow.analysis.ensemble.EnsembleAnalysis`.
"""
raise NotImplementedError
[docs] def _single_frame(self):
"""Calculate data from a single frame of trajectory.
Called on each frame for each
:class:`MDAnalysis.Universe <MDAnalysis.core.groups.universe.Universe>`
in the :class:`~mdpow.analysis.ensemble.Ensemble`.
:exc:`NotImplementedError` will detect whether
:meth:`~EnsembleAnalysis._single_universe`
or :meth:`~EnsembleAnalysis._single_frame`
should be implemented, based on which is defined
in the :class:`~mdpow.analysis.ensemble.EnsembleAnalysis`.
"""
raise NotImplementedError
[docs] def _prepare_ensemble(self):
"""For establishing data structures used in running
analysis on the entire ensemble.
Data structures will not be overwritten upon moving to
next system in ensemble.
"""
pass # pragma: no cover
[docs] def _prepare_universe(self):
"""For establishing data structures used in running
analysis on each trajectory in ensemble
Data structures will be overwritten between upon after
each trajectory has been run
"""
pass # pragma: no cover
[docs] def _conclude_universe(self):
"""Run after each trajectory is finished"""
pass # pragma: no cover
[docs] def _conclude_ensemble(self):
"""Run after all trajectories in ensemble are finished"""
pass # pragma: no cover
[docs] def run(self, start=None, stop=None, step=None):
"""Runs :meth:`~EnsembleAnalysis._single_universe`
on each system or :meth:`~EnsembleAnalysis._single_frame`
on each frame in the system.
First iterates through keys of ensemble, then runs
:meth:`~EnsembleAnalysis._setup_system`which defines
the system and trajectory. Then iterates over each
system universe or trajectory frames of each universe
as defined by :meth:`~EnsembleAnalysis._single_universe`
or :meth:`~EnsembleAnalysis._single_frame`.
"""
logger.info("Setting up systems")
self._prepare_ensemble()
for self._key in ProgressBar(self._ensemble.keys(), verbose=True):
self._setup_system(self._key, start=start, stop=stop, step=step)
try:
self._single_universe()
except NotImplementedError:
for i, ts in enumerate(
ProgressBar(
self._trajectory[self.start : self.stop : self.step],
verbose=True,
postfix=f"running system {self._key}",
)
):
self._frame_index = i
self._ts = ts
self.frames[i] = ts.frame
self.times[i] = ts.time
self._single_frame()
logger.info("Moving to next universe")
logger.info("Finishing up")
self._conclude_ensemble()
return self
[docs] @staticmethod
def check_groups_from_common_ensemble(groups: List[EnsembleAtomGroup]):
"""Checks if inputted list of
:class:`~mdpow.analysis.ensemble.EnsembleAtomGroup` originate from
the same :class:`~mdpow.analysis.ensemble.Ensemble`
Checks every :class:`~mdpow.analysis.ensemble.EnsembleAtomGroup` in
list to determine if their :meth:`ensemble` references the same object
in memory. If two :class:`~mdpow.analysis.ensemble.EnsembleAtomGroup`
object don't have a common :class:`~mdpow.analysis.ensemble.Ensemble`
:class:`ValueError` is raised."""
for i in range(len(groups) - 1):
for j in range(i + 1, len(groups)):
# Checking if EnsembleAtomGroup.ensemble references same object in memory
if groups[i].ensemble is not groups[j].ensemble:
msg = (
"Dihedral selections from different Ensembles, "
"ensure that all EnsembleAtomGroups are created "
"from the same Ensemble. "
f"mismatch: group[{i}].ensemble != group[{j}].ensemble"
)
logger.error(msg)
raise ValueError(msg)