Source code for mdpow.workflows.dihedrals

# MDPOW: dihedrals.py
# 2022 Cade Duckworth

"""
:mod:`mdpow.workflows.dihedrals` --- Automation for :class:`DihedralAnalysis`
==============================================================================

:mod:`~mdpow.workflows.dihedrals` module provides functions for automated
workflows that encompass :class:`~mdpow.analysis.dihedral.DihedralAnalysis`.
See each function for requirements and examples.

Most functions can be used as standalone, individually, or in combination
depending on the desired results. Details of the completely automated workflow
are discussed under :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`.

Atom indices obtained by :func:`get_atom_indices` are 0-based,
atom index labels on the molecule in the plots are 0-based,
but atom `names` in plots and file names are 1-based.

.. autodata:: SOLVENTS_DEFAULT
    :annotation: = ('water', 'octanol')
.. autodata:: INTERACTIONS_DEFAULT
    :annotation: = ('Coulomb', 'VDW')
.. autodata:: SMARTS_DEFAULT
    :annotation: = [!#1]~[!$(*#*)&!D1]-!@[!$(*#*)&!D1]~[!#1]
.. autodata:: PLOT_WIDTH_DEFAULT
    :annotation: = 190
.. autofunction:: automated_dihedral_analysis
.. autofunction:: build_universe
.. autofunction:: rdkit_conversion
.. autofunction:: get_atom_indices
.. autofunction:: get_bond_indices
.. autofunction:: get_dihedral_groups
.. autofunction:: get_paired_indices
.. autofunction:: dihedral_groups_ensemble
.. autofunction:: save_df
.. autofunction:: periodic_angle_padding
.. autofunction:: dihedral_violins
.. autofunction:: build_svg
.. autofunction:: plot_dihedral_violins

"""

import os
import logging
import pathlib

import numpy as np
import pandas as pd
import MDAnalysis as mda
from MDAnalysis.topology.guessers import guess_types
from rdkit import Chem
from rdkit.Chem import rdCoordGen
from rdkit.Chem.Draw import rdMolDraw2D
import matplotlib.pyplot as plt
import seaborn as sns
import pypdf
import cairosvg
import svgutils
import svgutils.compose
import svgutils.transform

from .base import guess_elements
from ..analysis import ensemble, dihedral

logger = logging.getLogger("mdpow.workflows.dihedrals")

SOLVENTS_DEFAULT = ("water", "octanol")
"""Default solvents are water and octanol:

    * must match solvents used in project directory
    * one or two solvents can be specified
    * current solvents supported,

   .. seealso:: :mod:`mdpow.forcefields`

"""

INTERACTIONS_DEFAULT = ("Coulomb", "VDW")
"""Default interactions set to Coulomb and VDW:

    * default values should not be changed
    * order should not be changed

"""

SMARTS_DEFAULT = "[!#1]~[!$(*#*)&!D1]-!@[!$(*#*)&!D1]~[!#1]"
"""Default SMARTS string to identify relevant dihedral atom groups:

     * ``[!#1]`` : any atom, not Hydrogen
     * ``~``  : any bond
     * ``[!$(*#*)&!D1]`` : any atom that is not part of linear triple
       bond and not atom with 1 explicit bond
     * ``-!@`` : single bond that is not ring bond
     * ``[!$(*#*)&!D1]-!@[!$(*#*)&!D1]`` : the central portion selects
       two atoms that are not involved in a triple bond and are not terminal,
       that are connected by a single, non-ring bond
     * ``[!#1]~` or `~[!#1]`` : the first and last portion specify any bond,
       to any atom that is not hydrogen

"""

PLOT_WIDTH_DEFAULT = 190
"""Plot width (`plot_pdf_width`) should be provided in millimeters (mm),
   and is converted to pixels (px) for use with :mod:`cairosvg`.

   conversion factor: 1 mm = 3.7795275591 px
   default value: 190 mm = 718.110236229 pixels
"""


[docs]def build_universe(dirname, solvents=SOLVENTS_DEFAULT): """Builds :class:`~MDAnalysis.core.universe.Universe` from the ``./Coulomb/0000`` topology and trajectory of the project for the first solvent specified. Output used by :func:`~mdpow.workflows.dihedrals.rdkit_conversion` and :func:`~mdpow.workflows.dihedrals.get_atom_indices` to obtain atom indices for each dihedral atom group. :keywords: *dirname* Molecule Simulation directory. Loads simulation files present in lambda directories into the new instance. With this method for generating an :class:`~mdpow.analysis.ensemble.Ensemble` the lambda directories are explored and :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, and .xtc files for trajectory. It will default to using the tpr file available. *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. Normally takes a two-tuple, but analysis is compatible with single solvent selections. Single solvent analyses will result in a figure with fully filled violins for the single solvent. :returns: *u* :class:`~MDAnalysis.core.universe.Universe` object """ path = pathlib.Path(dirname) topology = path / f"FEP/{solvents[0]}/Coulomb/0000" / "md.tpr" trajectory = path / f"FEP/{solvents[0]}/Coulomb/0000" / "md.xtc" u = mda.Universe(str(topology), str(trajectory)) return u
[docs]def rdkit_conversion(u, resname): """Converts the solute, `resname`, of the :class:`~MDAnalysis.core.universe.Universe` to :class:`rdkit.Chem.rdchem.Mol` object for use with a SMARTS selection string to identify dihedral atom groups. Accepts :class:`~MDAnalysis.core.universe.Universe` object made with :func:`~mdpow.workflows.dihedrals.build_universe` and a `resname` as input. Uses `resname` to select the solute for conversion by :class:`~MDAnalysis.converters.RDKit.RDKitConverter` to :class:`rdkit.Chem.rdchem.Mol`, and will add element attributes for Hydrogen if not listed in the topology, using :func:`MDAnalysis.topology.guessers.guess_atom_element`. :keywords: *u* :class:`~MDAnalysis.core.universe.Universe` object *resname* `resname` for the molecule as defined in the topology and trajectory :returns: *tuple(mol, solute)* function call returns tuple, see below *mol* :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *solute* the :any:`MDAnalysis` `AtomGroup` for the solute """ try: solute = u.select_atoms(f"resname {resname}") mol = solute.convert_to("RDKIT") except AttributeError: guessed_elements = guess_elements(u.atoms) u.add_TopologyAttr("elements", guessed_elements) solute = u.select_atoms(f"resname {resname}") mol = solute.convert_to("RDKIT") rdCoordGen.AddCoords(mol) for atom in mol.GetAtoms(): atom.SetProp("atomNote", str(atom.GetIdx())) return mol, solute
[docs]def get_atom_indices(mol, SMARTS=SMARTS_DEFAULT): """Uses a SMARTS selection string to identify atom indices for relevant dihedral atom groups. Requires a :class:`rdkit.Chem.rdchem.Mol` object as input for the :data:`SMARTS_DEFAULT` kwarg to match patterns to and identify relevant dihedral atom groups. :keywords: *mol* :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *SMARTS* The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. :returns: *atom_indices* tuple of tuples of indices for each dihedral atom group """ pattern = Chem.MolFromSmarts(SMARTS) atom_indices = mol.GetSubstructMatches(pattern) return atom_indices
[docs]def get_bond_indices(mol, atom_indices): """From the :class:`rdkit.Chem.rdchem.Mol` object, uses `atom_indices` to determine the indices of the bonds between those atoms for each dihedral atom group. :keywords: *mol* :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *atom_indices* tuple of tuples of indices for each dihedral atom group :returns: *bond_indices* tuple of tuples of indices for the bonds in each dihedral atom group """ bonds = [] for atom_index in atom_indices: x = mol.GetBondBetweenAtoms(atom_index[0], atom_index[1]).GetIdx() y = mol.GetBondBetweenAtoms(atom_index[1], atom_index[2]).GetIdx() z = mol.GetBondBetweenAtoms(atom_index[2], atom_index[3]).GetIdx() bix = (x, y, z) bonds.append(bix) bond_indices = tuple(bonds) return bond_indices
[docs]def get_dihedral_groups(solute, atom_indices): """Uses the 0-based `atom_indices` of the relevant dihedral atom groups determined by :func:`~mdpow.workflows.dihedrals.get_atom_indices` and returns the 1-based index names for each atom in each group. Requires the `atom_indices` from :func:`~mdpow.workflows.dihedrals.get_atom_indices` to index the `solute` specified by :func:`~MDAnalysis.core.groups.select_atoms` and return an array of the names of each atom within its respective dihedral atom group as identified by the SMARTS selection string. :keywords: *solute* the :any:`MDAnalysis` `AtomGroup` for the solute *atom_indices* tuple of tuples of indices for each dihedral atom group :returns: *dihedral_groups* list of :func:`numpy.array` for atom names in each dihedral atom group """ # currently uses RDKit Mol object atom indices to retrieve # atom names from the MDAnalysis solute object # RDKit-MDAnalysis index consistency is currently tested dihedral_groups = [ solute.atoms[list(atom_index)].names for atom_index in atom_indices ] return dihedral_groups
[docs]def get_paired_indices(atom_indices, bond_indices, dihedral_groups): """Combines `atom_indices` and `bond_indices` in tuples to be paired with their respective dihedral atom groups. A dictionary is created with key-value pairs as follows: `atom_indices` and `bond_indices` are joined in a tuple as the value, with the key being the respective member of `dihedral_groups` to facilitate highlighting the relevant dihedral atom group when generating violin plots. As an example, `'C1-N2-O3-S4': ((0, 1, 2, 3), (0, 1, 2))`, would be one key-value pair in the dictionary. :keywords: *atom_indices* tuple of tuples of indices for each dihedral atom group *bond_indices* tuple of tuples of indices for the bonds in each dihedral atom group *dihedral_groups* list of :func:`numpy.array` for atom names in each dihedral atom group :returns: *name_index_pairs* dictionary with key-value pair for dihedral atom group, atom indices, and bond indices """ all_dgs = [f"{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}" for dg in dihedral_groups] name_index_pairs = {} name_index_pairs = { all_dgs[i]: (atom_indices[i], bond_indices[i]) for i in range(len(all_dgs)) } return name_index_pairs
[docs]def dihedral_groups_ensemble( dirname, atom_indices, solvents=SOLVENTS_DEFAULT, interactions=INTERACTIONS_DEFAULT, start=None, stop=None, step=None, ): """Creates one :class:`~mdpow.analysis.ensemble.Ensemble` for the MDPOW project and runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` for each dihedral atom group identified by the SMARTS selection string. .. seealso:: :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`, :class:`~mdpow.analysis.dihedral.DihedralAnalysis` :keywords: *dirname* Molecule Simulation directory. Loads simulation files present in lambda directories into the new instance. With this method for generating an :class:`~mdpow.analysis.ensemble.Ensemble` the lambda directories are explored and :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, and .xtc files for trajectory. It will default to using the tpr file available. *atom_indices* tuples of atom indices for dihedral atom groups .. seealso:: :func:`~mdpow.workflows.dihedrals.get_atom_indices`, :data:`SMARTS_DEFAULT` *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. Normally takes a two-tuple, but analysis is compatible with single solvent selections. Single solvent analyses will result in a figure with fully filled violins for the single solvent. *interactions* The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. *start, stop, step* arguments passed to :func:`~mdpow.analysis.ensemble.EnsembleAnalysis.run`, as parameters for iterating through the trajectories of the current ensemble .. seealso:: :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` :returns: *df* :class:`pandas.DataFrame` of :class:`~mdpow.analysis.dihedral.DihedralAnalysis` results, including all dihedral atom groups for molecule of current project """ dih_ens = ensemble.Ensemble( dirname=dirname, solvents=solvents, interactions=interactions ) indices = atom_indices all_dihedrals = [ dih_ens.select_atoms( f"index {i[0]}", f"index {i[1]}", f"index {i[2]}", f"index {i[3]}" ) for i in indices ] da = dihedral.DihedralAnalysis(all_dihedrals) da.run(start=start, stop=stop, step=step) df = da.results return df
[docs]def save_df(df, df_save_dir, resname, molname=None): """Takes a :class:`pandas.DataFrame` of results from :class:`~mdpow.analysis.dihedral.DihedralAnalysis` as input before padding the angles to optionaly save the raw data. Optionally saves results before padding the angles for periodicity and plotting dihedral angle frequencies as KDE violins with :func:`~mdpow.workflows.dihedrals.dihedral_violins`. Given a parent directory, creates subdirectory for molecule, saves fully sampled, unpadded results :class:`pandas.DataFrame` as a compressed csv file, default: .csv.bz2. :keywords: *df* :class:`pandas.DataFrame` of :class:`~mdpow.analysis.dihedral.DihedralAnalysis` results, including all dihedral atom groups for molecule of current project *df_save_dir* optional, path to the location to save results :class:`pandas.DataFrame` *resname* `resname` for the molecule as defined in the topology and trajectory *molname* molecule name to be used for labelling plots, if different from `resname` """ df = df.sort_values( by=["selection", "solvent", "interaction", "lambda"] ).reset_index(drop=True) if molname is None: molname = resname subdir = molname newdir = os.path.join(df_save_dir, subdir) os.mkdir(newdir) # time and compression level can be adjusted as kwargs df.to_csv(f"{newdir}/{molname}_full_df.csv.bz2", index=False, compression="bz2") logger.info(f"Results DataFrame saved as {newdir}/{molname}_full_df.csv.bz2")
[docs]def periodic_angle_padding(df, padding=45): """Pads the angles from the results :class:`~pandas.DataFrame` to maintain periodicity in the violin plots. Takes a :class:`pandas.DataFrame` of results from :class:`~mdpow.analysis.dihedral.DihedralAnalysis` or :func:`~mdpow.workflows.dihedrals.dihedral_groups_ensemble` as input and pads the angles to maintain periodicity for properly plotting dihedral angle frequencies as KDE violins with :func:`~mdpow.workflows.dihedrals.dihedral_violins` and :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins`. Creates two new :class:`pandas.DataFrame` based on the `padding` value specified, pads the angle values, concatenates all three :class:`pandas.DataFrame`, maintaining original data and adding padded values, and returns new augmented :class:`pandas.DataFrame`. :keywords: *df* :class:`pandas.DataFrame` of :class:`~mdpow.analysis.dihedral.DihedralAnalysis` results, including all dihedral atom groups for molecule of current project *padding* value in degrees to specify angle augmentation threshold default: 45 :returns: *df_aug* augmented results :class:`pandas.DataFrame` containing padded dihedral angles as specified by `padding` """ df1 = df[df.dihedral > 180 - padding].copy(deep=True) df1.dihedral -= 360 df2 = df[df.dihedral < -180 + padding].copy(deep=True) df2.dihedral += 360 df_aug = pd.concat([df1, df, df2]).reset_index(drop=True) return df_aug
[docs]def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): """Plots kernel density estimates (KDE) of dihedral angle frequencies for one dihedral atom group as violin plots, using as input the augmented :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle_padding`. Output is converted to SVG by :func:`~mdpow.workflows.dihedrals.build_svg` and final output is saved as PDF by :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` :keywords: *df* augmented results :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` *width* width of the violin element (>1 overlaps) default: 0.9 *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. Normally takes a two-tuple, but analysis is compatible with single solvent selections. Single solvent analyses will result in a figure with fully filled violins for the single solvent. *plot_title* generated by :func:`~mdpow.workflows.dihedrals.build_svg` using `molname`, `dihedral_groups`, `atom_indices`, and `interactions` in this order and format: f'{molname}, {name[0]} {a} | ''{col_name}' :returns: *g* returns a :class:`seaborn.FacetGrid` object containing a violin plot of the kernel density estimates (KDE) of the dihedral angle frequencies for each dihedral atom group identified by :data:`SMARTS_DEFAULT` """ df["lambda"] = df["lambda"].astype("float") / 1000 df = df.sort_values( by=["selection", "solvent", "interaction", "lambda"] ).reset_index(drop=True) width_ratios = [ len(df[df["interaction"] == "Coulomb"]["lambda"].unique()) + 1, len(df[df["interaction"] == "VDW"]["lambda"].unique()), len(df[df["interaction"] == "Coulomb"]["lambda"].unique()) - 1, ] # Usage in Jupyter causes matplotlib figure object output, not the completed figure # Upcoming fix in issue #260 assert ( 0 < len(solvents) < 3 ), "one or two solvents must be specified, otherwise SOLVENTS_DEFAULT is used" split = len(solvents) > 1 g = sns.catplot( data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", kind="violin", split=split, width=width, inner=None, cut=0, linewidth=0.5, hue_order=list(solvents), col_order=["Coulomb", "VDW", "Structure"], sharex=False, sharey=True, height=3.0, aspect=2.0, facet_kws={ "ylim": (-180, 180), "gridspec_kws": { "width_ratios": width_ratios, }, }, ) g.set_xlabels(r"$\lambda$") g.set_ylabels(r"dihedral angle $\phi$") g.despine(offset=5) axC = g.axes_dict["Coulomb"] axC.yaxis.set_major_locator(plt.matplotlib.ticker.MultipleLocator(60)) axC.yaxis.set_minor_locator(plt.matplotlib.ticker.MultipleLocator(30)) axC.yaxis.set_major_formatter( plt.matplotlib.ticker.FormatStrFormatter(r"$%g^\circ$") ) axV = g.axes_dict["VDW"] axV.yaxis.set_visible(False) axV.spines["left"].set_visible(False) axIM = g.axes_dict["Structure"] axIM.axis("off") g.set_titles(plot_title) return g
[docs]def build_svg( mol, molname, name_index_pairs, atom_group_selection, solvents=SOLVENTS_DEFAULT, width=0.9, ): """Converts and combines figure components into an SVG object to be converted and saved as a publication quality PDF. :keywords: *mol* :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *molname* molecule name to be used for labelling plots, if different from `resname` (in this case, carried over from an upstream decision between the two) *name_index_pairs* dictionary with key-value pair for dihedral atom group, atom indices, and bond indices .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` *atom_group_selection* `name` of each section in the `groupby` series of atom group selections .. seealso:: :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. Normally takes a two-tuple, but analysis is compatible with single solvent selections. Single solvent analyses will result in a figure with fully filled violins for the single solvent. *width* width of the violin element (>1 overlaps) default: 0.9 :returns: *fig* :mod:`svgutils` SVG figure object """ atom_index = name_index_pairs[atom_group_selection[0]][0] bond_index = name_index_pairs[atom_group_selection[0]][1] drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) drawer.DrawMolecule(mol=mol, highlightAtoms=atom_index, highlightBonds=bond_index) drawer.FinishDrawing() svg = drawer.GetDrawingText().replace("svg:", "") mol_svg = svgutils.transform.fromstring(svg) m = mol_svg.getroot() m.scale(0.0125).moveto(21.8, 0.35) plot_title = f"{molname}, {atom_group_selection[0]} {atom_index} | " "{col_name}" plot = dihedral_violins( atom_group_selection[1], width=width, solvents=solvents, plot_title=plot_title ) plot_svg = svgutils.transform.from_mpl(plot) p = plot_svg.getroot() p.scale(0.02) # order matters here, plot down first, mol on top (p, m) fig = svgutils.compose.Figure("28cm", "4.2cm", p, m) return fig
[docs]def plot_dihedral_violins( df, resname, mol, name_index_pairs, figdir=None, molname=None, width=0.9, plot_pdf_width=PLOT_WIDTH_DEFAULT, solvents=SOLVENTS_DEFAULT, ): """Coordinates plotting and saving figures for all dihedral atom groups. Makes a subdirectory for the current project within the specified `figdir` using `resname` or `molname` as title and saves production quality PDFs for each dihedral atom group separately. .. seealso:: :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`, :func:`~mdpow.workflows.dihedrals.dihedral_violins`, :func:`~mdpow.workflows.dihedrals.build_svg` :keywords: *df* augmented results :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` *resname* `resname` for the molecule as defined in the topology and trajectory *mol* :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *name_index_pairs* dictionary with key-value pair for dihedral atom group, atom indices, and bond indices .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` *figdir* path to the location to save figures (REQUIRED but marked as a kwarg for technical reasons; will be changed in #244) *molname* molecule name to be used for labelling plots, if different from `resname` *width* width of the violin element (>1 overlaps) default: 0.9 .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_violins` *plot_pdf_width* The default value for width of plot output is described in detail under :data:`PLOT_WIDTH_DEFAULT`. *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. Normally takes a two-tuple, but analysis is compatible with single solvent selections. Single solvent analyses will result in a figure with fully filled violins for the single solvent. """ assert ( figdir is not None ), "figdir MUST be set, even though it is a kwarg. Will be changed with #244" if molname is None: molname = resname subdir = molname newdir = os.path.join(figdir, subdir) os.mkdir(newdir) section = df.groupby(by="selection") plot_pdf_width_px = plot_pdf_width * 3.7795275591 pdf_list = [] for name in section: fig = build_svg( mol=mol, molname=molname, atom_group_selection=name, name_index_pairs=name_index_pairs, solvents=solvents, width=width, ) figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" if figdir is not None: plot_pdf = cairosvg.svg2pdf( bytestring=fig.tostr(), write_to=str(figfile), output_width=plot_pdf_width_px, ) # add PDF for each dihedral atom group to all_PDFs list pdf_list.append(f"{figfile}") logger.info(f"Figure saved as {figfile}") logger.info(f"All figures generated and saved in {figdir}") merger = pypdf.PdfWriter() for pdf in pdf_list: merger.append(pdf) merger.write(f"{figdir}/{molname}/{molname}_all_figures.pdf") merger.close() logger.info( f"PDF of combined figures generated and saved as {figdir}/{molname}/{molname}_all_figures.pdf" ) return None
[docs]def automated_dihedral_analysis( dirname, resname, figdir=None, # figdir is required and will cause issues if not specified # figdir=None is a temporary way to satisfy # workflows base tests until issue #244 is resolved # because it currently uses a **kwargs convention and the # positional argument figdir will not carry over nicely df_save_dir=None, molname=None, SMARTS=SMARTS_DEFAULT, plot_pdf_width=PLOT_WIDTH_DEFAULT, dataframe=None, padding=45, width=0.9, solvents=SOLVENTS_DEFAULT, interactions=INTERACTIONS_DEFAULT, start=None, stop=None, step=None, ): """Runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` for a single MDPOW project and creates violin plots of dihedral angle frequencies for each relevant dihedral atom group. For one MDPOW project, automatically determines all relevant dihedral atom groups in the molecule, runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` for each group, pads the dihedral angles to maintain periodicity, creates violin plots of dihedral angle frequencies (KDEs), and saves publication quality PDF figures for each group, separately. Optionally saves all pre-padded :class:`~mdpow.analysis.dihedral.DihedralAnalysis` results as a single :class:`pandas.DataFrame` in `df_save_dir` provided. :keywords: *dirname* Molecule Simulation directory. Loads simulation files present in lambda directories into the new instance. With this method for generating an :class:`~mdpow.analysis.ensemble.Ensemble` the lambda directories are explored and :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, and .xtc files for trajectory. It will default to using the tpr file available. *figdir* path to the location to save figures (REQUIRED but marked as a kwarg for technical reasons; will be changed in #244) *resname* `resname` for the molecule as defined in the topology and trajectory *df_save_dir* optional, path to the location to save results :class:`pandas.DataFrame` *molname* molecule name to be used for labelling plots, if different from `resname` *SMARTS* The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. *plot_pdf_width* The default value for width of plot output is described in detail under :data:`PLOT_WIDTH_DEFAULT`. *dataframe* optional, if :class:`~mdpow.analysis.dihedral.DihedralAnalysis` was done prior, then results :class:`pandas.DataFrame` can be input to utilize angle padding and violin plotting functionality *padding* value in degrees default: 45 .. seealso:: :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` *width* width of the violin element (>1 overlaps) default: 0.9 .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_violins` *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. Normally takes a two-tuple, but analysis is compatible with single solvent selections. Single solvent analyses will result in a figure with fully filled violins for the single solvent. *interactions* The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. *start, stop, step* arguments passed to :func:`~mdpow.analysis.ensemble.EnsembleAnalysis.run`, as parameters for iterating through the trajectories of the current ensemble .. seealso:: :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` .. rubric:: Example Typical Workflow:: import dihedrals dihedrals.automated_dihedral_analysis(dirname='/foo/bar/MDPOW_project_data', figdir='/foo/bar/MDPOW_figure_directory', resname='UNK', molname='benzene', padding=45, width=0.9, solvents=('water','octanol'), interactions=('Coulomb','VDW'), start=0, stop=100, step=10) """ u = build_universe(dirname=dirname, solvents=solvents) mol, solute = rdkit_conversion(u=u, resname=resname) atom_indices = get_atom_indices(mol=mol, SMARTS=SMARTS) bond_indices = get_bond_indices(mol=mol, atom_indices=atom_indices) dihedral_groups = get_dihedral_groups(solute=solute, atom_indices=atom_indices) name_index_pairs = get_paired_indices( atom_indices=atom_indices, bond_indices=bond_indices, dihedral_groups=dihedral_groups, ) if dataframe is not None: df = dataframe logger.info(f"Proceeding with results DataFrame provided.") else: df = dihedral_groups_ensemble( dirname=dirname, atom_indices=atom_indices, solvents=solvents, interactions=interactions, start=start, stop=stop, step=step, ) if df_save_dir is not None: save_df(df=df, df_save_dir=df_save_dir, resname=resname, molname=molname) df_aug = periodic_angle_padding(df, padding=padding) plot_dihedral_violins( df=df_aug, resname=resname, mol=mol, name_index_pairs=name_index_pairs, figdir=figdir, molname=molname, width=width, plot_pdf_width=plot_pdf_width, solvents=solvents, ) logger.info(f"DihedralAnalysis completed for all projects in {dirname}") return