diff --git a/README.rst b/README.rst index 423f4ad..87479bc 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ Installation $ pip install springcraft -or +or .. code-block:: console @@ -41,12 +41,22 @@ via *pip*: $ git clone https://github.com/biotite-dev/springcraft.git $ pip install ./springcraft -A development conda environment with all required dependencies for testing -can be installed from `environment.yml` +Development +----------- -Scripts to generate reference files for tests are stored in tests/data; +For development and testing first create a conda enviroment from the +`environment.yml` and than let poetry install the required dependencies. + +.. code-block:: console + + $ conda env create -f environment.yml + $ conda activate springcraft-dev + $ poetry config virtualenvs.create false + $ poetry install + +Scripts to generate reference files for tests are stored in tests/data; a separate environment to rerun these locally can be found in `test_create_data_env.yml`. -BioPhysConnectoR has to be installed separately. +BioPhysConnectoR has to be installed separately. Example ======= @@ -91,4 +101,4 @@ Output: [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. -1. 0. 0. 0. -1. 4. -1. -1. 0.] [ 0. 0. 0. 0. 0. -1. 0. 0. 0. 0. 0. 0. 0. 0. 0. -1. -1. 5. -1. -1.] [ 0. -1. -1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. -1. -1. 5. -1.] - [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. -1. -1. 2.]] \ No newline at end of file + [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. -1. -1. 2.]] diff --git a/doc/apidoc.rst b/doc/apidoc.rst index fda276c..09e74fb 100644 --- a/doc/apidoc.rst +++ b/doc/apidoc.rst @@ -13,12 +13,14 @@ Network models .. autoclass:: GNM :members: + :inherited-members: | .. autoclass:: ANM :members: - + :inherited-members: + | @@ -56,4 +58,4 @@ Miscellaneous .. autofunction:: compute_kirchhoff -.. autofunction:: compute_hessian \ No newline at end of file +.. autofunction:: compute_hessian diff --git a/environment.yml b/environment.yml index a2343c5..0ca1b39 100644 --- a/environment.yml +++ b/environment.yml @@ -1,20 +1,12 @@ -# Conda environment, that includes all packages that are necessary for -# the complete Biotite development process. - +# Coda environment to facilitate Biotite development process. +# After creating the environment install the required packages with +# poetry config virtualenvs.create false +# poetry install name: springcraft-dev channels: - conda-forge dependencies: - - numpy >=2.0 - - biotite >=1.0.1 - - pytest >=5.2 - - matplotlib >=3.3 - - poetry >=1.0 - - jinja2 <3.1 - - sphinx >=3.0 - - sphinx-gallery =0.9.0 - - numpydoc >=0.8 - - ammolite >=0.8 - - ruff >=0.6.7 + - poetry + - python=3.11 diff --git a/pyproject.toml b/pyproject.toml index e80ac15..b13a859 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,19 +34,22 @@ packages = [ ] [tool.poetry.dependencies] -python = ">=3.10" -biotite = ">= 0.32" -numpy = ">= 1.25" +python = ">=3.10,<4.0" +biotite = ">=1.0" +numpy = ">=2.0" -[tool.poetry.dev-dependencies] -biotite = ">= 1.0" -pytest = "^5.2" -sphinx = "^3.0" -sphinx-gallery = "0.9.0" -numpy = ">= 2.0" -numpydoc = ">= 0.8" -ammolite = ">=0.8" +[tool.poetry.group.dev.dependencies] +pytest = ">=8.0" ruff = ">=0.6.7" +pytest-cov = "^7.1.0" + +[tool.poetry.group.docs.dependencies] +ammolite = ">=0.8" +jinja2 = ">=3.1" +matplotlib = ">=3.3" +numpydoc = ">=0.8" +sphinx = ">=3.0" +sphinx-gallery = ">=0.9.0" [build-system] requires = ["poetry_core>=1.0.0"] @@ -60,8 +63,8 @@ ignore = [ # Most other ocassions are caught by the ruff formatter "E501", # camel cases for forcefields - "N802", - # eANM/sdENM as static methods of TabulatedForceField + "N802", + # eANM/sdENM as static methods of TabulatedForceField # should not accept "self" as argument by design "N805", # Due to constants used in functions @@ -74,4 +77,4 @@ ignore = [ [tool.pytest.ini_options] # Pytest: Ignore Python scripts for reference data creation -addopts = "--ignore=tests/data" \ No newline at end of file +addopts = "--ignore=tests/data" diff --git a/src/springcraft/anm.py b/src/springcraft/anm.py index 1a944c2..56e9d4b 100644 --- a/src/springcraft/anm.py +++ b/src/springcraft/anm.py @@ -7,17 +7,18 @@ __author__ = "Patrick Kunzmann" __all__ = ["ANM"] +from typing import Literal + import biotite.structure as struc import numpy as np +from typing_extensions import override from . import nma -from .interaction import compute_hessian - -K_B = 1.380649e-23 -N_A = 6.02214076e23 +from .enm import ENM +from .forcefield import ForceField -class ANM: +class ANM(ENM): """ This class represents an *Anisotropic Network Model*. @@ -27,8 +28,8 @@ class ANM: The atoms or their coordinates that are part of the model. It usually contains only CA atoms. force_field : ForceField, natoms=n - The :class:`ForceField` that defines the force constants between - the given `atoms`. + The :class:`ForceField` that defines the cutoff distance and + pairwise interaction strengths between the given `atoms`. masses : bool or ndarray, shape=(n,), dtype=float, optional If an array is given, the Hessian is weighted with the inverse square root of the given masses. @@ -51,64 +52,60 @@ class ANM: Each dimension is partitioned in the form ``[x1, y1, z1, ... xn, yn, zn]``. This is not a copy: Create a copy before modifying this matrix. - covariance : ndarray, shape=(n*3,n*3), dtype=float + covariance : ndarray, shape=(n,n), dtype=float The covariance matrix for this model, i.e. the inverted - *Hessian*. + *Hessian* matrix. The returned covariance matrix is not scaled + correctly and does not have the correct unit. To obtain the true + covariance matrix, you can calculate + + .. math:: + + \\text{Cov}_\\text{true} = k_B T \\text{Cov} + + with Boltzman constant :math:`k_B` and absolut temperature + :math:`[T] = K` in Kelvin. + This is not a copy: Create a copy before modifying this matrix. masses : None or ndarray, shape=(n,), dtype=float The mass for each atom, `None` if no mass weighting is applied. """ - def __init__(self, atoms, force_field, masses=None, use_cell_list=True): - self._coord = struc.coord(atoms) - self._ff = force_field - self._use_cell_list = use_cell_list - - if masses is None or masses is False: - self._masses = None - elif masses is True: - if not isinstance(atoms, struc.AtomArray): - raise TypeError( - "An AtomArray is required to automatically infer masses" - ) - self._masses = np.array( - [ - struc.info.mass(res_name, is_residue=True) - for res_name in atoms.res_name - ] - ) - else: - if len(masses) != atoms.array_length(): - raise IndexError( - f"{len(masses)} masses for " f"{atoms.array_length()} atoms given" - ) - if np.any(masses == 0): - raise ValueError("Masses must not be 0") - self._masses = np.array(masses, dtype=float) - - if self._masses is not None: - mass_weights = 1 / np.sqrt(self._masses) - # 3 repetitions, - # as the Hessian has 3 entries (x, y, z) for each atom - mass_weights = np.repeat(mass_weights, 3) - self._mass_weight_matrix = np.outer(mass_weights, mass_weights) - else: - self._mass_weight_matrix = None + _hessian: np.ndarray | None - self._hessian = None - self._covariance = None + def __init__( + self, + atoms: struc.AtomArray | np.ndarray, + force_field: ForceField, + masses: bool | np.ndarray | None = None, + use_cell_list: bool = True, + ): + super().__init__(atoms, force_field, masses, use_cell_list) - @property - def masses(self): - return self._masses + self._hessian = None @property - def hessian(self): + def hessian(self) -> np.ndarray: if self._hessian is None: if self._covariance is None: - self._hessian, _ = compute_hessian( - self._coord, self._ff, self._use_cell_list + atom_i, atom_j, disp, sq_dist = self._calc_adjacency() + force_constants = self._ff.force_constant(atom_i, atom_j, sq_dist) + + self._hessian = np.zeros((self._natoms, self._natoms, 3, 3)) + self._hessian[atom_i, atom_j] = ( + -force_constants[:, np.newaxis, np.newaxis] + / sq_dist[:, np.newaxis, np.newaxis] + * disp[:, :, np.newaxis] + * disp[:, np.newaxis, :] + ) + # Set values for main diagonal + indices = np.arange(self._natoms) + self._hessian[indices, indices] = -np.sum(self._hessian, axis=0) + + # Reshape to (20*3, 20*3) matrix + self._hessian = np.transpose(self._hessian, (0, 2, 1, 3)).reshape( + self._natoms * 3, self._natoms * 3 ) + if self._mass_weight_matrix is not None: self._hessian *= self._mass_weight_matrix else: @@ -118,55 +115,46 @@ def hessian(self): return self._hessian @hessian.setter - def hessian(self, value): - if value.shape != (len(self._coord) * 3, len(self._coord) * 3): + def hessian(self, value: np.ndarray): + if value.shape != (self._natoms * 3, self._natoms * 3): raise IndexError( f"Expected shape " - f"{(len(self._coord) * 3, len(self._coord) * 3)}, " + f"{(self._natoms * 3, self._natoms * 3)}, " f"got {value.shape}" ) self._hessian = value # Invalidate dependent values self._covariance = None - - @property - def covariance(self): - if self._covariance is None: - self._covariance = np.linalg.pinv(self.hessian, hermitian=True, rcond=1e-6) - return self._covariance - - @covariance.setter - def covariance(self, value): - if value.shape != (len(self._coord) * 3, len(self._coord) * 3): - raise IndexError( - f"Expected shape " - f"{(len(self._coord) * 3, len(self._coord) * 3)}, " - f"got {value.shape}" - ) - self._covariance = value + self._eigen_values = None + self._eigen_values_zero = 0 + self._eigen_vectors = None + self._free_energy_contrib = None + + @ENM.covariance.setter + @override + def covariance(self, value: np.ndarray): + super().covariance = value # Invalidate dependent values self._hessian = None - def eigen(self): + @property + @override + def dof_per_node(self) -> int: """ - Compute the Eigenvalues and Eigenvectors of the - *Hessian* matrix. - - The first six Eigenvalues/Eigenvectors correspond to - trivial modes (translations/rotations) and are usually omitted - in normal mode analysis. - Returns ------- - eig_values : ndarray, shape=(k,), dtype=float - Eigenvalues of the *Hessian* matrix in ascending order. - eig_vectors : ndarray, shape=(k,n), dtype=float - Eigenvectors of the *Hessian* matrix. - ``eig_values[i]`` corresponds to ``eig_vectors[i]``. + dof_per_node : int + Returns the Degree of Freedom per atom. """ - return nma.eigen(self) - - def normal_mode(self, index, amplitude, frames, movement="sine"): + return 3 + + def normal_mode( + self, + index: int, + amplitude: int, + frames: int, + movement: Literal["sine", "triangle"] = "sine", + ) -> np.ndarray: """ Create displacements for a trajectory depicting the given normal mode. @@ -192,7 +180,7 @@ def normal_mode(self, index, amplitude, frames, movement="sine"): value for an atom is the given value. frames : int The number of frames (models) per oscillation. - movement : {'sinusoidal', 'triangle'} + movement : {'sine', 'triangle'} Defines how to depict the oscillation. If set to ``'sine'`` the atom movement is sinusoidal. If set to ``'triangle'`` the atom movement is linear with @@ -206,7 +194,7 @@ def normal_mode(self, index, amplitude, frames, movement="sine"): """ return nma.normal_mode(self, index, amplitude, frames, movement) - def linear_response(self, force): + def linear_response(self, force: np.ndarray) -> np.ndarray: """ Compute the atom displacement induced by the given force using *Linear Response Theory*. [1]_ @@ -237,151 +225,9 @@ def linear_response(self, force): """ return nma.linear_response(self, force) - def frequencies(self): - """ - Computes the frequency associated with each mode. - - The first six modes correspond to rigid-body translations/ - rotations and are omitted in the return value. - - The returned units are arbitrary and should only be compared - relative to each other. - - Returns - ------- - freq : ndarray, shape=(n,), dtype=float - The frequency in ascending order of the associated modes' - Eigenvalues. - """ - return nma.frequencies(self) - - def mean_square_fluctuation(self, mode_subset=None, tem=None, tem_factors=K_B): - """ - Compute the *mean square fluctuation* for the atoms according - to the ANM. - This is equal to the sum of the diagonal of each 3x3 - superelement of the ANM covariance matrix, if all k-6 non-trivial - modes are considered. - - Parameters - ---------- - mode_subset : ndarray, dtype=int, optional - Specifies the subset of modes considered in the MSF - computation. - Only non-trivial modes can be selected. - The first mode is counted as 0 in accordance with - Python conventions. - If mode_subset is None, all modes except the first six - trivial modes (0-5) are included. - tem : int, float, None, optional - Temperature in Kelvin to compute the temperature scaling - factor by multiplying with the Boltzmann constant. - If tem is None, no temperature scaling is conducted. - tem_factors : int, float, optional - Factors included in temperature weighting - (with K_B as preset). - - Returns - ------- - msqf : ndarray, shape=(n,), dtype=float - The mean square fluctuations for each atom in the model. - """ - return nma.mean_square_fluctuation(self, mode_subset, tem, tem_factors) - - def bfactor(self, mode_subset=None, tem=None, tem_factors=K_B): - """ - Computes the isotropic B-factors/temperature factors/ - Deby-Waller factors for atoms/coarse-grained nodes using - the mean-square fluctuation. - These can be used to relate results obtained from ENMs - to experimental results. - - Parameters - ---------- - mode_subset : ndarray, shape=(n,), dtype=int, optional - Specifies the subset of modes considered in the MSF - computation. - Only non-trivial modes can be selected. - The first mode is counted as 0 in accordance with - Python conventions. - If mode_subset is None, all modes except the first six - trivial modes (0-5) are included. - tem : int, float, None, optional - Temperature in Kelvin to compute the temperature scaling - factor by multiplying with the Boltzmann constant. - If tem is None, no temperature scaling is conducted. - tem_factors : int, float, optional - Factors included in temperature weighting - (with K_B as preset). - Returns - ------- - bfac_values : ndarray, shape=(n,), dtype=float - B-factors of C-alpha atoms. - """ - return nma.bfactor(self, mode_subset, tem, tem_factors) - - def dcc(self, mode_subset=None, norm=True, tem=None, tem_factors=K_B): - r""" - Computes the normalized *dynamic cross-correlation* between - nodes of the ANM. - - The DCC is a measure for the correlation in fluctuations - exhibited by a given pair of nodes. If normalized, pairs with - correlated fluctuations (same phase and period), - anticorrelated fluctuations (opposite phase, same period) - and non-correlated fluctuations are assigned (normalized) - DCC values of 1, -1 and 0 respectively. - For results consistent with MSFs, temperature-weighted - absolute values can be computed (only relevant if results - are not normalized). - - Parameters - ---------- - mode_subset : ndarray, shape=(n,), dtype=int, optional - Specifies the subset of modes considered in the MSF - computation. - Only non-trivial modes can be selected. - The first mode is counted as 0 in accordance with - Python conventions. - If mode_subset is None, all modes except the first six - trivial modes (0-5) are included. - norm : bool, optional - Normalize the DCC using the MSFs of interacting nodes. - tem : int, float, None, optional - Temperature in Kelvin to compute the temperature scaling - factor by multiplying with the Boltzmann constant. - If tem is None, no temperature scaling is conducted. - tem_factors : int, float, optional - Factors included in temperature weighting - (with :math:`k_B` as preset). - - Returns - ------- - dcc : ndarray, shape=(n, n), dtype=float - DCC values for ENM nodes. - - Notes - ----- - The DCC for a nodepair :math:`ij` is computed as: - - .. math:: - - DCC_{ij} = \frac{3 k_B T}{\gamma} \sum_k^L \left[ \frac{\vec{u}_k \cdot \vec{u}_k^T}{\lambda_k} \right]_{ij} - - with :math:`\lambda` and :math:`\vec{u}` as - Eigenvalues and Eigenvectors corresponding to mode :math:`k` of - the modeset :math:`L`. - - DCCs can be normalized to MSFs exhibited by two compared nodes - following: - - .. math:: - - nDCC_{ij} = \frac{DCC_{ij}}{[DCC_{ii} DCC_{jj}]^{1/2}} - """ - return nma.dcc(self, mode_subset, norm, tem, tem_factors) - - def prs_effector_sensor(self, norm=True): + def prs_effector_sensor( + self, norm: bool = True + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """ Compute the perturbation response scanning matrix following and the derived effector and sensor profiles after @@ -443,3 +289,41 @@ def prs_effector_sensor(self, norm=True): prs_mat = nma.prs(self, norm) eff, sens = nma.effector_sensor(prs_mat) return prs_mat, eff, sens + + @property + @override + def _interactions(self) -> np.ndarray | None: + return self._hessian + + @staticmethod + @override + def _calc_mass_weight_matrix(masses: np.ndarray) -> np.ndarray: + mass_weights = 1 / np.sqrt(masses) + # 3 repetitions, + # as the Hessian has 3 entries (x, y, z) for each atom + mass_weights = np.repeat(mass_weights, 3) + return np.outer(mass_weights, mass_weights) + + def eigen(self) -> tuple[np.ndarray, np.ndarray, int]: + """ + Compute the Eigenvalues and Eigenvectors of the + *Hessian* matrix. + + The first six Eigenvalues/Eigenvectors correspond to + trivial modes (translations/rotations) and are usually omitted + in normal mode analysis. + + Returns + ------- + eig_values : ndarray, shape=(k,), dtype=float + Eigenvalues of the *Hessian* matrix in ascending order. + + This is not a copy: Create a copy before modifying this matrix. + eig_vectors : ndarray, shape=(k,n), dtype=float + Eigenvectors of the *Hessian* matrix. + ``eig_values[i]`` corresponds to ``eig_vectors[i]``. + + This is not a copy: Create a copy before modifying this matrix. + """ + # only called for proper docstring + return super().eigen() diff --git a/src/springcraft/enm.py b/src/springcraft/enm.py new file mode 100644 index 0000000..09bad47 --- /dev/null +++ b/src/springcraft/enm.py @@ -0,0 +1,816 @@ +""" +This module contains the :class:`ENM` class. An abstract base class for molecular dynamics calculations Models. +""" + +__name__ = "springcraft" +__author__ = "Raphael Sutter" +__all__ = ["ENM"] + +from abc import ABC, abstractmethod + +import biotite.structure as struc +import biotite.structure.info as strucinfo +import numpy as np + +from . import nma +from .forcefield import ForceField + +K_B = 1.380649e-23 +N_A = 6.02214076e23 + + +class ENM(ABC): + """ + Abstract base class for an *Elastic Network Model*. + + Parameters + ---------- + atoms : AtomArray, shape=(n,) or ndarray, shape=(n,3), dtype=float + The atoms or their coordinates that are part of the model. + It usually contains only CA atoms. + force_field : ForceField, natoms=n + The :class:`ForceField` that defines the force constants between + the given `atoms`. + masses : bool or ndarray, shape=(n,), dtype=float, optional + If an array is given, the interaction matrix is weighted with the + inverse square root of the given masses. + If set to true, these masses are automatically inferred from the + ``res_name`` annotation of `atoms`, instead. + This requires `atoms` to be an :class:`AtomArray`. + By default no mass-weighting is applied. + use_cell_list : bool, optional + If true, a *cell list* is used to find atoms within cutoff + distance instead of checking all pairwise atom distances. + This significantly increases the performance for large number of + atoms, but is slower for very small systems. + If the `force_field` does not provide a cutoff, no cell list is + used regardless. + + Attributes + ---------- + covariance : ndarray, shape=(n,n), dtype=float + The covariance matrix for this model, i.e. the inverted + interaction matrix. The returned covariance matrix is not scaled + correctly and does not have the correct unit. To obtain the true + covariance matrix, you can calculate + + .. math:: + + \\text{Cov}_\\text{true} = k_B T \\text{Cov} + + with Boltzman constant :math:`k_B` and absolut temperature + :math:`[T] = K` in Kelvin. + + This is not a copy: Create a copy before modifying this matrix. + masses : None or ndarray, shape=(n,), dtype=float + The mass for each atom, `None` if no mass weighting is applied. + """ + + _adjacency: np.ndarray | None + _coord: np.ndarray + _covariance: np.ndarray | None + _eigen_values: np.ndarray | None + _eigen_values_zero: int + _eigen_vectors: np.ndarray | None + _ff: ForceField + _free_energy_contrib: float | None + _masses: np.ndarray | None + _masses_weight_matrix: np.ndarray | None + _natoms: int + _use_cell_list: bool + + def __init__( + self, + atoms: struc.AtomArray | np.ndarray, + force_field: ForceField, + masses=None, + use_cell_list=True, + ): + self._coord = np.asarray(struc.coord(atoms)) + self._natoms = len(self._coord) + self._ff = force_field + self._use_cell_list = use_cell_list + + if masses is None or masses is False: + self._masses = None + elif masses is True: + if not isinstance(atoms, struc.AtomArray): + raise TypeError( + "An AtomArray is required to automatically infer masses" + ) + self._masses = np.array( + [ + strucinfo.mass(res_name, is_residue=True) + for res_name in atoms.res_name # pyright: ignore[reportOptionalIterable] + ] + ) + else: + if len(masses) != self._natoms: + raise IndexError(f"{len(masses)} masses for {self._natoms} atoms given") + if np.any(masses == 0): + raise ValueError("Masses must not be 0") + self._masses = np.array(masses, dtype=float) + + if self._masses is not None: + self._mass_weight_matrix = self._calc_mass_weight_matrix(self._masses) + else: + self._mass_weight_matrix = None + + self._adjacency = None + self._covariance = None + self._eigen_values = None + self._eigen_values_zero = 0 + self._eigen_vectors = None + self._free_energy_contrib = None + + @property + def masses(self) -> np.ndarray | None: + return self._masses + + @property + def covariance(self) -> np.ndarray: + if self._covariance is None: + # same algorithm as linalg.pinv + # but we want to store calculates eigenvalues in the process + s, vt, k = self.eigen() + s[k:] = 1 / s[k:] # zero eigenvalues stay zero + u = vt.T + + self._covariance = u @ np.multiply(s[..., np.newaxis], vt) + + return self._covariance + + @covariance.setter + def covariance(self, value: np.ndarray): + length = self._natoms * self.dof_per_node + if value.shape != (length, length): + raise IndexError(f"Expected shape {(length, length)}, got {value.shape}") + self._covariance = value + self._eigen_values = None + self._eigen_values_zero = 0 + self._eigen_vectors = None + self._free_energy_contrib = None # unscaled + + @property + @abstractmethod + def dof_per_node(self) -> int: + pass + + def modify_contacts( + self, + atom_i: np.typing.ArrayLike, + atom_j: np.typing.ArrayLike, + delta: bool | float | np.typing.ArrayLike, + skip_checks=False, + ): + """ + Modifies the force constant in the `interaction` matrix between a + pair of `atoms`. The interaction between the atoms can either be + - turned off (the force constant is set to 0), + - changed by an arbitrary amount `delta` or + - turned on (reset to the value defined by the `force_field`). + Turning on only works, if the contact is within cutoff distance. + Otherwise nothing happens. + + If the `covariance` matrix exists, a low complexity algorithm is + used to update the covariance matrix according to the small + pertubation introduced to `interaction` matrix. + + The `interaction` matrix needs to be calculated beforehand. + + Parameters + ---------- + atom_i : array_like of int, shape=(k,) + First atom index + atom_j : array_like of int, shape=(k,) + Second atom index with ``atom_i[idx] != atom_j[idx]`` + delta : bool or float or array_like of bool or float, shape (1,) or (k,) + A bool gets interpreted as a turn on/off signal. + The amount by which the interaction strength between + atom i and j gets changed in the `kirchhoff` matrix. + Must not be 0. + skip_checks : bool, optional + Whether to skip argument checks, by default False + + Raises (if checks are enabled) + ------ + ValueError + If the `interaction` matrix does not exist or + If the index arrays cannot be converted to indices or + If index arrays do not have the same size or + If the `delta` value(s) are neither float nor bool. + IndexError + If any indices are out of bounds for the initialized structure or + If the contact of an atom with itself shall be modified. + TypeError + If `delta` is neither a bool nor a float. + """ + atom_i = np.atleast_1d(np.asarray(atom_i, dtype=int)) + atom_j = np.atleast_1d(np.asarray(atom_j, dtype=int)) + delta = np.asarray(delta) # becomes 1d later + + if not skip_checks: + if self._interactions is None: + raise ValueError("Kirchhoff matrix must exist.") + if atom_i.size != atom_j.size: + raise ValueError( + f"Expected atom index arrays to have the same size " + f"but got {atom_i.size} and {atom_j.size}." + ) + if ( + np.any(atom_i < 0) + or np.any(atom_i >= self._natoms) + or np.any(atom_j < 0) + or np.any(atom_j >= self._natoms) + ): + raise IndexError( + f"Index out of bounds for a structure of length {self._natoms}" + ) + mask = atom_i == atom_j + if np.any(mask): + raise IndexError( + f"Expected array indices to be different " + f"but was {np.stack((atom_i, atom_j), axis=1)[mask, :]} " + f"at {np.nonzero(mask)[0]}" + ) + if delta.size != 1 and delta.size != atom_i.size: + raise ValueError( + f"There must be either 1 delta for all updates " + f"or as many as updates. " + f"Expected {atom_i.size} or 1 " + f"but was {delta.size}." + ) + if ( + not np.issubdtype(delta.dtype, np.integer) + and not np.issubdtype(delta.dtype, np.floating) + and not np.issubdtype(delta.dtype, np.bool) + ): + raise TypeError( + f"Expected delta to be float or bool but was {delta.dtype}" + ) + + if delta.size == 1: + delta = np.repeat(delta, atom_i.size) + + if np.issubdtype(delta.dtype, np.bool): + sq_dist = self._adjacency[atom_i, atom_j] + + mask_on = delta & (sq_dist != 0) + mask_off = ~delta + + force_constants = np.zeros(delta.size) + force_constants[mask_on] = self._ff.force_constant( + atom_i[mask_on], atom_j[mask_on], sq_dist[mask_on] + ) + + delta = np.select( + [mask_on, mask_off], + [ + -(force_constants + self._interactions[atom_i, atom_j]), + -self._interactions[atom_i, atom_j], + ], + default=0, + ) + + non_zero_mask = abs(delta) > 1e-8 + self._modify_contact_pair( + atom_i[non_zero_mask], atom_j[non_zero_mask], delta[non_zero_mask] + ) + + def modify_atom(self, atom_i: int, new_atom: bool | struc.Atom, skip_checks=False): + """ + Modifies the force constants in the `interaction` matrix between the + `atom_i` and all its adjacent atoms. An atom is defined as adjacent + if it is within cutoff distance. An atom can be either be + - turned off (interactions to all atoms are turned off), + - turned on (interactions to all adjacent atoms are turned on) or + - modified in a way, that the interaction strengths to its + adjacent atoms change. This results in a recalculation of all + interactions of `atom_i`. + + If the `covariance` matrix exists, a low complexity algorithm is + used to update the covariance matrix according to the small + pertubation introduced to `kirchhoff` matrix. + + The `interaction` matrix needs to be calculated beforehand. + + Parameters + ---------- + atom_i : int + The index of the atom to change. + new_atom : bool or Atom + A bool gets interpreted as a turn on/off signal. + An Atom may result in a change to the `ForceField`. + skip_checks : bool, optional + Whether to skip argument checks, by default False + + Raises (if checks are enabled) + ------ + ValueError + If the `interaction` matrix does not exist. + IndexError + If any indices are out of bounds for the initialized structure. + TypeError + If `delta` is neither a bool nor an Atom. + """ + if not skip_checks: + if self._interactions is None: + raise ValueError("Interactions matrix must exist.") + if atom_i < 0 or atom_i >= self._natoms: + raise IndexError( + f"Index out of bounds for a structure of length {self._natoms}" + ) + + if isinstance(new_atom, bool): + delta = new_atom + else: # new_atom is Atom + if not self._ff.update(atom_i, new_atom, skip_checks=True): + return # ForceField did not change + delta = True + + length = self._natoms + atom_j = np.arange(length - 1) + atom_j[atom_i:] = np.arange(atom_i + 1, length) + self.modify_contacts( + np.repeat(atom_i, length - 1), atom_j, delta, skip_checks=True + ) + + def eigen(self) -> tuple[np.ndarray, np.ndarray, int]: + """ + Compute or fetch the Eigenvalues and Eigenvectors of the + *interaction* matrix. + + Returns + ------- + eig_values : ndarray, shape=(k,), dtype=float + Eigenvalues of the matrix in ascending order. + + This is not a copy: Create a copy before modifying this matrix. + eig_vectors : ndarray, shape=(k,n), dtype=float + Eigenvectors of the matrix. + ``eig_values[i]`` corresponds to ``eigenvectors[i]``. + + This is not a copy: Create a copy before modifying this matrix. + k : int + The number of zero Eigenvalues. + """ + if self._eigen_values is None or self._eigen_vectors is None: + if self._interactions is None: + raise ValueError("Initialize interactions matrix first.") + + self._eigen_values, self._eigen_vectors = np.linalg.eigh(self._interactions) + + self._eigen_values_zero = len(self._eigen_values) + i = 0 + while i < self._eigen_values_zero: + if self._eigen_values[i] > 1e-10: + self._eigen_values_zero = i + break + i = i + 1 + + return self._eigen_values, self._eigen_vectors.T, self._eigen_values_zero + + def frequencies(self) -> np.ndarray: + """ + Compute the oscillation frequencies of the model. + + The first mode corresponds to rigid-body translations/rotations + and is omitted in the return value. + The returned units are arbitrary and should only be compared + relative to each other. + + Returns + ------- + frequencies : ndarray, shape=(n,), dtype=float + Oscillation frequencies of the model in in ascending order. + *NaN* values mark frequencies corresponding to translations + or rotations. + """ + return nma.frequencies(self) + + def mean_square_fluctuation( + self, + mode_subset: np.ndarray | None = None, + tem: float | None = None, + tem_factors: float = K_B, + ) -> np.ndarray: + """ + Compute the *mean square fluctuation* for the atoms according to + the GNM. + This is equal to the sum of the diagonal of of the + GNM covariance matrix, if all k-1 non-trivial + modes are considered. + + Parameters + ---------- + mode_subset : ndarray, shape=(n,), dtype=int, optional + Specifies the subset of modes considered in the MSF + computation. + Only non-trivial modes can be selected. + The first mode is counted as 0 in accordance with + Python conventions. + If mode_subset is None, all modes except the first + trivial mode (0) are included. + tem : int, float, None, optional + Temperature in Kelvin to compute the temperature scaling + factor by multiplying with the Boltzmann constant. + If tem is None, no temperature scaling is conducted. + tem_factors : int, float, optional + Factors included in temperature weighting + (with K_B as preset). + + Returns + ------- + msqf : ndarray, shape=(n,), dtype=float + The mean square fluctuations for each atom in the model. + """ + return nma.mean_square_fluctuation(self, mode_subset, tem, tem_factors) + + def bfactor( + self, + mode_subset: np.ndarray | None = None, + tem: float | None = None, + tem_factors: float = K_B, + ) -> np.ndarray: + """ + Computes the isotropic B-factors/temperature factors/ + Deby-Waller factors for atoms/coarse-grained nodes using + the mean-square fluctuation. + + These can be used to relate results obtained from ENMs + to experimental results. + + Parameters + ---------- + mode_subset : ndarray, shape=(n,), dtype=int, optional + Specifies the subset of modes considered in the MSF + computation. + Only non-trivial modes can be selected. + The first mode is counted as 0 in accordance with + Python conventions. + If mode_subset is None, all modes except the first + trivial mode (0) are included. + tem : int, float, None, optional + Temperature in Kelvin to compute the temperature scaling + factor by multiplying with the Boltzmann constant. + If tem is None, no temperature scaling is conducted. + tem_factors : int, float, optional + Factors included in temperature weighting + (with K_B as preset). + Returns + ------- + bfac_values : ndarray, shape=(n,), dtype=float + B-factors of C-alpha atoms. + """ + return nma.bfactor(self, mode_subset, tem, tem_factors) + + def dcc( + self, + mode_subset: np.ndarray | None = None, + norm: bool = True, + tem: float | None = None, + tem_factors: float = K_B, + ) -> np.ndarray: + r""" + Computes the normalized *dynamic cross-correlation* between + nodes of the GNM. + + The DCC is a measure for the correlation in fluctuations + exhibited by a given pair of nodes. If normalized, pairs with + correlated fluctuations (same phase and period), + anticorrelated fluctuations (opposite phase, same period) + and non-correlated fluctuations are assigned (normalized) + DCC values of 1, -1 and 0 respectively. + For results consistent with MSFs, temperature-weighted + absolute values can be computed (only relevant if results + are not normalized). + + Parameters + ---------- + mode_subset : ndarray, shape=(n,), dtype=int, optional + Specifies the subset of modes considered in the MSF + computation. + Only non-trivial modes can be selected. + The first mode is counted as 0 in accordance with + Python conventions. + If mode_subset is None, all modes except the first + trivial mode (0) are included. + norm : bool, optional + Normalize the DCC using the MSFs of interacting nodes. + tem : int, float, None, optional + Temperature in Kelvin to compute the temperature scaling + factor by multiplying with the Boltzmann constant. + If tem is None, no temperature scaling is conducted. + tem_factors : int, float, optional + Factors included in temperature weighting + (with :math:`k_B` as preset). + + Returns + ------- + dcc : ndarray, shape=(n, n), dtype=float + DCC values for ENM nodes. + + Notes + ----- + The DCC for a nodepair :math:`ij` is computed as: + + .. math:: + + DCC_{ij} = \frac{3 k_B T}{\gamma} \sum_k^L \left[ \frac{\vec{u}_k \cdot \vec{u}_k^T}{\lambda_k} \right]_{ij} + + with :math:`\lambda` and :math:`\vec{u}` as + Eigenvalues and Eigenvectors corresponding to mode :math:`k` of + the modeset :math:`L`. + + DCCs can be normalized to MSFs exhibited by two compared nodes + following: + + .. math:: + + nDCC_{ij} = \frac{DCC_{ij}}{[DCC_{ii} DCC_{jj}]^{1/2}} + """ + return nma.dcc(self, mode_subset, norm, tem, tem_factors) + + def free_energy_contribution(self, tem=None, tem_factors=K_B): + """ + Calculates the contribution of the protein configuration to the + Helmholtz free energy. + + According to + Hamacher K. Free energy of contact formation in proteins: efficient computation in the elastic network approximation. Phys Rev E Stat Nonlin Soft Matter Phys. 2011 Jul;84(1 Pt 2):016703. doi: 10.1103/PhysRevE.84.016703. Epub 2011 Jul 7. PMID: 21867339. + """ + if self._free_energy_contrib is None: + self._free_energy_contrib = nma.free_energy_contribution(self) + + tem_scaling = 1 + offset = 0 + if tem is not None: + tem_scaling = tem * tem_factors + offset = ( + self._natoms - self._eigen_values_zero * self.dof_per_node + ) * np.log(tem_scaling) + return tem_scaling * (self._free_energy_contrib - offset) + + @property + @abstractmethod + def _interactions(self) -> np.ndarray | None: + """ + Returns the characteristic interaction matrix for this ENM e.g. + - the *Kirchhoff* matrix in case of the GNM or + - the *Hessian* matrix in case of the ANM. + + Primary goal is to create a private accessor for the interaction + matrix as the characteristic name can differ but the same operations + are performed on both matrices (like Eigenvalue calculation). + + This is not a copy: be careful when modifying the matrix or create + a copy. + + Returns + ------- + interactions : ndarray, dtype=float, optional + The characteristic interactions matrix. + """ + pass + + @staticmethod + @abstractmethod + def _calc_mass_weight_matrix(masses: np.ndarray) -> np.ndarray: + pass + + def _calc_adjacency(self): + """ + Calculates the adjacency matrix and returns the values as lists. + + The adjacency matrix is a weigthed graph that spanns the + elastic network. Atoms are considered as being in contact + when their distance is below the `ForceField`s threshold. + The contacts may be overridden by using a `PatchedForceField`. + + When `_use_cell_list` is set uses an optimized strategy + working on point lists instead off matrizes which reduces + base operations for sparse cases. Otherwise uses a + computationally expensive brute-force approach. + + Sets the class attribute `_adjacency` and returns the indices, + displacements and squared distances as list for faster + processing in sparse matrices. + + Returns + ------- + atom_i_list: ndarray, shape=(k,), dtype=list + list of the first atom indices + atom_j_list: ndarray, shape=(k,), dtype=list + list of the sceond atom indices + disp_list: ndarray, shape=(k,3), dtype=float + displacement between atom_i and atom_j + sq_dist_list: ndarray, shape=(k,), dtype=float + squared distance between atom_i and atom_j + """ + # Convert into higher precision to avert numerical issues in pseudoinverse calculation + coord = self._coord.astype(np.float64, copy=False) + # Find interacting atoms within cutoff distance + cutoff_dist = self._ff.cutoff_distance + + # contacts to shut off + turn_off = np.zeros((self._natoms, self._natoms), dtype=np.bool) + if self._ff.contact_shutdown is not None: + turn_off[self._ff.contact_shutdown, :] = True + turn_off[:, self._ff.contact_shutdown] = True + if self._ff.contact_pair_off is not None: + contact_off = np.sort(self._ff.contact_pair_off, axis=1).T + turn_off[contact_off[0, :], contact_off[1, :]] = True + turn_off[contact_off[1, :], contact_off[0, :]] = True + + if cutoff_dist is not None and self._use_cell_list: + # use optimized strategy + sq_dist_matrix = np.zeros((self._natoms, self._natoms)) + cell_list = struc.CellList(coord, cutoff_dist) # pyright: ignore[reportAttributeAccessIssue] + adj_indices = cell_list.get_atoms(coord, cutoff_dist) + + # allocate return values + init_len = adj_indices.size + if self._ff.contact_pair_on is not None: + init_len += np.size(self._ff.contact_pair_on, axis=0) + atom_i_list = np.empty(init_len, dtype=np.int32) + atom_j_list = np.empty(init_len, dtype=np.int32) + disp_list = np.empty((init_len, 3)) + sq_dist_list = np.empty(init_len) + idx = 0 + + # iterate over atoms + i_iter, j_iter = np.nested_iters(adj_indices, [[0], [1]], flags=["c_index"]) + for _ in i_iter: + atom_i = i_iter.index + atom_i_coord = coord[atom_i] + + # iterate over contacts + for atom_j in j_iter: + if atom_j < 0: # pyright: ignore[reportOperatorIssue] + # handled every contact for atom + break + if atom_j >= atom_i: # pyright: ignore[reportOperatorIssue] + # only calc sq_distance for lower triangle + continue + if turn_off[atom_i, atom_j]: + continue + + disp = struc.displacement(atom_i_coord, coord[atom_j]) + sq_dist = np.dot(disp, disp) + sq_dist_matrix[atom_i, atom_j] = sq_dist + sq_dist_matrix[atom_j, atom_i] = sq_dist + + atom_i_list[idx] = atom_i + atom_j_list[idx] = atom_j + disp_list[idx, :] = disp + sq_dist_list[idx] = sq_dist + idx += 1 + atom_i_list[idx] = atom_j + atom_j_list[idx] = atom_i + disp_list[idx, :] = disp + sq_dist_list[idx] = sq_dist + idx += 1 + + # manually switched on contacts + if self._ff.contact_pair_on is not None: + # TODO move input validation to ForceField + contacts = np.sort(self._ff.contact_pair_on, axis=1) + for atom_i, atom_j in np.nditer((contacts[:, 0], contacts[:, 1])): + if sq_dist_matrix[atom_i, atom_j] != 0: + # atom already on + continue + + disp = struc.displacement(coord[atom_i], coord[atom_j]) + sq_dist = np.dot(disp, disp) + sq_dist_matrix[atom_i, atom_j] = sq_dist + sq_dist_matrix[atom_j, atom_i] = sq_dist + + atom_i_list[idx] = atom_i + atom_j_list[idx] = atom_j + disp_list[idx, :] = disp + sq_dist_list[idx] = sq_dist + idx += 1 + atom_i_list[idx] = atom_j + atom_j_list[idx] = atom_i + disp_list[idx, :] = disp + sq_dist_list[idx] = sq_dist + idx += 1 + + # trim output arrays + atom_i_list = np.resize(atom_i_list, idx) + atom_j_list = np.resize(atom_j_list, idx) + disp_list = np.resize(disp_list, (idx, 3)) + sq_dist_list = np.resize(sq_dist_list, idx) + + else: + # brute force + disp_matrix = struc.displacement( + coord[np.newaxis, :, :], coord[:, np.newaxis, :] + ) + sq_dist_matrix = np.sum(disp_matrix * disp_matrix, axis=-1) + + # map which contacts to set zero + if cutoff_dist is not None: + map = sq_dist_matrix > cutoff_dist**2 + else: + map = np.zeros(sq_dist_matrix.shape, dtype=np.bool) + map |= turn_off + if self._ff.contact_pair_on is not None: + contacts_on = np.sort(self._ff.contact_pair_on, axis=1).T + map[contacts_on[0, :], contacts_on[1, :]] = False + map[contacts_on[1, :], contacts_on[0, :]] = False + sq_dist_matrix[map] = 0 + + # retrieve lists + atom_i_list, atom_j_list = np.where(sq_dist_matrix) + disp_list = disp_matrix[atom_i_list, atom_j_list, :] + sq_dist_list = sq_dist_matrix[atom_i_list, atom_j_list] + + self._adjacency = sq_dist_matrix + + return atom_i_list, atom_j_list, disp_list, sq_dist_list + + def _modify_contact_pair( + self, atom_i: np.ndarray, atom_j: np.ndarray, deltas: np.ndarray + ): + """ + Modifies the interaction strengths between the atoms i and j + in the `kirchhoff` matrix. Requires for the `kirchhoff` matrix + to exist and for the change delta to be not null. The interaction + strength of an atom with itself shall not be changed. + + As this is a private method, the input arguments are not + validated. Input argument validation happens in the user facing + functions which will always supply semantically correct + arguments. + + If the `covariance` matrix exists, this method performs a fast + permutation to the `covariance` matrix based on the given + permutation to the `kirchhoff` matrix. This speeds up + calculations as the `covariance` matrix does not need to be + calculated by SVD again. + + TODO + - formulas + - speedup + + Parameters + ---------- + atom_i : ndarray, shape=(k,), dtype=int + First atom index + atom_j : ndarray, shape=(k,), dtype=int + Second atom index with ``atom_i[idx] != atom_j[idx]`` + delta : ndarray, shape=(k,), dtype=float + The amount by which the interaction strength between + atom i and j gets changed in the `kirchhoff` matrix. + Must not be 0. + """ + for i, j, delta in np.nditer([atom_i, atom_j, deltas]): + if self._covariance is not None: + x = self._covariance[i, :] - self._covariance[j, :] + beta = float(1 + delta * (x[j] - x[i])) + + if np.abs(beta) < 1e-10: # TODO use relative instead of absolute diff? + self._modify_contact_pair_rank_decrease(x) + self._free_energy_contrib = None + elif np.abs(self._covariance[i, j]) < 1e-10: # TODO mathematical proof + self._modify_contact_pair_rank_increase(i, j, delta, x, beta) + self._free_energy_contrib = None + else: + # default case + self._covariance += np.outer(x * delta / beta, x) + + if self._free_energy_contrib is not None: + self._free_energy_contrib += beta # TODO *3 for GNM + + self._interactions[i, j] += delta + self._interactions[j, i] += delta + # self._kirchhoff[j, i] = self._kirchhoff[i, j] # TODO why does this not work + self._interactions[i, i] -= delta + self._interactions[j, j] -= delta + + # TODO can we make it faster, if we only compute the upper triangle? + + self._eig_values = None + self._eig_vectors = None + + def _modify_contact_pair_rank_decrease(self, x): + cov_mul_diff = np.matvec(self._covariance, x) + x_norm_sq = np.inner(x, x) + + dd_mul_cov = np.outer(x / -x_norm_sq, cov_mul_diff) + alpha = np.inner(x, cov_mul_diff) / (x_norm_sq * x_norm_sq) + k_cov_h_mul_kh = np.outer(alpha * x, x) + + self._covariance += dd_mul_cov + dd_mul_cov.T + k_cov_h_mul_kh + + def _modify_contact_pair_rank_increase(self, i, j, delta, x, beta): + y = -np.matvec(self._interactions, x) + y[i] += 1 + y[j] -= 1 + + y_norm_sq = np.inner(y, y) + x_y = np.outer(x / -y_norm_sq, y) + beta_y_y = np.outer(y * beta / (-delta * y_norm_sq * y_norm_sq), y) + + self._covariance += x_y + x_y.T + beta_y_y diff --git a/src/springcraft/forcefield.py b/src/springcraft/forcefield.py index ad5f069..f55f194 100644 --- a/src/springcraft/forcefield.py +++ b/src/springcraft/forcefield.py @@ -3,6 +3,8 @@ i.e. Kirchhoff and Hessian matrices. """ +from __future__ import annotations + __name__ = "springcraft" __author__ = "Patrick Kunzmann, Jan Krumbach" __all__ = [ @@ -21,6 +23,7 @@ import biotite.sequence as seq import biotite.structure as struc import numpy as np +from typing_extensions import override DATA_DIR = join(dirname(realpath(__file__)), "data") @@ -29,7 +32,7 @@ AA_LIST = [ seq.ProteinSequence.convert_letter_1to3(letter) # Omit ambiguous amino acids and stop signal - for letter in seq.ProteinSequence.alphabet.get_symbols()[:N_AMINO_ACIDS] + for letter in seq.ProteinSequence.alphabet.get_symbols()[:N_AMINO_ACIDS] # type: ignore[index] ] AA_TO_INDEX = {aa: i for i, aa in enumerate(AA_LIST)} @@ -65,7 +68,9 @@ class ForceField(metaclass=abc.ABCMeta): """ @abc.abstractmethod - def force_constant(self, atom_i, atom_j, sq_distance): + def force_constant( + self, atom_i: np.ndarray, atom_j: np.ndarray, sq_distance: np.ndarray + ) -> np.ndarray: """ Get the force constant for the interaction of the given atoms. @@ -80,6 +85,12 @@ def force_constant(self, atom_i, atom_j, sq_distance): The distance between the atoms indicated by `atom_i` and `atom_j`. + Returns + ------- + force_constants: ndarray, shape=(n,), dtype=float + The force constant between the atoms indicated by `atom_i` and + `atom_j`. + Notes ----- Implementations of this method do not need @@ -93,24 +104,52 @@ def force_constant(self, atom_i, atom_j, sq_distance): """ pass + def update(self, atom_i, new_atom, skip_checks=False): + """ + Allows a small pertubation to the `ForceField` if the `ForceField` + depends on the `Atom` configuration in the model. + + Override when inheriting or leave passed when the `ForceField` + does not depend on the a actual molecule configuration. + + Parameters + ---------- + atom_i : int + The atom to modify + Atom : Atom + The changed atom + skip_checks : bool, optional + Whether to skip argument checks, by default False + + Returns + ------- + bool + Whether the `ForceField` was updated. + """ + pass + + @property + def cutoff_distance(self) -> float | None: + return None + @property - def cutoff_distance(self): + def contact_shutdown(self) -> np.ndarray | None: return None @property - def contact_shutdown(self): + def contact_pair_off(self) -> np.ndarray | None: return None @property - def contact_pair_off(self): + def contact_pair_on(self) -> np.ndarray | None: return None @property - def contact_pair_on(self): + def natoms(self) -> int | None: return None @property - def natoms(self): + def _force_constants(self) -> np.ndarray | None: return None @@ -142,11 +181,11 @@ class PatchedForceField(ForceField): def __init__( self, - force_field, - contact_shutdown=None, - contact_pair_off=None, - contact_pair_on=None, - force_constants=None, + force_field: ForceField, + contact_shutdown: np.ndarray | None = None, + contact_pair_off: np.ndarray | None = None, + contact_pair_on: np.ndarray | None = None, + force_constants: np.ndarray | None = None, ): # Support other array-like objects self._force_field = force_field @@ -159,28 +198,31 @@ def __init__( self._contact_pair_on = ( np.asarray(contact_pair_on) if contact_pair_on is not None else None ) - self._force_constants = ( + self._force_constants_local = ( np.asarray(force_constants) if force_constants is not None else None ) # Input argument checks + # TODO accept all indices if no bound is given? _check_indices(force_field.natoms, self._contact_shutdown) _check_indices(force_field.natoms, self._contact_pair_off) _check_indices(force_field.natoms, self._contact_pair_on) if self._contact_pair_on is not None: - if self._force_constants is None: + if self._force_constants_local is None: raise TypeError( "Individual force constants must be given, " "if contacts are turned on" ) - if len(self._force_constants) != len(self._contact_pair_on): + if len(self._force_constants_local) != len(self._contact_pair_on): raise IndexError( - f"{len(self._force_constants)} force constants were " + f"{len(self._force_constants_local)} force constants were " f"given for " f"{len(self._contact_pair_on)} switched on contact_pairs" ) - def force_constant(self, atom_i, atom_j, sq_distance): + def force_constant( + self, atom_i: np.ndarray, atom_j: np.ndarray, sq_distance: np.ndarray + ) -> np.ndarray: if self._force_field.cutoff_distance is None: force_constants = self._force_field.force_constant( atom_i, atom_j, sq_distance @@ -194,8 +236,8 @@ def force_constant(self, atom_i, atom_j, sq_distance): atom_i[cutoff_mask], atom_j[cutoff_mask], sq_distance[cutoff_mask] ) - if self._contact_pair_on is not None: - patch_atom_i, patch_atom_j = self._contact_pair_on.T + if self.contact_pair_on is not None: + patch_atom_i, patch_atom_j = self.contact_pair_on.T # The minimum required size of the patch matrix is the # maximum of the indices + 1 required_size = ( @@ -225,13 +267,16 @@ def force_constant(self, atom_i, atom_j, sq_distance): # No pairs are switched on -> no patching necessary return force_constants + def update(self, atom_i, new_atom, skip_checks=False): + return self._force_field.update(atom_i, new_atom, skip_checks) + @property - def cutoff_distance(self): + def cutoff_distance(self) -> float | None: return self._force_field.cutoff_distance @property - def contact_shutdown(self): - if self._force_field.contact_shutdown is None: + def contact_shutdown(self) -> np.ndarray | None: + if self._contact_shutdown is None or self._force_field.contact_shutdown is None: return self._contact_shutdown else: return np.concatenate( @@ -239,8 +284,8 @@ def contact_shutdown(self): ) @property - def contact_pair_off(self): - if self._force_field.contact_pair_off is None: + def contact_pair_off(self) -> np.ndarray | None: + if self._contact_pair_off is None or self._force_field.contact_pair_off is None: return self._contact_pair_off else: return np.concatenate( @@ -248,8 +293,8 @@ def contact_pair_off(self): ) @property - def contact_pair_on(self): - if self._force_field.contact_pair_on is None: + def contact_pair_on(self) -> np.ndarray | None: + if self._contact_pair_on is None or self._force_field.contact_pair_on is None: return self._contact_pair_on else: return np.concatenate( @@ -257,7 +302,19 @@ def contact_pair_on(self): ) @property - def natoms(self): + def _force_constants(self) -> np.ndarray | None: + if ( + self._force_constants_local is None + or self._force_field._force_constants is None + ): + return self._force_constants_local + else: + return np.concatenate( + [self._force_constants_local, self._force_field._force_constants] + ) + + @property + def natoms(self) -> int | None: return self._force_field.natoms @@ -273,7 +330,7 @@ class InvariantForceField(ForceField): between them is smaller or equal to this value. """ - def __init__(self, cutoff_distance): + def __init__(self, cutoff_distance: float): if cutoff_distance is None: # A value of 'None' would give a fully connected network # with equal force constants for each connection, @@ -281,11 +338,15 @@ def __init__(self, cutoff_distance): raise ValueError("Cutoff distance must be a float") self._cutoff_distance = cutoff_distance - def force_constant(self, atom_i, atom_j, sq_distance): + @override + def force_constant( + self, atom_i: np.ndarray, atom_j: np.ndarray, sq_distance: np.ndarray + ) -> np.ndarray: return np.ones(len(atom_i)) @property - def cutoff_distance(self): + @override + def cutoff_distance(self) -> float: return self._cutoff_distance @@ -315,10 +376,12 @@ class HinsenForceField(ForceField): Chemical Physics 261(1-2): 25-37 (2000). """ - def __init__(self, cutoff_distance=None): + def __init__(self, cutoff_distance: float | None = None): self._cutoff_distance = cutoff_distance - def force_constant(self, atom_i, atom_j, sq_distance): + def force_constant( + self, atom_i: np.ndarray, atom_j: np.ndarray, sq_distance: np.ndarray + ) -> np.ndarray: distance = np.sqrt(sq_distance) distance = np.clip(distance, a_min=2.9, a_max=None) return np.where( @@ -326,7 +389,7 @@ def force_constant(self, atom_i, atom_j, sq_distance): ) @property - def cutoff_distance(self): + def cutoff_distance(self) -> float | None: return self._cutoff_distance @@ -355,14 +418,16 @@ class ParameterFreeForceField(ForceField): PNAS. 106, 30, 12347-12352 (2009). """ - def __init__(self, cutoff_distance=None): + def __init__(self, cutoff_distance: float | None = None): self._cutoff_distance = cutoff_distance - def force_constant(self, atom_i, atom_j, sq_distance): + def force_constant( + self, atom_i: np.ndarray, atom_j: np.ndarray, sq_distance: np.ndarray + ) -> np.ndarray: return 1 / sq_distance @property - def cutoff_distance(self): + def cutoff_distance(self) -> float | None: return self._cutoff_distance @@ -410,6 +475,8 @@ class TabulatedForceField(ForceField): Individual value for each distance bin and pair of amino acid types. + The quadratic layers of the matrizes must be symmetric. + cutoff_distance : float or None or ndarray, shape=(k), dtype=float If no distance dependent values are given for `bonded`, `intra_chain` and `inter_chain`, this parameter accepts a float, @@ -434,9 +501,14 @@ class TabulatedForceField(ForceField): field. """ - def __init__(self, atoms, bonded, intra_chain, inter_chain, cutoff_distance): - if not isinstance(atoms, struc.AtomArray): - raise TypeError(f"Expected 'AtomArray', not {type(atoms).__name__}") + def __init__( + self, + atoms: struc.AtomArray, + bonded: float | np.ndarray, + intra_chain: float | np.ndarray, + inter_chain: float | np.ndarray, + cutoff_distance: float | np.ndarray | None, + ): if not np.all((atoms.atom_name == "CA") & (atoms.element == "C")): raise struc.BadStructureError( "AtomArray does not contain exclusively CA atoms" @@ -465,12 +537,13 @@ def __init__(self, atoms, bonded, intra_chain, inter_chain, cutoff_distance): self._inter_chain = _convert_to_matrix(inter_chain, n_bins) # Maps pos-specific indices to type-specific_indices - matrix_indices = np.array([AA_TO_INDEX[aa] for aa in atoms.res_name]) + self._matrix_indices = np.array([AA_TO_INDEX[aa] for aa in atoms.res_name]) # pyright: ignore[reportOptionalIterable] # Find peptide bonds continuous_res_id = np.diff(atoms.res_id) == 1 continuous_chain_id = atoms.chain_id[:-1] == atoms.chain_id[1:] - peptide_bond_i = np.where(continuous_res_id & continuous_chain_id)[0] + self._is_peptide_bond = continuous_res_id & continuous_chain_id + peptide_bond_i = np.where(self._is_peptide_bond)[0] ### Fill interaction matrix ## Handle non-bonded interactions @@ -481,15 +554,24 @@ def __init__(self, atoms, bonded, intra_chain, inter_chain, cutoff_distance): np.tile(np.arange(self._natoms), self._natoms), ) # Convert indices to type-specific_indices - type_indices = (matrix_indices[pos_indices[0]], matrix_indices[pos_indices[1]]) + type_indices = ( + self._matrix_indices[pos_indices[0]], + self._matrix_indices[pos_indices[1]], + ) intra_interactions = self._intra_chain[type_indices[0], type_indices[1]] inter_interactions = self._inter_chain[type_indices[0], type_indices[1]] # Distinguish between intra- and inter-chain interactions + is_intra_interaction = ( + atoms.chain_id[pos_indices[0]] == atoms.chain_id[pos_indices[1]] + ) interactions = np.where( - atoms.chain_id[pos_indices[0]] == atoms.chain_id[pos_indices[1]], + is_intra_interaction, intra_interactions.T, inter_interactions.T, ).T + self._is_intra_interaction = is_intra_interaction.reshape( + (self._natoms, self._natoms) + ) # Initialize pos-specific interaction matrix # For simplicity bonded interactions are also handled as # non-bonded interactions at this point, @@ -501,7 +583,10 @@ def __init__(self, atoms, bonded, intra_chain, inter_chain, cutoff_distance): ## Handle bonded interactions # Convert pos-specific indices to type-specific indices # -> general case - indices = (matrix_indices[peptide_bond_i], matrix_indices[peptide_bond_i + 1]) + indices = ( + self._matrix_indices[peptide_bond_i], + self._matrix_indices[peptide_bond_i + 1], + ) constants = self._bonded[indices] # Overwrite previous values @@ -512,7 +597,9 @@ def __init__(self, atoms, bonded, intra_chain, inter_chain, cutoff_distance): diag_i, diag_j = np.diag_indices(len(self._interaction_matrix)) self._interaction_matrix[diag_i, diag_j, :] = 0 - def force_constant(self, atom_i, atom_j, sq_distance): + def force_constant( + self, atom_i: np.ndarray, atom_j: np.ndarray, sq_distance: np.ndarray + ) -> np.ndarray: if self._edges is None or len(self._edges) == 1: # Only a single distance bin -> No distance dependency return self._interaction_matrix[atom_i, atom_j, 0] @@ -532,20 +619,83 @@ def force_constant(self, atom_i, atom_j, sq_distance): else: raise + def update(self, atom_i, new_atom, skip_checks=False): + """ + Allows a small pertubation to the `ForceField` if the `ForceField` + depends on the `Atom` configuration in the model. Results in a + fast modification as not the whole `ForceField` gets recalculated + but only the affected interactions. Only changes the amino acid + type changes. + + Parameters + ---------- + atom_i : int + The atom to modify + Atom : Atom + The changed atom + + Returns + ------- + bool + Whether the `ForceField` was updated. + """ + if not skip_checks: + if atom_i < 0 or atom_i >= self._natoms: + raise IndexError( + f"Atom i {atom_i} out of bounds" + f"for a structure of length {self._natoms}" + ) + if not isinstance(new_atom, struc.Atom): + raise TypeError(f"New_atom needs to an Atom but was {type(new_atom)}") + + matrix_index = AA_TO_INDEX[new_atom.res_name] + if self._matrix_indices[atom_i] == matrix_index: + return False + self._matrix_indices[atom_i] = matrix_index + + # Update non-bonded interactions with atom_i + # overriding bonded interactions as they are updated later + atom_interactions = np.where( + self._is_intra_interaction[atom_i, :, np.newaxis], + self._intra_chain[self._matrix_indices[atom_i], self._matrix_indices], + self._inter_chain[self._matrix_indices[atom_i], self._matrix_indices], + ) + self._interaction_matrix[atom_i, :] = atom_interactions + self._interaction_matrix[:, atom_i] = atom_interactions + + # Override with bonded interactions, if they exist + if atom_i > 0 and self._is_peptide_bond[atom_i - 1]: + constant = self._bonded[ + self._matrix_indices[atom_i - 1], self._matrix_indices[atom_i] + ] + self._interaction_matrix[atom_i - 1, atom_i] = constant + self._interaction_matrix[atom_i, atom_i - 1] = constant + if atom_i < self._natoms - 1 and self._is_peptide_bond[atom_i]: + constant = self._bonded[ + self._matrix_indices[atom_i], self._matrix_indices[atom_i + 1] + ] + self._interaction_matrix[atom_i, atom_i + 1] = constant + self._interaction_matrix[atom_i + 1, atom_i] = constant + + # Interaction of atom_i with itself + self._interaction_matrix[atom_i, atom_i, :] = 0 + + return True + @property - def cutoff_distance(self): + def cutoff_distance(self) -> float | None: return None if self._edges is None else self._edges[-1] @property - def natoms(self): + def natoms(self) -> int: return self._natoms @property - def interaction_matrix(self): + def interaction_matrix(self) -> np.ndarray: return self._interaction_matrix @staticmethod - def s_enm_10(atoms): + def s_enm_10(atoms: struc.AtomArray) -> TabulatedForceField: r""" The sENM10 forcefield by Dehouck and Mikhailov was parametrized by statisctical analysis of a NMR conformational @@ -581,7 +731,7 @@ def s_enm_10(atoms): return TabulatedForceField(atoms, 10.0, fc, fc, 10.0) @staticmethod - def s_enm_13(atoms): + def s_enm_13(atoms: struc.AtomArray) -> TabulatedForceField: r""" The sENM13 forcefield by Dehouck and Mikhailov was parametrized by statisctical analysis of a NMR conformational ensemble dataset. @@ -616,7 +766,7 @@ def s_enm_13(atoms): return TabulatedForceField(atoms, 10.0, fc, fc, 13.0) @staticmethod - def d_enm(atoms): + def d_enm(atoms: struc.AtomArray) -> TabulatedForceField: r""" The dENM forcefield by Dehouck and Mikhailov was parametrized by statisctical analysis of a NMR conformational @@ -655,7 +805,7 @@ def d_enm(atoms): return TabulatedForceField(atoms, 46.83, fc, fc, bin_edges) @staticmethod - def sd_enm(atoms): + def sd_enm(atoms: struc.AtomArray) -> TabulatedForceField: r""" The sdENM forcefield by Dehouck and Mikhailov was parametrized by statistical analysis of a NMR conformational ensemble @@ -699,7 +849,9 @@ def sd_enm(atoms): return TabulatedForceField(atoms, bonded, fc, fc, bin_edges) @staticmethod - def e_anm(atoms, nonbonded_mean=False): + def e_anm( + atoms: struc.AtomArray, nonbonded_mean: bool = False + ) -> TabulatedForceField: r""" The "extended ANM" (eANM) method discriminates between non-bonded interactions of amino acids within a single @@ -765,7 +917,10 @@ def e_anm(atoms, nonbonded_mean=False): return TabulatedForceField(atoms, 82.0, intra, inter, 13.0) - def e_anm_mj(atoms, nonbonded_mean=False): + @staticmethod + def e_anm_mj( + atoms: struc.AtomArray, nonbonded_mean: bool = False + ) -> TabulatedForceField: r""" In this variant of the "extended ANM" (eANM) method, non-bonded interactions between amino acids are parametrized in @@ -821,7 +976,10 @@ def e_anm_mj(atoms, nonbonded_mean=False): return TabulatedForceField(atoms, 82.0, intra, inter, 13.0) - def e_anm_ke(atoms, nonbonded_mean=False): + @staticmethod + def e_anm_ke( + atoms: struc.AtomArray, nonbonded_mean: bool = False + ) -> TabulatedForceField: r""" For this variant of the "extended ANM" (eANM), non-bonded interactions between amino-acid pairs are parametrized in a @@ -876,7 +1034,7 @@ def e_anm_ke(atoms, nonbonded_mean=False): return TabulatedForceField(atoms, 82.0, intra, inter, 13.0) -def _convert_to_matrix(value, n_bins): +def _convert_to_matrix(value: float | np.ndarray, n_bins: int) -> np.ndarray: """ Perform checks on input interactions matrices and return consistent 3D matrix. @@ -894,8 +1052,7 @@ def _convert_to_matrix(value, n_bins): # Individual value for distances if len(array) != n_bins: raise IndexError( - f"Array contains {len(array)} elements " - f"for {n_bins} distance bins" + f"Array contains {len(array)} elements for {n_bins} distance bins" ) # Reapeat bin-wise values into both residue type dimensions for _ in range(2): @@ -912,8 +1069,7 @@ def _convert_to_matrix(value, n_bins): _check_matrix(array) if array.shape[-1] != n_bins: raise IndexError( - f"Array contains {len(array)} elements " - f"for {n_bins} distance bins" + f"Array contains {len(array)} elements for {n_bins} distance bins" ) return array @@ -923,7 +1079,7 @@ def _convert_to_matrix(value, n_bins): ) -def _check_matrix(matrix): +def _check_matrix(matrix: np.ndarray) -> None: """ Check matrix on number of elements and symmetry. """ @@ -937,10 +1093,10 @@ def _check_matrix(matrix): raise ValueError("Input matrix is not symmetric") -matrices = {} +matrices: dict[str, np.ndarray] = {} -def _load_matrix(fname): +def _load_matrix(fname: str) -> np.ndarray: if fname in matrices: # Matrix was already loaded return matrices[fname] @@ -950,11 +1106,11 @@ def _load_matrix(fname): return matrix -def _check_indices(length, indices): +def _check_indices(length: int | None, indices: np.ndarray | None) -> None: if indices is None or length is None: return flat_indices = indices.flatten() - out_of_bounds_i = np.where(flat_indices >= length)[0] + out_of_bounds_i = np.where((flat_indices < 0) | (flat_indices >= length))[0] if len(out_of_bounds_i) > 0: raise IndexError( f"Index {flat_indices[out_of_bounds_i[0]]} is out of bounds " diff --git a/src/springcraft/gnm.py b/src/springcraft/gnm.py index a0c9fe3..5e84ad0 100644 --- a/src/springcraft/gnm.py +++ b/src/springcraft/gnm.py @@ -9,15 +9,13 @@ import biotite.structure as struc import numpy as np +from typing_extensions import override -from . import nma -from .interaction import compute_kirchhoff +from .enm import ENM +from .forcefield import ForceField -K_B = 1.380649e-23 -N_A = 6.02214076e23 - -class GNM: +class GNM(ENM): """ This class represents a *Gaussian Network Model*. @@ -27,8 +25,8 @@ class GNM: The atoms or their coordinates that are part of the model. It usually contains only CA atoms. force_field : ForceField, natoms=n - The :class:`ForceField` that defines the force constants between - the given `atoms`. + The :class:`ForceField` that defines the cutoff distance and + pairwise interaction strengths between the given `atoms`. masses : bool or ndarray, shape=(n,), dtype=float, optional If an array is given, the Kirchhoff matrix is weighted with the inverse square root of the given masses. @@ -47,61 +45,55 @@ class GNM: Attributes ---------- kirchhoff : ndarray, shape=(n,n), dtype=float - The *Kirchhoff* matrix for this model. + The *Kirchhoff* matrix for this model. Adjacency matrix of `atoms` + that are within cutoff distance of another. The weights of this + adjacency matrix are the force constants of the abstract springs + between the atoms in the ENM. This is not a copy: Create a copy before modifying this matrix. - covariance : ndarray, shape=(n*3,n*3), dtype=float + covariance : ndarray, shape=(n,n), dtype=float The covariance matrix for this model, i.e. the inverted - *Kirchhofff* matrix. + *Kirchhofff* matrix. The returned covariance matrix is not scaled + correctly and does not have the correct unit. To obtain the true + covariance matrix, you can calculate + + .. math:: + + \\text{Cov}_\\text{true} = k_B T \\text{Cov} + + with Boltzman constant :math:`k_B` and absolut temperature + :math:`[T] = K` in Kelvin. + This is not a copy: Create a copy before modifying this matrix. + masses : None or ndarray, shape=(n,), dtype=float + The mass for each atom, `None` if no mass weighting is applied. """ - def __init__(self, atoms, force_field, masses=None, use_cell_list=True): - self._coord = struc.coord(atoms) - self._ff = force_field - self._use_cell_list = use_cell_list + _kirchhoff: np.ndarray | None - if masses is None or masses is False: - self._masses = None - elif masses is True: - if not isinstance(atoms, struc.AtomArray): - raise TypeError( - "An AtomArray is required to automatically infer masses" - ) - self._masses = np.array( - [ - struc.info.mass(res_name, is_residue=True) - for res_name in atoms.res_name - ] - ) - else: - if len(masses) != atoms.array_length(): - raise IndexError( - f"{len(masses)} masses for " f"{atoms.array_length()} atoms given" - ) - if np.any(masses == 0): - raise ValueError("Masses must not be 0") - self._masses = np.array(masses, dtype=float) - - if self._masses is not None: - mass_weights = 1 / np.sqrt(self._masses) - self._mass_weight_matrix = np.outer(mass_weights, mass_weights) - else: - self._mass_weight_matrix = None + def __init__( + self, + atoms: struc.AtomArray | np.ndarray, + force_field: ForceField, + masses=None, + use_cell_list=True, + ): + super().__init__(atoms, force_field, masses, use_cell_list) self._kirchhoff = None - self._covariance = None - - @property - def masses(self): - return self._masses @property - def kirchhoff(self): + def kirchhoff(self) -> np.ndarray: if self._kirchhoff is None: if self._covariance is None: - self._kirchhoff, _ = compute_kirchhoff( - self._coord, self._ff, self._use_cell_list - ) + atom_i, atom_j, _, sq_dist = self._calc_adjacency() + force_constants = self._ff.force_constant(atom_i, atom_j, sq_dist) + + self._kirchhoff = np.zeros((self._natoms, self._natoms)) + self._kirchhoff[atom_i, atom_j] = -force_constants + + # Set values for main diagonal + np.fill_diagonal(self._kirchhoff, -np.sum(self._kirchhoff, axis=0)) + if self._mass_weight_matrix is not None: self._kirchhoff *= self._mass_weight_matrix else: @@ -111,193 +103,65 @@ def kirchhoff(self): return self._kirchhoff @kirchhoff.setter - def kirchhoff(self, value): - if value.shape != (len(self._coord), len(self._coord)): + def kirchhoff(self, value: np.ndarray): + if value.shape != (self._natoms, self._natoms): raise ValueError( - f"Expected shape " - f"{(len(self._coord), len(self._coord))}, " - f"got {value.shape}" + f"Expected shape {(self._natoms, self._natoms)}, got {value.shape}" ) self._kirchhoff = value # Invalidate dependent values self._covariance = None - - @property - def covariance(self): - if self._covariance is None: - self._covariance = np.linalg.pinv( - self.kirchhoff, hermitian=True, rcond=1e-6 - ) - return self._covariance - - @covariance.setter - def covariance(self, value): - if value.shape != (len(self._coord), len(self._coord)): - raise IndexError( - f"Expected shape " - f"{(len(self._coord), len(self._coord))}, " - f"got {value.shape}" - ) - self._covariance = value + self._eigen_values = None + self._eigen_values_zero = 0 + self._eigen_vectors = None + self._free_energy_contrib = None + + @ENM.covariance.setter + @override + def covariance(self, value: np.ndarray): + super().covariance = value # Invalidate dependent values self._kirchhoff = None - def eigen(self): - """ - Compute the Eigenvalues and Eigenvectors of the - *Kirchhoff* matrix. - - Returns - ------- - eig_values : ndarray, shape=(k,), dtype=float - Eigenvalues of the *Kirchhoff* matrix in ascending order. - eig_vectors : ndarray, shape=(k,n), dtype=float - Eigenvectors of the *Kirchhoff* matrix. - ``eig_values[i]`` corresponds to ``eigenvectors[i]``. - """ - return nma.eigen(self) - - def frequencies(self): + @property + @override + def dof_per_node(self) -> int: """ - Compute the oscillation frequencies of the model. - - The first mode corresponds to rigid-body translations/rotations - and is omitted in the return value. - The returned units are arbitrary and should only be compared - relative to each other. - Returns ------- - frequencies : ndarray, shape=(n,), dtype=float - Oscillation frequencies of the model in in ascending order. - *NaN* values mark frequencies corresponding to translations - or rotations. + dof_per_node : int + Returns the Degree of Freedom per atom. """ - return nma.frequencies(self) + return 1 - def mean_square_fluctuation(self, mode_subset=None, tem=None, tem_factors=K_B): - """ - Compute the *mean square fluctuation* for the atoms according to - the GNM. - This is equal to the sum of the diagonal of of the - GNM covariance matrix, if all k-1 non-trivial - modes are considered. - - Parameters - ---------- - mode_subset : ndarray, shape=(n,), dtype=int, optional - Specifies the subset of modes considered in the MSF - computation. - Only non-trivial modes can be selected. - The first mode is counted as 0 in accordance with - Python conventions. - If mode_subset is None, all modes except the first - trivial mode (0) are included. - tem : int, float, None, optional - Temperature in Kelvin to compute the temperature scaling - factor by multiplying with the Boltzmann constant. - If tem is None, no temperature scaling is conducted. - tem_factors : int, float, optional - Factors included in temperature weighting - (with K_B as preset). - - Returns - ------- - msqf : ndarray, shape=(n,), dtype=float - The mean square fluctuations for each atom in the model. - """ - return nma.mean_square_fluctuation(self, mode_subset, tem, tem_factors) - - def bfactor(self, mode_subset=None, tem=None, tem_factors=K_B): - """ - Computes the isotropic B-factors/temperature factors/ - Deby-Waller factors for atoms/coarse-grained nodes using - the mean-square fluctuation. + @property + @override + def _interactions(self) -> np.ndarray | None: + return self._kirchhoff - These can be used to relate results obtained from ENMs - to experimental results. + @staticmethod + @override + def _calc_mass_weight_matrix(masses: np.ndarray) -> np.ndarray: + mass_weights = 1 / np.sqrt(masses) + return np.outer(mass_weights, mass_weights) - Parameters - ---------- - mode_subset : ndarray, shape=(n,), dtype=int, optional - Specifies the subset of modes considered in the MSF - computation. - Only non-trivial modes can be selected. - The first mode is counted as 0 in accordance with - Python conventions. - If mode_subset is None, all modes except the first - trivial mode (0) are included. - tem : int, float, None, optional - Temperature in Kelvin to compute the temperature scaling - factor by multiplying with the Boltzmann constant. - If tem is None, no temperature scaling is conducted. - tem_factors : int, float, optional - Factors included in temperature weighting - (with K_B as preset). - Returns - ------- - bfac_values : ndarray, shape=(n,), dtype=float - B-factors of C-alpha atoms. + @override + def eigen(self) -> tuple[np.ndarray, np.ndarray, int]: """ - return nma.bfactor(self, mode_subset, tem, tem_factors) - - def dcc(self, mode_subset=None, norm=True, tem=None, tem_factors=K_B): - r""" - Computes the normalized *dynamic cross-correlation* between - nodes of the GNM. - - The DCC is a measure for the correlation in fluctuations - exhibited by a given pair of nodes. If normalized, pairs with - correlated fluctuations (same phase and period), - anticorrelated fluctuations (opposite phase, same period) - and non-correlated fluctuations are assigned (normalized) - DCC values of 1, -1 and 0 respectively. - For results consistent with MSFs, temperature-weighted - absolute values can be computed (only relevant if results - are not normalized). - - Parameters - ---------- - mode_subset : ndarray, shape=(n,), dtype=int, optional - Specifies the subset of modes considered in the MSF - computation. - Only non-trivial modes can be selected. - The first mode is counted as 0 in accordance with - Python conventions. - If mode_subset is None, all modes except the first - trivial mode (0) are included. - norm : bool, optional - Normalize the DCC using the MSFs of interacting nodes. - tem : int, float, None, optional - Temperature in Kelvin to compute the temperature scaling - factor by multiplying with the Boltzmann constant. - If tem is None, no temperature scaling is conducted. - tem_factors : int, float, optional - Factors included in temperature weighting - (with :math:`k_B` as preset). + Compute the Eigenvalues and Eigenvectors of the + *Kirchhoff* matrix. Returns ------- - dcc : ndarray, shape=(n, n), dtype=float - DCC values for ENM nodes. - - Notes - ----- - The DCC for a nodepair :math:`ij` is computed as: - - .. math:: - - DCC_{ij} = \frac{3 k_B T}{\gamma} \sum_k^L \left[ \frac{\vec{u}_k \cdot \vec{u}_k^T}{\lambda_k} \right]_{ij} - - with :math:`\lambda` and :math:`\vec{u}` as - Eigenvalues and Eigenvectors corresponding to mode :math:`k` of - the modeset :math:`L`. - - DCCs can be normalized to MSFs exhibited by two compared nodes - following: + eig_values : ndarray, shape=(k,), dtype=float + Eigenvalues of the *Kirchhoff* matrix in ascending order. - .. math:: + This is not a copy: Create a copy before modifying this matrix. + eig_vectors : ndarray, shape=(k,n), dtype=float + Eigenvectors of the *Kirchhoff* matrix. + ``eig_values[i]`` corresponds to ``eigenvectors[i]``. - nDCC_{ij} = \frac{DCC_{ij}}{[DCC_{ii} DCC_{jj}]^{1/2}} + This is not a copy: Create a copy before modifying this matrix. """ - return nma.dcc(self, mode_subset, norm, tem, tem_factors) + # only called for proper docstring + return super().eigen() diff --git a/src/springcraft/interaction.py b/src/springcraft/interaction.py index a81f0c7..b6a4ef7 100644 --- a/src/springcraft/interaction.py +++ b/src/springcraft/interaction.py @@ -3,15 +3,29 @@ i.e. Kirchhoff and Hessian matrices. """ +from __future__ import annotations + +from typing_extensions import deprecated + __name__ = "springcraft" __author__ = "Patrick Kunzmann, Jan Krumbach" __all__ = ["compute_kirchhoff", "compute_hessian"] +from typing import TYPE_CHECKING + import biotite.structure as struc import numpy as np +if TYPE_CHECKING: + from springcraft.forcefield import ForceField + -def compute_kirchhoff(coord, force_field, use_cell_list=True): +@deprecated("Use GNM property instead.") +def compute_kirchhoff( + coord: np.ndarray, + force_field: ForceField, + use_cell_list: bool = True, +) -> tuple[np.ndarray, np.ndarray]: """ Compute the *Kirchhoff* matrix for atoms with given coordinates and the chosen force field. @@ -54,7 +68,12 @@ def compute_kirchhoff(coord, force_field, use_cell_list=True): return kirchhoff, pairs -def compute_hessian(coord, force_field, use_cell_list=True): +@deprecated("Use ANM property instead.") +def compute_hessian( + coord: np.ndarray, + force_field: ForceField, + use_cell_list: bool = True, +) -> tuple[np.ndarray, np.ndarray]: """ Compute the *Hessian* matrix for atoms with given coordinates and the chosen force field. @@ -111,7 +130,11 @@ def compute_hessian(coord, force_field, use_cell_list=True): return hessian, pairs -def _prepare_values_for_interaction_matrix(coord, force_field, use_cell_list): +def _prepare_values_for_interaction_matrix( + coord: np.ndarray, + force_field: ForceField, + use_cell_list: bool, +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """ Check input values and calculate common intermediate values for :func:`compute_kirchhoff()` and :func:`compute_hessian()`. @@ -152,7 +175,7 @@ def _prepare_values_for_interaction_matrix(coord, force_field, use_cell_list): # Include all possible interactions, except an atom with itself adj_matrix = np.ones((len(coord), len(coord)), dtype=bool) elif use_cell_list: - cell_list = struc.CellList( + cell_list = struc.CellList( # pyright: ignore[reportAttributeAccessIssue] coord, cutoff_distance, ) @@ -184,15 +207,18 @@ def _prepare_values_for_interaction_matrix(coord, force_field, use_cell_list): sq_dist = np.sum(disp * disp, axis=-1) else: # Displacements and squared distances were already calculated - disp = disp_matrix[pairs[:, 0], pairs[:, 1]] - sq_dist = sq_dist_matrix[pairs[:, 0], pairs[:, 1]] + disp = disp_matrix[pairs[:, 0], pairs[:, 1]] # pyright: ignore[reportPossiblyUnboundVariable] + sq_dist = sq_dist_matrix[pairs[:, 0], pairs[:, 1]] # pyright: ignore[reportPossiblyUnboundVariable] return pairs, disp, sq_dist def _patch_adjacency_matrix( - matrix, contact_shutdown, contact_pair_off, contact_pair_on -): + matrix: np.ndarray, + contact_shutdown: np.ndarray | None, + contact_pair_off: np.ndarray | None, + contact_pair_on: np.ndarray | None, +) -> None: """ Apply contacts that are artificially switched off/on to an adjacency matrix. diff --git a/src/springcraft/nma.py b/src/springcraft/nma.py index 84c8305..c32b528 100644 --- a/src/springcraft/nma.py +++ b/src/springcraft/nma.py @@ -17,7 +17,10 @@ "effector_sensor", ] +from typing import Literal + import numpy as np +from typing_extensions import deprecated # -> Import ANM/GNM in functions to prevent circular import error @@ -26,7 +29,8 @@ ## NMA functions for GNMs/ANMs -def eigen(enm): +@deprecated("Use class method instead.") +def eigen(enm) -> tuple[np.ndarray, np.ndarray]: """ Compute the Eigenvalues and Eigenvectors of the *Kirchhoff*/*Hessian* matrix for GNMs and ANMs respectively. @@ -46,24 +50,10 @@ def eigen(enm): Eigenvectors of the *Kirchhoff*/*Hessian* matrix. ``eig_values[i]`` corresponds to ``eig_vectors[i]``. """ - from .anm import ANM - from .gnm import GNM - - # Assign Kirchhoff/Hessian - if isinstance(enm, GNM): - mech_matrix = enm.kirchhoff - elif isinstance(enm, ANM): - mech_matrix = enm.hessian - else: - raise ValueError("Instance of GNM/ANM class expected.") + return enm.eigen() - # 'np.eigh' can be used since the Hessian/Kirchhoff matrix is symmetric - eig_values, eig_vectors = np.linalg.eigh(mech_matrix) - return eig_values, eig_vectors.T - - -def frequencies(enm): +def frequencies(enm) -> np.ndarray: """ Computes the frequency associated with each mode. @@ -94,7 +84,7 @@ def frequencies(enm): else: raise ValueError("Instance of GNM/ANM class expected.") - eig_values, _ = eigen(enm) + eig_values, _, ntriv_modes = enm.eigen() # The very first / first six Eigenvalue(s) is/are usually close to 0; # but can have a negative sign. @@ -105,7 +95,12 @@ def frequencies(enm): return freq -def mean_square_fluctuation(enm, mode_subset=None, tem=None, tem_factors=K_B): +def mean_square_fluctuation( + enm, + mode_subset: np.ndarray | None = None, + tem: int | float | None = None, + tem_factors: int | float = K_B, +) -> np.ndarray: """ Compute the *mean square fluctuation* for the atoms according to the ANM/GNM. @@ -142,17 +137,15 @@ def mean_square_fluctuation(enm, mode_subset=None, tem=None, tem_factors=K_B): if not isinstance(enm, (GNM, ANM)): raise ValueError("Instance of GNM/ANM class expected.") - eig_values, eig_vectors = eigen(enm) + eig_values, eig_vectors, ntriv_modes = enm.eigen() if isinstance(enm, ANM): # Eigenvectors: 3N -> N cols_n = np.arange(0, len(eig_vectors[0]), 3) eig_vectors = np.add.reduceat(np.square(eig_vectors), cols_n, axis=1) - ntriv_modes = 6 # -> GNMs else: eig_vectors = np.square(eig_vectors) - ntriv_modes = 1 # Choose modes included in computation; raise error, if trivial # modes are included @@ -184,7 +177,12 @@ def mean_square_fluctuation(enm, mode_subset=None, tem=None, tem_factors=K_B): return msqf -def bfactor(enm, mode_subset=None, tem=None, tem_factors=K_B): +def bfactor( + enm, + mode_subset: np.ndarray | None = None, + tem: int | float | None = None, + tem_factors: int | float = K_B, +) -> np.ndarray: """ Computes the isotropic B-factors/temperature factors/ Deby-Waller factors for atoms/coarse-grained nodes using @@ -230,7 +228,13 @@ def bfactor(enm, mode_subset=None, tem=None, tem_factors=K_B): return b_factors -def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B): +def dcc( + enm, + mode_subset: np.ndarray | None = None, + norm: bool = True, + tem: int | float | None = None, + tem_factors: int | float = K_B, +) -> np.ndarray: r""" Computes the normalized *dynamic cross-correlation* between nodes of the GNM/ANM. @@ -293,16 +297,14 @@ def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B): from .anm import ANM from .gnm import GNM - eig_values, eig_vectors = enm.eigen() + eig_values, eig_vectors, ntriv_modes = enm.eigen() n_nodes = len(enm._coord) if isinstance(enm, ANM): is_gnm = False - ntriv_modes = 6 num_dim = 3 elif isinstance(enm, GNM): is_gnm = True - ntriv_modes = 1 num_dim = 1 else: raise ValueError("Instance of GNM/ANM class expected.") @@ -359,8 +361,45 @@ def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B): return dcc +def free_energy_contribution(enm, tem=None, tem_factors=K_B) -> float: + """ + Calculates the contribution of the protein configuration to the + Helmholtz free energy. + + According to + Hamacher K. Free energy of contact formation in proteins: efficient computation in the elastic network approximation. Phys Rev E Stat Nonlin Soft Matter Phys. 2011 Jul;84(1 Pt 2):016703. doi: 10.1103/PhysRevE.84.016703. Epub 2011 Jul 7. PMID: 21867339. + """ + + from .enm import ENM + from .gnm import GNM + + if not isinstance(enm, ENM): + raise ValueError("Instance of ENM class expected.") + + eig_vals, _, nzero = enm.eigen() + dof = enm.dof_per_node + + tem_scaling = 1 + if tem is not None: + tem_scaling = tem * tem_factors + + pdet_log = np.multiply.reduce(np.log(eig_vals[nzero:])) + base = (len(eig_vals) - nzero * dof) * np.log(2 * np.pi * tem_scaling) + free_energy_contrib = tem_scaling * (pdet_log - base) + if isinstance(enm, GNM): + free_energy_contrib *= 3 + + return free_energy_contrib + + ## ANM specific functions -def normal_mode(anm, index, amplitude, frames, movement="sine"): +def normal_mode( + anm, + index: int, + amplitude: int, + frames: int, + movement: Literal["sine", "triangle"] = "sine", +) -> np.ndarray: """ Create displacements for a trajectory depicting the given normal mode for ANMs. @@ -382,7 +421,7 @@ def normal_mode(anm, index, amplitude, frames, movement="sine"): value for an atom is the given value. frames : int The number of frames (models) per oscillation. - movement : {'sinusoidal', 'triangle'} + movement : {'sine', 'triangle'} Defines how to depict the oscillation. If set to ``'sine'`` the atom movement is sinusoidal. If set to ``'triangle'`` the atom movement is linear with @@ -399,7 +438,7 @@ def normal_mode(anm, index, amplitude, frames, movement="sine"): if not isinstance(anm, ANM): raise ValueError("Instance of ANM class expected.") else: - _, eig_vectors = eigen(anm) + _, eig_vectors, _ = anm.eigen() # Extract vectors for given mode and reshape to (n,3) array mode_vectors = eig_vectors[index].reshape((-1, 3)) # Rescale, so that the largest vector has the length 'amplitude' @@ -419,7 +458,7 @@ def normal_mode(anm, index, amplitude, frames, movement="sine"): return disp -def linear_response(anm, force): +def linear_response(anm, force: np.ndarray) -> np.ndarray: """ Compute the atom displacement induced by the given force using *Linear Response Theory*. [1]_ @@ -473,7 +512,7 @@ def linear_response(anm, force): return np.dot(anm.covariance, force).reshape(len(anm._coord), 3) -def prs(anm, norm=True): +def prs(anm, norm: bool = True) -> np.ndarray: """ Compute the perturbation response scanning matrix following Atilgan et al. [1]_ @@ -524,7 +563,7 @@ def prs(anm, norm=True): return prs_matrix -def effector_sensor(prs_matrix): +def effector_sensor(prs_matrix: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ Compute effector/sensor residues according to the PRS-Matrix as described in General et al. [1]_ diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py index 892f40c..6fd14ae 100644 --- a/tests/test_forcefield.py +++ b/tests/test_forcefield.py @@ -5,8 +5,8 @@ import biotite.structure.io.pdb as pdb import numpy as np import pytest + import springcraft -from springcraft.forcefield import InvariantForceField from .util import data_dir @@ -37,81 +37,185 @@ def atoms_singlechain(atoms): def test_patched_force_field_shutdown(atoms): - N_CONTACTS = 5 + N_CONTACTS_1 = 5 + N_CONTACTS_2 = 2 np.random.seed(0) shutdown_indices = np.random.choice( - np.arange(len(atoms)), size=N_CONTACTS, replace=False + np.arange(len(atoms)), size=N_CONTACTS_1 + N_CONTACTS_2, replace=False ) + shutdown_indices_1 = shutdown_indices[:N_CONTACTS_1] + shutdown_indices_2 = shutdown_indices[N_CONTACTS_1:] - ref_ff = InvariantForceField(7.0) - ref_kirchhoff, _ = springcraft.compute_kirchhoff(atoms.coord, ref_ff) + ref_ff = springcraft.TabulatedForceField.e_anm(atoms) + ref_kirchhoff_1 = springcraft.GNM(atoms, ref_ff).kirchhoff # Manual shutdown of contacts after Kirchhoff calculation - ref_kirchhoff[shutdown_indices, :] = 0 - ref_kirchhoff[:, shutdown_indices] = 0 + ref_kirchhoff_1[shutdown_indices_1, :] = 0 + ref_kirchhoff_1[:, shutdown_indices_1] = 0 + + ref_kirchhoff_2 = ref_kirchhoff_1.copy() + ref_kirchhoff_2[shutdown_indices_2, :] = 0 + ref_kirchhoff_2[:, shutdown_indices_2] = 0 + + with pytest.raises(IndexError, match="out of bounds for a structure of length"): + springcraft.PatchedForceField(ref_ff, contact_shutdown=np.array(-1)) + with pytest.raises(IndexError, match="out of bounds for a structure of length"): + springcraft.PatchedForceField(ref_ff, contact_shutdown=np.array(40)) - test_ff = springcraft.PatchedForceField(ref_ff, contact_shutdown=shutdown_indices) - test_kirchhoff, _ = springcraft.compute_kirchhoff(atoms.coord, test_ff) + test_ff_1 = springcraft.PatchedForceField( + ref_ff, contact_shutdown=shutdown_indices_1 + ) + test_kirchhoff_1 = springcraft.GNM(atoms, test_ff_1).kirchhoff + + # chained patched FF should combine shutdown indices + test_ff_2 = springcraft.PatchedForceField( + test_ff_1, contact_shutdown=shutdown_indices_2 + ) + test_kirchhoff_2 = springcraft.GNM(atoms, test_ff_2).kirchhoff # Main diagonal is not easily adjusted # -> simply set main diagonal of ref and test matrix to 0 - np.fill_diagonal(test_kirchhoff, 0) - np.fill_diagonal(ref_kirchhoff, 0) - assert np.all(test_kirchhoff == ref_kirchhoff) + np.fill_diagonal(ref_kirchhoff_1, 0) + np.fill_diagonal(test_kirchhoff_1, 0) + np.fill_diagonal(ref_kirchhoff_2, 0) + np.fill_diagonal(test_kirchhoff_2, 0) + assert np.all(test_kirchhoff_1 == ref_kirchhoff_1) + assert np.all(test_kirchhoff_2 == ref_kirchhoff_2) def test_patched_force_field_pairs_off(atoms): - N_CONTACTS = 5 + N_CONTACTS_1 = 3 + N_CONTACTS_2 = 2 np.random.seed(0) off_indices = np.random.choice( - np.arange(len(atoms)), size=(N_CONTACTS, 2), replace=False + np.arange(len(atoms)), size=(N_CONTACTS_1 + N_CONTACTS_2, 2), replace=False ) + off_indices_1 = off_indices[:N_CONTACTS_1] + off_indices_2 = off_indices[N_CONTACTS_1:] - ref_ff = InvariantForceField(7.0) - ref_kirchhoff, _ = springcraft.compute_kirchhoff(atoms.coord, ref_ff) + ref_ff = springcraft.TabulatedForceField.e_anm(atoms) + ref_kirchhoff_1 = springcraft.GNM(atoms, ref_ff).kirchhoff # Manual shutdown of contacts after Kirchhoff calculation - atom_i, atom_j = off_indices.T - ref_kirchhoff[atom_i, atom_j] = 0 - ref_kirchhoff[atom_j, atom_i] = 0 + atom_i, atom_j = off_indices_1.T + ref_kirchhoff_1[atom_i, atom_j] = 0 + ref_kirchhoff_1[atom_j, atom_i] = 0 + + atom_i, atom_j = off_indices_2.T + ref_kirchhoff_2 = ref_kirchhoff_1.copy() + ref_kirchhoff_2[atom_i, atom_j] = 0 + ref_kirchhoff_2[atom_j, atom_i] = 0 - test_ff = springcraft.PatchedForceField(ref_ff, contact_pair_off=off_indices) - test_kirchhoff, _ = springcraft.compute_kirchhoff(atoms.coord, test_ff) + with pytest.raises(IndexError, match="out of bounds for a structure of length"): + springcraft.PatchedForceField(ref_ff, contact_shutdown=np.array(-1)) + with pytest.raises(IndexError, match="out of bounds for a structure of length"): + springcraft.PatchedForceField(ref_ff, contact_shutdown=np.array(40)) + + test_ff_1 = springcraft.PatchedForceField(ref_ff, contact_pair_off=off_indices_1) + test_kirchhoff_1 = springcraft.GNM(atoms, test_ff_1).kirchhoff + + # chained patched FF should combine off indices + test_ff_2 = springcraft.PatchedForceField(test_ff_1, contact_pair_off=off_indices_2) + test_kirchhoff_2 = springcraft.GNM(atoms, test_ff_2).kirchhoff # Main diagonal is not easily adjusted # -> simply set main diagonal of ref and test matrix to 0 - np.fill_diagonal(test_kirchhoff, 0) - np.fill_diagonal(ref_kirchhoff, 0) - assert np.all(test_kirchhoff == ref_kirchhoff) + np.fill_diagonal(ref_kirchhoff_1, 0) + np.fill_diagonal(test_kirchhoff_1, 0) + np.fill_diagonal(ref_kirchhoff_2, 0) + np.fill_diagonal(test_kirchhoff_2, 0) + assert np.all(test_kirchhoff_1 == ref_kirchhoff_1) + assert np.all(test_kirchhoff_2 == ref_kirchhoff_2) def test_patched_force_field_pairs_on(atoms): - N_CONTACTS = 5 + N_CONTACTS_1 = 3 + N_CONTACTS_2 = 2 np.random.seed(0) on_indices = np.random.choice( - np.arange(len(atoms)), size=(N_CONTACTS, 2), replace=False + np.arange(len(atoms)), size=(N_CONTACTS_1 + N_CONTACTS_2, 2), replace=False ) - force_constants = np.random.rand(N_CONTACTS) - - ref_ff = InvariantForceField(7.0) - ref_kirchhoff, _ = springcraft.compute_kirchhoff(atoms.coord, ref_ff) - # Manual shutdown of contacts after Kirchhoff calculation - atom_i, atom_j = on_indices.T - ref_kirchhoff[atom_i, atom_j] = -force_constants - ref_kirchhoff[atom_j, atom_i] = -force_constants + on_indices_1 = on_indices[:N_CONTACTS_1] + on_indices_2 = on_indices[N_CONTACTS_1:] + force_constants = np.random.rand(N_CONTACTS_1 + N_CONTACTS_2) + force_constants_1 = force_constants[:N_CONTACTS_1] + force_constants_2 = force_constants[N_CONTACTS_1:] + + ref_ff = springcraft.TabulatedForceField.e_anm(atoms) + ref_kirchhoff_1 = springcraft.GNM(atoms, ref_ff).kirchhoff + # Manual change of contacts after Kirchhoff calculation + atom_i, atom_j = on_indices_1.T + ref_kirchhoff_1[atom_i, atom_j] = -force_constants_1 + ref_kirchhoff_1[atom_j, atom_i] = -force_constants_1 + + atom_i, atom_j = on_indices_2.T + ref_kirchhoff_2 = ref_kirchhoff_1.copy() + ref_kirchhoff_2[atom_i, atom_j] = -force_constants_2 + ref_kirchhoff_2[atom_j, atom_i] = -force_constants_2 + + with pytest.raises(IndexError, match="out of bounds for a structure of length"): + springcraft.PatchedForceField(ref_ff, contact_shutdown=np.array(-1)) + with pytest.raises(IndexError, match="out of bounds for a structure of length"): + springcraft.PatchedForceField(ref_ff, contact_shutdown=np.array(40)) + with pytest.raises(TypeError, match="Individual force constants must be given"): + springcraft.PatchedForceField(ref_ff, contact_pair_on=on_indices) + with pytest.raises(IndexError, match="force constants were given for"): + springcraft.PatchedForceField( + ref_ff, contact_pair_on=on_indices, force_constants=force_constants_1 + ) + + test_ff_1 = springcraft.PatchedForceField( + ref_ff, contact_pair_on=on_indices_1, force_constants=force_constants_1 + ) + test_kirchhoff_1 = springcraft.GNM(atoms, test_ff_1).kirchhoff - test_ff = springcraft.PatchedForceField( - ref_ff, contact_pair_on=on_indices, force_constants=force_constants + # chained patched FF should combine on indices + test_ff_2 = springcraft.PatchedForceField( + test_ff_1, contact_pair_on=on_indices_2, force_constants=force_constants_2 ) - test_kirchhoff, _ = springcraft.compute_kirchhoff(atoms.coord, test_ff) + test_kirchhoff_2 = springcraft.GNM(atoms, test_ff_2).kirchhoff # Main diagonal is not easily adjusted # -> simply set main diagonal of ref and test matrix to 0 - np.fill_diagonal(test_kirchhoff, 0) - np.fill_diagonal(ref_kirchhoff, 0) - np.set_printoptions(threshold=10000, linewidth=1000) - assert np.all(test_kirchhoff == ref_kirchhoff) + np.fill_diagonal(ref_kirchhoff_1, 0) + np.fill_diagonal(test_kirchhoff_1, 0) + np.fill_diagonal(ref_kirchhoff_2, 0) + np.fill_diagonal(test_kirchhoff_2, 0) + assert np.all(test_kirchhoff_1 == ref_kirchhoff_1) + assert np.all(test_kirchhoff_2 == ref_kirchhoff_2) + + +def test_patched_force_field_propagates_update(atoms): + """ + Tests whether the PatchedForceField calls the update method of the + underlying ForceField resulting in force changes. + """ + # Create symmetric random type-specific interaction matrices + np.random.seed(0) + triu = np.triu(np.random.rand(3, 20, 20)) + bonded, intra, inter = triu + np.transpose(triu, (0, 2, 1)) + + ff = springcraft.TabulatedForceField(atoms, bonded, intra, inter, None) + patched_ff = springcraft.PatchedForceField(ff) + force_before = patched_ff.force_constant(np.array(0), np.array(1), np.array(1)) + atoms.res_name[1] = "ALA" + assert patched_ff.update(1, atoms[1]) + force_after = patched_ff.force_constant(np.array(0), np.array(1), np.array(1)) + assert force_before != force_after + + +def test_invariant_force_field(atoms): + "Tests whether the basic InvariantForceField works." + N_CONTACTS = 5 + CUTOFF_DIST = 7.0 + + ff = springcraft.InvariantForceField(CUTOFF_DIST) + force_constants = ff.force_constant( + np.arange(N_CONTACTS), np.arange(N_CONTACTS), np.arange(N_CONTACTS) + ) + assert np.all(force_constants == np.ones(N_CONTACTS)) + assert ff.cutoff_distance == CUTOFF_DIST def test_tabulated_forcefield_homogeneous(atoms): @@ -256,7 +360,7 @@ def test_tabulated_forcefield_cutoff(atoms, cutoff_distance): that simply represents adjacency. """ ff = springcraft.TabulatedForceField(atoms, 1, 1, 1, cutoff_distance) - kirchhoff, _ = springcraft.compute_kirchhoff(atoms.coord, ff) + kirchhoff = springcraft.GNM(atoms, ff).kirchhoff ref_adj_matrix = -kirchhoff np.fill_diagonal(ref_adj_matrix, 0) assert np.isin(ref_adj_matrix.flatten(), [0, 1]).all() @@ -334,6 +438,59 @@ def test_tabulated_forcefield_predefined(atoms, name): assert ff is not None +@pytest.mark.parametrize( + "idx, res_name, expected", + [ + [0, "GLU", True], # start of first chain + [5, "PHE", True], # middle of chain + [19, "ALA", True], # end of first chain + [20, "GLU", True], # start of second chain + [39, "ALA", True], # end of second chain + [1, "LEU", False], # no change + ], +) +def test_tabulated_forcefield_update(atoms, idx, res_name, expected): + """ + Test the pertubation of the ForceField. A pertubation is considered + successful if the pertubated ForceField results in the same + interaction matrix as a ForceField created with the pertubated atoms. + Special attention needs to be given to edge cases (endings of chains). + """ + N_BINS = 3 + + upper = np.triu(np.ones((20, 20), dtype=bool), 1) + np.random.seed(0) + bonded = np.random.rand(20, 20, N_BINS) + bonded[upper, :] = bonded.transpose(1, 0, 2)[upper, :] + intra = np.random.rand(20, 20, N_BINS) + intra[upper, :] = intra.transpose(1, 0, 2)[upper, :] + inter = np.random.rand(20, 20, N_BINS) + inter[upper, :] = inter.transpose(1, 0, 2)[upper, :] + edges = np.sort(np.random.random(N_BINS)) + + ff = springcraft.TabulatedForceField(atoms, bonded, intra, inter, edges) + + atoms.res_name[idx] = res_name + assert ff.update(idx, atoms[idx]) == expected + + ff_new = springcraft.TabulatedForceField(atoms, bonded, intra, inter, edges) + pytest.approx(ff.interaction_matrix, ff_new.interaction_matrix) + + +def test_tabulated_forcefield_update_checks(atoms): + """ + Tests whether the input arguments are checked correctly. + """ + ff = springcraft.TabulatedForceField(atoms, 1, 1, 1, None) + new_atom = atoms[0] + with pytest.raises(IndexError): + ff.update(-1, new_atom) + with pytest.raises(IndexError): + ff.update(len(atoms) + 1, new_atom) + with pytest.raises(TypeError): + ff.update(0, "LEU") + + def test_parameterfree_forcefield(): """ Test whether all entries in the kirchhoff matrix are @@ -349,7 +506,7 @@ def test_parameterfree_forcefield(): ref_kirchhoff = -1 / dist_matrix**2 ff = springcraft.ParameterFreeForceField() - test_kirchhoff, _ = springcraft.compute_kirchhoff(coord, ff) + test_kirchhoff = springcraft.GNM(coord, ff).kirchhoff # Ignore main diagonal -> Set main diagonal of both matrices to 0 np.fill_diagonal(ref_kirchhoff, 0) @@ -375,7 +532,7 @@ def test_compare_with_biophysconnector_heterogenous(atoms_singlechain, ff_name): ff = springcraft.TabulatedForceField.e_anm_ke(atoms_singlechain) ref_file = "biophysconnector_anm_eanm_ke_hessian_1l2y.csv" - test_hessian, _ = springcraft.compute_hessian(atoms_singlechain.coord, ff) + test_hessian = springcraft.ANM(atoms_singlechain.coord, ff).hessian # Load .csv.gz file data from BiophysConnectoR ref_hessian = np.genfromtxt( @@ -397,7 +554,6 @@ def test_compare_with_bio3d(atoms_singlechain, ff_name): The following ENM forcefields are compared: Hinsen-Calpha, sdENM and pfENM. """ - atoms_singlechain if ff_name == "Hinsen": ff = springcraft.HinsenForceField() ff_bio3d_str = "calpha" @@ -413,7 +569,7 @@ def test_compare_with_bio3d(atoms_singlechain, ff_name): delimiter=",", ) - test_hessian, _ = springcraft.compute_hessian(atoms_singlechain.coord, ff) + test_hessian = springcraft.ANM(atoms_singlechain.coord, ff).hessian # Higher deviation for Hinsen-FF if ff_name == "Hinsen": diff --git a/tests/test_gnm.py b/tests/test_gnm.py index db728c4..0e7fcc0 100644 --- a/tests/test_gnm.py +++ b/tests/test_gnm.py @@ -4,6 +4,7 @@ import biotite.structure.io.pdb as pdb import numpy as np import pytest + import springcraft from .util import data_dir @@ -16,6 +17,7 @@ def prepare_gnm(file_path, cutoff): ff = springcraft.InvariantForceField(cutoff) test_gnm = springcraft.GNM(ca, ff) + test_gnm.kirchhoff return test_gnm @@ -60,7 +62,7 @@ def test_eigen(file_path, cutoff): """ test_gnm = prepare_gnm(file_path, cutoff) - test_eig_values, test_eig_vectors = test_gnm.eigen() + test_eig_values, test_eig_vectors, _ = test_gnm.eigen() pdb_name = basename(file_path).split(".")[0] @@ -150,3 +152,178 @@ def test_fluctuation_dcc(file_path, cutoff): assert np.allclose(test_dcc, reference_dcc) assert np.allclose(test_dcc_subset, reference_dcc_norm_subset) assert np.allclose(test_dcc_absolute, reference_dcc_absolute) + + +@pytest.mark.parametrize("nb_of_changes", [1, 3, 20, 27]) +def test_modify_contact_pair(nb_of_changes): + """ + Tests whether permutations to the `kirchhoff` matrix are + performed correctly and the resulting permutations to the + `covariance` matrix are correct. + + TODO integrate numerical tests here? + + Parameters + ---------- + nb_of_changes : int + number of permutations to perform + """ + pdb_file = pdb.PDBFile.read(join(data_dir(), "1l2y.pdb")) + atoms = pdb.get_structure(pdb_file, model=1) + ca = atoms[(atoms.atom_name == "CA") & (atoms.element == "C")] + ff = springcraft.InvariantForceField(7.9) + + gnm = springcraft.GNM(ca, ff) + ref_kirchhoff = gnm.kirchhoff.copy() + gnm.covariance + + rng = np.random.default_rng(1) + atom_i, atom_j = np.zeros((2, nb_of_changes), dtype=int) + delta = (rng.random(nb_of_changes) + 0.5) * rng.choice((-1, 1), nb_of_changes) + for k in range(nb_of_changes): + atom_i[k], atom_j[k] = rng.choice(len(ca), size=2, replace=False) + + ref_kirchhoff[atom_i[k], atom_j[k]] += delta[k] + ref_kirchhoff[atom_j[k], atom_i[k]] += delta[k] + ref_kirchhoff[atom_i[k], atom_i[k]] -= delta[k] + ref_kirchhoff[atom_j[k], atom_j[k]] -= delta[k] + + ref_gnm = springcraft.GNM(ca, ff) + ref_gnm.kirchhoff = ref_kirchhoff + ref_covariance = ref_gnm.covariance + + gnm._modify_contact_pair(atom_i, atom_j, delta) + mod_kirchhoff = gnm.kirchhoff + mod_covariance = gnm.covariance + + assert np.allclose(mod_kirchhoff, ref_kirchhoff) + print(np.max(np.abs(mod_covariance - ref_covariance))) + assert np.allclose(mod_covariance, ref_covariance) + + +@pytest.mark.parametrize( + "atom_i, atom_j, delta, expected_kirchhoff_change", + [ + (1, 4, 0.3, 0.3), + ([1, 8, 11], [4, 6, 12], [0.3, -1.1, 1e-9], [0.3, -1.1, 0]), + ([1, 11], [4, 12], 0.3, [0.3, 0.3]), + ([1, 3, 8], [4, 7, 11], [False, True, True], [6.616000175476074, 2.4, 0]), + ([1, 11], [4, 12], False, [6.616000175476074, 46.83000183105469]), + ([3, 15], [7, 18], True, [2.4, -11.7]), + ], +) +def test_modify_contacts(atom_i, atom_j, delta, expected_kirchhoff_change): + """ + Tests the wrapper function for correct argument conversion. + """ + pdb_file = pdb.PDBFile.read(join(data_dir(), "1l2y.pdb")) + atoms = pdb.get_structure(pdb_file, model=1) + ca = atoms[(atoms.atom_name == "CA") & (atoms.element == "C")] + ff = springcraft.TabulatedForceField.d_enm(ca) + + gnm = springcraft.GNM(ca, ff) + ref_kirchhoff = gnm.kirchhoff + ref_kirchhoff[3, 7] -= 2.4 + ref_kirchhoff[15, 18] += 11.7 + ref_kirchhoff[8, 11] += 1e-9 + ref_kirchhoff = ref_kirchhoff.copy() + gnm.covariance + + if type(atom_i) is list: + for i, j, d in zip(atom_i, atom_j, expected_kirchhoff_change): + ref_kirchhoff[i, j] += d + ref_kirchhoff[j, i] += d + ref_kirchhoff[i, i] -= d + ref_kirchhoff[j, j] -= d + else: + ref_kirchhoff[atom_i, atom_j] += expected_kirchhoff_change + ref_kirchhoff[atom_j, atom_i] += expected_kirchhoff_change + ref_kirchhoff[atom_i, atom_i] -= expected_kirchhoff_change + ref_kirchhoff[atom_j, atom_j] -= expected_kirchhoff_change + + ref_gnm = springcraft.GNM(ca, ff) + ref_gnm.kirchhoff = ref_kirchhoff + ref_covariance = ref_gnm.covariance + + gnm.modify_contacts(atom_i, atom_j, delta) + mod_kirchhoff = gnm.kirchhoff + mod_covariance = gnm.covariance + + assert np.allclose(mod_kirchhoff, ref_kirchhoff) + assert np.allclose(mod_covariance, ref_covariance) + + +def test_modify_contacts_checks(): + """ + Tests the wrapper function checks the input arguments correctly. + """ + pdb_file = pdb.PDBFile.read(join(data_dir(), "1l2y.pdb")) + atoms = pdb.get_structure(pdb_file, model=1) + ca = atoms[(atoms.atom_name == "CA") & (atoms.element == "C")] + ff = springcraft.InvariantForceField(7.0) + + gnm = springcraft.GNM(ca, ff) + with pytest.raises(ValueError, match="Kirchhoff matrix must exist"): + gnm.modify_contacts([1, 8, 11], [4, 6, 12], [0.3, -1.1, 1e-9]) + + gnm.kirchhoff + gnm.covariance + with pytest.raises(ValueError, match="atom index arrays to have the same size"): + gnm.modify_contacts([1, 8], [4, 6, 12], [0.3, -1.1, 1e-9]) + with pytest.raises(IndexError, match="Expected array indices to be different"): + gnm.modify_contacts([1, 8, 11], [4, 8, 11], [0.3, -1.1, 1e-9]) + with pytest.raises(IndexError, match="Index out of bounds"): + gnm.modify_contacts([-1, 8], [4, 6], [0.3, -1.1]) + with pytest.raises(IndexError, match="Index out of bounds"): + gnm.modify_contacts([1, 8], [20, 6], [0.3, -1.1]) + with pytest.raises(IndexError, match="Index out of bounds"): + gnm.modify_contacts([1, -1], [4, 6], [0.3, -1.1]) + with pytest.raises(IndexError, match="Index out of bounds"): + gnm.modify_contacts([1, 8], [4, 20], [0.3, -1.1]) + with pytest.raises(ValueError, match=r"1 delta .* or as many as updates"): + gnm.modify_contacts([1, 8, 11], [4, 6, 12], [0.3, -1.1]) + with pytest.raises(ValueError, match="invalid literal for int"): + gnm.modify_contacts(["a", 8, 11], [4, 6, 12], [0.3, -1.1, 1e-9]) + with pytest.raises(ValueError, match="invalid literal for int"): + gnm.modify_contacts([1, 8, 11], ["e", 6, 12], [0.3, -1.1, 1e-9]) + with pytest.raises(TypeError, match="Expected delta to be float or bool"): + gnm.modify_contacts([1, 8, 11], [4, 6, 12], ["a", -1.1, 1e-9]) + + +def test_modify_atom(): + """ + Tests the wrapper function for correct argument conversion. + """ + pdb_file = pdb.PDBFile.read(join(data_dir(), "1l2y.pdb")) + atoms = pdb.get_structure(pdb_file, model=1) + ca = atoms[(atoms.atom_name == "CA") & (atoms.element == "C")] + ff = springcraft.TabulatedForceField.e_anm(ca) + # ff = springcraft.InvariantForceField(7.0) + + gnm = springcraft.GNM(ca, ff) + orig_kirchhoff = gnm.kirchhoff.copy() + orig_covariance = gnm.covariance.copy() + + # turn off contact + gnm.modify_atom(7, False) + assert np.allclose(gnm.kirchhoff[7, :], np.zeros(len(ca))) + + ref_gnm = springcraft.GNM(ca, ff) + ref_gnm.kirchhoff = gnm.kirchhoff.copy() + ref_covariance = ref_gnm.covariance + print(np.max(np.abs(gnm.covariance - ref_covariance))) + assert np.allclose(gnm.covariance, ref_covariance) + + # turn on contact + gnm.modify_atom(7, True) + assert np.allclose(gnm.kirchhoff, orig_kirchhoff) + assert np.allclose(gnm.covariance, orig_covariance) + + # change amino acid type + ca.res_name[7] = "GLN" + gnm.modify_atom(7, ca[7]) + ff = springcraft.TabulatedForceField.e_anm(ca) + ref_gnm = springcraft.GNM(ca, ff) + ref_gnm.kirchhoff = gnm.kirchhoff.copy() + ref_covariance = ref_gnm.covariance + assert np.allclose(gnm.covariance, ref_covariance)