Source code for ccu.structure.comparator

"""This module defines the Comparator class.

The Comparator class can be used to determine the similarity of two structures
as follows:

>>> import ase
>>> from ccu.structure.comparator import Comparator
>>> co1 = ase.Atoms("CO", positions=[[0, 0, 0], [1, 0, 0]])
>>> co2 = ase.Atoms("CO", positions=[[0, 1, 1], [1, 1, 1]])
>>> oc = ase.Atoms("OC", positions=[[0, 0, 0], [1, 0, 0]])
>>> Comparator.check_similarity(co1, co2)
True
>>> Comparator.check_similarity(co1, oc)
False
"""

from collections.abc import Iterable
from collections.abc import Sequence
from copy import deepcopy
from itertools import permutations
import math
from typing import TYPE_CHECKING

import ase
import numpy as np
from numpy.linalg import norm

from ccu.structure import fingerprint

if TYPE_CHECKING:
    from numpy.typing import NDArray


class Comparator:
    """An object which compares the similarity of two structures.

    All functionality is exposed through static methods; the class only
    serves as a namespace and holds no state.
    """

    @staticmethod
    def check_similarity(
        structure1: ase.Atoms, structure2: ase.Atoms, tol: float = 5e-2
    ) -> bool:
        """Determines similarity of two structures within a given tolerance.

        Args:
            structure1: An :class:`~ase.Atoms` instance representing the
                first structure to compare.
            structure2: An :class:`~ase.Atoms` instance representing the
                second structure to compare.
            tol: A float specifying the tolerance for the average cumulative
                displacement per fingerprint in Angstroms. Defaults to 5e-2.
                The average cumulative displacement is the cumulative
                displacement between each pair of
                :class:`Fingerprints <ccu.structure.fingerprint.Fingerprint>`
                derived from `structure1` and `structure2` divided by
                ``len()`` of the reference
                :class:`~ccu.structure.fingerprint.Fingerprint`.

        Returns:
            A bool indicating whether or not the two structures are similar
            within the specified tolerance. Structures with a different
            number of atoms or a different chemical composition are never
            similar.

        Note:
            The notion of similarity here can be summarized as:

                Two structures are similar if they can be superimposed via a
                translation operation.

        .. seealso:: :meth:`.Comparator.calculate_cumulative_displacement`
        """
        if len(structure1) != len(structure2):
            return False

        # Structures with different compositions can never be superimposed.
        # Bail out early: the fingerprint comparison below indexes the second
        # fingerprint by every key of the first, so it requires matching
        # element keys.
        symbols1 = sorted(structure1.get_chemical_symbols())
        symbols2 = sorted(structure2.get_chemical_symbols())
        if symbols1 != symbols2:
            return False

        fingerprints1 = fingerprint.Fingerprint.from_structure(structure1)
        fingerprints2 = fingerprint.Fingerprint.from_structure(structure2)
        fingerprints2 = list(
            Comparator.cosort_fingerprints(fingerprints1, fingerprints2)
        )

        for i, fingerprint_ in enumerate(fingerprints2):
            disp = Comparator.calculate_cumulative_displacement(
                fingerprints1[i], fingerprint_
            )
            # Compare the displacement averaged over the reference
            # fingerprint against the tolerance.
            if disp / len(fingerprints1[i]) > tol:
                return False

        return True

    @staticmethod
    def cosort_histograms(
        fingerprint1: fingerprint.Fingerprint,
        fingerprint2: fingerprint.Fingerprint,
    ) -> dict[str, np.ndarray]:
        """Minimizes the cumulative displacement of atoms in each fingerprint.

        Given the first fingerprint, this method determines the ordering of
        the second fingerprint's histogram which minimizes the cumulative
        displacement of atoms in each structure.

        The two supplied `Fingerprints` need not have the same keys or the
        same number of entries under each key. Such cases are handled as
        follows:

        Let :math:`k` be a key in both the histograms of `fingerprint1` and
        `fingerprint2`. Let :math:`p` be the iterable corresponding to the
        key :math:`k` in the histogram of `fingerprint1`, and let :math:`q`
        be the iterable corresponding to the key :math:`k` in the histogram
        of `fingerprint2`.

        If :math:`len(p) > len(q)`, then :math:`q` is ordered according to
        its match with the first :math:`len(q)` elements of :math:`p`.

        If :math:`len(p) <= len(q)`, then the first :math:`len(p)` elements
        of the result are the elements of :math:`q` which best match
        :math:`p`; the remaining elements of :math:`q` follow in their
        original order.

        Keys of `fingerprint2` which are absent from `fingerprint1` are
        omitted from the result.

        Args:
            fingerprint1: The :class:`ccu.structure.fingerprint.Fingerprint`
                object to be used as a reference for each displacement in
                the other `Fingerprint` object's histogram.
            fingerprint2: The :class:`ccu.structure.fingerprint.Fingerprint`
                object for which the optimally ordered histogram is to be
                determined.

        Returns:
            A dict constructed from `fingerprint2` mapping chemical symbols
            to a :class:`numpy.ndarray` containing the displacement vectors
            to atoms with the corresponding chemical symbol. The order of
            the displacement vectors is such that the cumulative
            displacement of the displacement vectors is minimized relative
            to `fingerprint1`.
        """
        histogram = {}
        for element, histogram2 in fingerprint2.items():
            if element not in fingerprint1:
                continue

            # This is used as the "reference" histogram from which the
            # displacement is to be minimized
            histogram1 = fingerprint1[element]
            perm_length = min(len(histogram1), len(histogram2))

            minimal_cumulative_displacement = math.inf
            minimally_displaced_ordering = list(histogram2)

            for displacements in permutations(histogram2, r=perm_length):
                # ``displacements`` never has more rows than ``histogram1``,
                # so ``zip`` pairs row i of the reference with the i-th
                # candidate displacement.
                cumulative_displacement = sum(
                    (
                        float(norm(reference - displacement))
                        for reference, displacement in zip(
                            histogram1, displacements
                        )
                    ),
                    0.0,
                )
                if cumulative_displacement < minimal_cumulative_displacement:
                    minimal_cumulative_displacement = cumulative_displacement
                    minimally_displaced_ordering = list(displacements)

            # Append whatever was not part of the best partial permutation
            # so that no displacement vector of ``histogram2`` is lost.
            missing_displacements = Comparator._missing_displacements(
                histogram2, minimally_displaced_ordering
            )
            minimally_displaced_ordering.extend(missing_displacements)
            histogram[element] = np.vstack(minimally_displaced_ordering)

        return histogram

    @staticmethod
    def _missing_displacements(
        all_displacements: "Iterable[NDArray]",
        minimally_displaced_ordering: "Iterable[NDArray]",
    ) -> "list[NDArray]":
        """Determines the displacements not in the M.D.O.

        Matching is done as a multiset difference: every entry of the
        minimally displaced ordering accounts for at most one entry of
        `all_displacements`. Duplicated displacement vectors are therefore
        only treated as included as many times as they actually appear in
        the M.D.O.

        Args:
            all_displacements: All displacements.
            minimally_displaced_ordering: The displacements in the minimally
                displaced ordering (M.D.O.)

        Returns:
            The missing displacements, in the order in which they appear in
            `all_displacements`. If the M.D.O. is empty, every displacement
            is missing.
        """
        # Work on a private copy so matched entries can be consumed without
        # touching the caller's sequence.
        unmatched = list(minimally_displaced_ordering)
        missing_displacements = []

        for displacement in all_displacements:
            for index, included_displacement in enumerate(unmatched):
                if (displacement == included_displacement).all():
                    del unmatched[index]
                    break
            else:
                # No remaining M.D.O. entry matched (this branch also covers
                # an empty M.D.O.).
                missing_displacements.append(displacement)

        return missing_displacements

    @staticmethod
    def cosort_fingerprints(
        fingerprints1: Sequence[fingerprint.Fingerprint],
        fingerprints2: Sequence[fingerprint.Fingerprint],
    ) -> tuple[fingerprint.Fingerprint, ...]:
        """Determines the second fingerprints's minimally displaced ordering.

        The minimally displaced ordering of the second
        :class:`~ccu.structure.fingerprint.Fingerprint` list relative to the
        first is the ordering of the second supplied iterable of
        :class:`Fingerprints <ccu.structure.fingerprint.Fingerprint>` which
        minimizes the cumulative displacement across the two iterables of
        :class:`Fingerprints <ccu.structure.fingerprint.Fingerprint>`.

        Args:
            fingerprints1: An iterable containing
                :class:`~ccu.structure.fingerprint.Fingerprint` instances.
            fingerprints2: An iterable containing
                :class:`~ccu.structure.fingerprint.Fingerprint` instances.

        Note that the two iterables must be of the same length and that the
        :meth:`ccu.structure.fingerprint.Fingerprint.values` methods of all
        :class:`~ccu.structure.fingerprint.Fingerprint` instances across the
        two iterables must be of the same length.

        Note:
            Every permutation of `fingerprints2` is evaluated, so the cost
            grows factorially with the number of fingerprints. While
            evaluating candidates, the fingerprints in `fingerprints2` are
            updated in place with their co-sorted histograms; the returned
            tuple is a deep copy of the best ordering found.

        Returns:
            A tuple containing the ordering of `fingerprints2` which
            minimizes the cumulative displacement across the two iterables
            of :class:`Fingerprints <ccu.structure.fingerprint.Fingerprint>`.

        Raises:
            RuntimeError: Unable to find minimally displaced fingerprint,
                i.e., no permutation had a cumulative displacement smaller
                than infinity.
        """
        minimal_cumulative_displacement = math.inf
        minimally_displaced_fingerprints = None

        # Stream the permutations instead of materialising all n! of them.
        for fingerprints in permutations(fingerprints2, r=len(fingerprints2)):
            cumulative_displacement = 0.0
            for i, fingerprint_ in enumerate(fingerprints):
                # Re-order the candidate's histogram against its reference
                # before measuring how far apart the two are.
                fingerprint_.update(
                    Comparator.cosort_histograms(
                        fingerprints1[i], fingerprint_
                    )
                )
                displacement = Comparator.calculate_cumulative_displacement(
                    fingerprints1[i], fingerprint_
                )
                cumulative_displacement += displacement

            if cumulative_displacement < minimal_cumulative_displacement:
                minimal_cumulative_displacement = cumulative_displacement
                # Later permutations keep mutating the same fingerprint
                # objects, so snapshot the current best state.
                minimally_displaced_fingerprints = deepcopy(fingerprints)

        if minimally_displaced_fingerprints is None:
            msg = "Something went wrong!"
            raise RuntimeError(msg)

        return minimally_displaced_fingerprints

    @staticmethod
    def calculate_cumulative_displacement(
        fingerprint1: fingerprint.Fingerprint,
        fingerprint2: fingerprint.Fingerprint,
    ) -> float:
        """Calculates the cumulative displacement for `fingerprint2`.

        The cumulative displacement is calculated for `fingerprint2`
        relative to the corresponding atomic positions in `fingerprint1`.

        The cumulative displacement is defined as follows:

        Note that each row in each :class:`numpy.ndarray` associated with
        each histogram key corresponds to a displacement vector between two
        atoms. With each such displacement vector in the histogram of
        `fingerprint1`, we can identify a corresponding displacement vector
        in the histogram of `fingerprint2` as the displacement vector
        associated with the same histogram key and index. We then define a
        difference vector as the difference between a displacement vector in
        `fingerprint1` and its counterpart in `fingerprint2`.

        The set of all difference vectors is defined on the basis of
        `fingerprint1`. That is, if :math:`X` is the set of all displacement
        vectors in `fingerprint1` and :math:`Y` is the set of all
        corresponding vectors in `fingerprint2`, the set of all difference
        vectors is the set of all vectors :math:`x - y` where :math:`x` is a
        displacement vector in :math:`X` and :math:`y` is the corresponding
        displacement vector in :math:`Y`. (Note that this requires that the
        histogram of `fingerprint2` must include all the keys that the
        histogram of `fingerprint1` includes. Additionally, this requires
        that for each key in the histogram of `fingerprint1`, the value in
        `fingerprint2` includes at least as many displacement vectors as the
        value in `fingerprint1`.)

        The cumulative displacement is then defined as the sum of the norms
        of all the difference vectors corresponding to `fingerprint1` and
        `fingerprint2`.

        Args:
            fingerprint1: The :class:`.fingerprint.Fingerprint` instance
                used as a reference to calculate the cumulative
                displacement.
            fingerprint2: The second :class:`.fingerprint.Fingerprint`
                instance used to calculate the cumulative displacement.

        Returns:
            A float representing the cumulative displacement for
            `fingerprint2` relative to `fingerprint1`.
        """
        cumulative_displacement = 0.0
        for element in fingerprint1:
            for i, displacement in enumerate(fingerprint1[element]):
                d = norm(displacement - fingerprint2[element][i])
                cumulative_displacement += float(d)

        return cumulative_displacement