Source code for ccu.structure.comparator

"""This module defines the Comparator class.

The Comparator class can be used to determine the similarity of two structures
as follows:

>>> import ase
>>> from ccu.structure.comparator import Comparator
>>> co1 = ase.Atoms('CO', positions=[[0, 0, 0], [1, 0, 0]])
>>> co2 = ase.Atoms('CO', positions=[[0, 1, 1], [1, 1, 1]])
>>> oc = ase.Atoms('OC', positions=[[0, 0, 0], [1, 0, 0]])
>>> Comparator.check_similarity(co1, co2)
True
>>> Comparator.check_similarity(co1, oc)
False
"""

from collections.abc import Iterable
from copy import deepcopy
from itertools import permutations
import math

import ase
import numpy as np
from numpy.linalg import norm

from ccu.structure import fingerprint


class Comparator:
    """An object which compares the similarity of two structures.

    All functionality is exposed through static methods; the class keeps no
    state. Fingerprints are only used through a mapping-like interface here:
    they are iterated for their keys, indexed by key, tested for key
    membership and (in ``cosort_fingerprints``) modified via ``update``.
    """

    @staticmethod
    def check_similarity(
        structure1: ase.Atoms, structure2: ase.Atoms, tol: float = 5e-2
    ) -> bool:
        """Determine whether two structures are similar within a tolerance.

        Args:
            structure1: An ase.Atoms instance representing the first
                structure to compare.
            structure2: An ase.Atoms instance representing the second
                structure to compare.
            tol: A float specifying the tolerance for the cumulative
                displacement of each fingerprint in Angstroms. Defaults to
                5e-2.

        Returns:
            A boolean indicating whether or not the two structures are
            similar within the specified tolerance. Structures with a
            different number of atoms are never similar. Two structures are
            similar if they can be superimposed via a translation operation.
        """
        if len(structure1) != len(structure2):
            return False

        fingerprints1 = fingerprint.Fingerprint.from_structure(structure1)
        fingerprints2 = fingerprint.Fingerprint.from_structure(structure2)

        # Pair every fingerprint of structure2 with its best counterpart in
        # structure1 before comparing them one by one.
        fingerprints2 = Comparator.cosort_fingerprints(
            fingerprints1, fingerprints2
        )

        for i, fingerprint_ in enumerate(fingerprints2):
            displacement = Comparator.calculate_cumulative_displacement(
                fingerprints1[i], fingerprint_
            )
            if displacement > tol:
                return False

        return True

    @staticmethod
    def cosort_histograms(
        fingerprint1: fingerprint.Fingerprint,
        fingerprint2: fingerprint.Fingerprint,
    ) -> dict[str, np.ndarray]:
        """Find the ordering of fingerprint2's histogram closest to fingerprint1.

        The two supplied Fingerprints need not have the same keys or the same
        number of entries under each key. Such cases are handled as follows:

        Keys of fingerprint2 that are absent from fingerprint1 are omitted
        from the returned dict.

        Let k be a key in both fingerprint1 and fingerprint2, p the
        displacements under k in fingerprint1 and q the displacements under k
        in fingerprint2. If len(p) > len(q), then q is ordered according to
        its match with the first len(q) elements of p. If len(p) <= len(q),
        the best len(p) elements of q are matched against p and the remaining
        elements of q are appended after them in their original order.

        Args:
            fingerprint1: The Fingerprint object to be used as a reference
                for each displacement in the other Fingerprint's histogram.
            fingerprint2: The Fingerprint object for which the optimally
                ordered histogram is to be determined.

        Returns:
            A dict mapping each chemical symbol shared by both fingerprints
            to a numpy.ndarray holding all of fingerprint2's displacement
            vectors for that symbol, ordered such that the cumulative
            displacement relative to fingerprint1 is minimized.
        """
        histogram = {}

        for element in fingerprint2:
            if element not in fingerprint1:
                continue

            reference_displacements = fingerprint1[element]
            candidate_displacements = fingerprint2[element]
            perm_length = min(
                len(reference_displacements), len(candidate_displacements)
            )

            minimal_cumulative_displacement = math.inf
            # Start from the unchanged order (as a list, so it can always be
            # extended) in case no permutation yields a finite displacement.
            minimally_displaced_ordering = list(candidate_displacements)

            # NOTE: exhaustive search; the cost grows factorially with the
            # number of displacement vectors per element.
            for displacements in permutations(
                candidate_displacements, r=perm_length
            ):
                cumulative_displacement = 0
                for i, displacement in enumerate(displacements):
                    cumulative_displacement += norm(
                        reference_displacements[i] - displacement
                    )

                if cumulative_displacement < minimal_cumulative_displacement:
                    minimal_cumulative_displacement = cumulative_displacement
                    minimally_displaced_ordering = list(displacements)

            # Vectors that were not part of the winning permutation are kept
            # at the end so that no displacement vector is lost.
            minimally_displaced_ordering.extend(
                Comparator._missing_displacements(
                    candidate_displacements, minimally_displaced_ordering
                )
            )

            if minimally_displaced_ordering:
                histogram[element] = np.vstack(minimally_displaced_ordering)
            else:
                # np.vstack rejects an empty sequence; there is nothing to
                # reorder, so keep the (empty) entry as it is.
                histogram[element] = np.asarray(candidate_displacements)

        return histogram

    @staticmethod
    def _missing_displacements(
        all_displacements: Iterable[np.ndarray],
        minimally_displaced_ordering: Iterable[np.ndarray],
    ) -> list[np.ndarray]:
        """Return the displacements not accounted for by the given ordering.

        The comparison is multiset-based: every vector in
        ``minimally_displaced_ordering`` accounts for exactly one equal
        vector of ``all_displacements``, so surplus duplicates are reported
        as missing instead of being silently dropped.

        Args:
            all_displacements: Every displacement vector that must appear in
                the final ordering.
            minimally_displaced_ordering: The displacement vectors that have
                already been placed.

        Returns:
            A list of the vectors from ``all_displacements`` that are not yet
            placed, in their original relative order.
        """
        unmatched = list(minimally_displaced_ordering)
        missing_displacements = []

        for displacement in all_displacements:
            for index, included_displacement in enumerate(unmatched):
                if np.array_equal(displacement, included_displacement):
                    # Consume the match so that it cannot also account for a
                    # later duplicate.
                    del unmatched[index]
                    break
            else:
                missing_displacements.append(displacement)

        return missing_displacements

    @staticmethod
    def cosort_fingerprints(
        fingerprints1: Iterable[fingerprint.Fingerprint],
        fingerprints2: Iterable[fingerprint.Fingerprint],
    ) -> tuple[fingerprint.Fingerprint, ...]:
        """Order fingerprints2 to minimize its displacement from fingerprints1.

        Every permutation of fingerprints2 is tried. For each pairing the
        histogram of the second fingerprint is co-sorted against its partner
        (see ``cosort_histograms``) before the displacement is measured. The
        supplied fingerprints are not modified; the search operates on deep
        copies.

        Args:
            fingerprints1: An iterable containing the reference Fingerprint
                instances.
            fingerprints2: An iterable containing the Fingerprint instances
                to reorder.

            Note that the two iterables must be of the same length and that
            every Fingerprint in fingerprints2 must provide, for each key of
            its partner in fingerprints1, at least as many displacement
            vectors as the partner does.

        Returns:
            A tuple containing copies of the Fingerprints in fingerprints2,
            ordered (and internally co-sorted) such that the cumulative
            displacement across the two iterables is minimized. None is
            returned if no permutation produces a displacement smaller than
            infinity (e.g., when it evaluates to NaN).
        """
        references = list(fingerprints1)
        # The search below re-sorts histograms in place via update(); working
        # on copies keeps the caller's fingerprints untouched.
        candidates = deepcopy(list(fingerprints2))

        minimal_cumulative_displacement = math.inf
        minimally_displaced_fingerprints = None

        # NOTE: exhaustive search; the cost grows factorially with the number
        # of fingerprints.
        for fingerprints in permutations(candidates, r=len(candidates)):
            cumulative_displacement = 0
            for i, fingerprint_ in enumerate(fingerprints):
                fingerprint_.update(
                    Comparator.cosort_histograms(references[i], fingerprint_)
                )
                cumulative_displacement += (
                    Comparator.calculate_cumulative_displacement(
                        references[i], fingerprint_
                    )
                )

            if cumulative_displacement < minimal_cumulative_displacement:
                minimal_cumulative_displacement = cumulative_displacement
                # Snapshot now: later permutations re-sort these same objects.
                minimally_displaced_fingerprints = deepcopy(fingerprints)

        return minimally_displaced_fingerprints

    @staticmethod
    def calculate_cumulative_displacement(
        fingerprint1: fingerprint.Fingerprint,
        fingerprint2: fingerprint.Fingerprint,
    ) -> float:
        """Calculate the cumulative displacement of fingerprint2 from fingerprint1.

        Each row stored under a key of a fingerprint is a displacement vector
        between two atoms. A displacement vector in fingerprint1 corresponds
        to the vector in fingerprint2 stored under the same key and at the
        same index. The difference vector of such a pair is x - y, where x is
        the vector from fingerprint1 and y its counterpart in fingerprint2.

        The set of difference vectors is defined on the basis of
        fingerprint1: fingerprint2 must include every key that fingerprint1
        includes and, for each of those keys, at least as many displacement
        vectors as fingerprint1. Additional keys or vectors in fingerprint2
        are ignored.

        The cumulative displacement is the sum of the norms of all
        difference vectors.

        Args:
            fingerprint1: The Fingerprint instance used as a reference to
                calculate the cumulative displacement.
            fingerprint2: The second Fingerprint instance used to calculate
                the cumulative displacement.

        Returns:
            A float representing the cumulative displacement for
            fingerprint2 relative to fingerprint1.
        """
        cumulative_displacement = 0.0

        for element in fingerprint1:
            for i, reference in enumerate(fingerprint1[element]):
                cumulative_displacement += norm(
                    reference - fingerprint2[element][i]
                )

        return cumulative_displacement