Common Neighbour Analysis

pySNOW contains functions that allow users to compute CNA signature for each pair of nearest neighbours as well as identify to which signature each atom in the system partecipates to. The latter is especially useful in cathegorizing the atom based on its belonging to a particular geometrical structure, extending what done in Ovito, where atoms are only catheorized based on the local cristalline structure, allowing to identify atoms belonging to a facet, an edge or a vertex.

The CNA signature is a triplet of numbers (r, s, t) for each pair of neighbours where:

r: number of common neighbours of the pair
s: number of bonds between the r common neighbours
t: the longest path that can be formed by joining the r neighbours that are neighbours of each other

Functions

The CNA calculator containes a series of functions for the comutation of the CAN signatures and patterns:

Calculate CNA Fast

Faster version of calculate_cna that precomputes neighbor sets.

Parameters

_description_

coords : ndarray 3xNatoms array containing the coordinates of each atom cut_off : float cutoff radius for the determination of nearest neighbors return_pair : bool, optional Whether to return an ordered list of the indices of the atoms forming a given pair, by default False pbc : bool whether to use pbc or not box : ndarray if you want pbc, you need to provide the simulation box. display_progress: bool Wheter to display a progress bar - needs the tqdm library

Returns

tuple[int, np.ndarray, list] The number of pairs, the cna signatures [r, s, t] for each pair and the ordered list of pairs (if return_pair == True) tuple[int, np.ndarray] The number of pairs, the cna signatures [r, s, t] for each pair

Source code in snow/descriptors/cna.py

def calculate_cna_fast(
    coords, cut_off=None, return_pair=False, pbc=False, box=None, display_progress=False
):
    """
    Faster version of calculate_cna that precomputes neighbor sets.

    Parameters
    ----------

        _description_
    coords : ndarray
        3xNatoms array containing the coordinates of each atom
    cut_off : float
        cutoff radius for the determination of nearest neighbors
    return_pair : bool, optional
        Whether to return an ordered list of the indices of the atoms forming a given pair, by default False
    pbc : bool
        whether to use pbc or not
    box : ndarray
        if you want pbc, you need to provide the simulation box.
    display_progress: bool
        Wheter to display a progress bar - needs the tqdm library

    Returns
    -------
    tuple[int, np.ndarray, list]
        The number of pairs, the cna signatures [r, s, t] for each pair and the ordered list of pairs (if return_pair == True)
    tuple[int, np.ndarray]
        The number of pairs, the cna signatures [r, s, t] for each pair
    """
    # Get neighbor list and pair list (assumed to be implemented efficiently)

    if cut_off == None:
        r_i = np.zeros(len(coords))

    neigh_list = nearest_neighbours(
        coords=coords, cut_off=cut_off, pbc=pbc, box=box
    )
    pairs = pair_list(
        coords=coords, cut_off=cut_off, pbc=pbc, box=box
    )

    # Precompute neighbor sets for fast membership tests
    neigh_sets = [set(neigh) for neigh in neigh_list]

    # Initialize result arrays
    r = np.empty(len(pairs), dtype=int)
    s = np.empty(len(pairs), dtype=float)
    t = np.empty(len(pairs), dtype=float)
    ret_pair = [] if return_pair else None

    iterator = enumerate(tqdm(pairs, desc="Processing pairs")) if display_progress \
            else enumerate(pairs)
    for i, p in iterator:
        # Get neighbor sets for the two atoms in the pair
        set1 = neigh_sets[p[0]]
        set2 = neigh_sets[p[1]]
        # Compute common neighbors using set intersection
        common = set1 & set2
        r[i] = len(common)

        # For s, sum the number of common neighbors between each neighbor in 'common'
        # and the set 'common' itself, then divide by 2.
        s_val = sum(len(neigh_sets[j] & common) for j in common) / 2
        s[i] = s_val

        # Compute t using the existing function.
        # If longest_path_or_cycle expects a numpy array, we convert common accordingly.
        t[i] = longest_path_or_cycle(np.array(list(common)), neigh_list)

        if return_pair:
            ret_pair.append(p)

    cna = np.column_stack((r, s, t))
    if return_pair:
        return len(pairs), cna, ret_pair
    return len(pairs), cna

Write CNA to file

Source code in snow/descriptors/cna.py

def write_cna(
    frame,
    len_pair,
    cna,
    pair_list,
    file_path=None,
    signature=True,
    cna_unique=True,
):

    if frame == 0 and os.path.exists(file_path + "signatures.csv"):
        os.remove(file_path + "signatures.csv")

    if frame == 0 and os.path.exists(file_path + "cna_unique.csv"):
        os.remove(file_path + "cna_unique.csv")

    perc = 100 * np.unique(cna, axis=0, return_counts=True)[1] / len_pair

    if signature == True:

        with open(file_path + "signatures.csv", "a") as f:
            f.write(f"\n{frame}\n")

            for i, p in enumerate(pair_list):
                f.write(f"{p[0]}, {p[1]}, {cna[i]}\n")

    if cna_unique == True:
        with open(file_path + "cna_unique.csv", "a") as f:
            f.write(f"\n{frame}\n")

            for i, p in enumerate(perc):
                f.write(
                    #f"{np.unique(cna, axis=0, return_counts=True)[0][i]}, {np.unique(cna, axis=0, return_counts=True)[1][i]},{p}\n"
                    f"{p}, {np.unique(cna, axis=0, return_counts=True)[0][i]},\n" #percentage and signature
                )

CNA Patterns per atoms

Optimized per-atom CNA calculation by precomputing a mapping from atom indices to pair indices. This avoids scanning the entire pair list for every atom.

Parameters

_description_

coords : np.ndarray Array containing the coordinates of each atom. cut_off : float Cutoff radius for nearest-neighbor determination. pbc : bool Whether to use or not periodic boundary conditions box : np.ndarray Simulation box. Only needed if you enable PBC

Returns

list of tuple[np.ndarray, np.ndarray] For each atom, a tuple (unique_signatures, counts) representing the unique CNA signatures from all pairs involving that atom and their respective counts.

Source code in snow/descriptors/cna.py

def cna_peratom(
    coords: np.ndarray,
    cut_off: float = None,
    pbc: bool = False,
    box: np.ndarray = None
):
    """
    Optimized per-atom CNA calculation by precomputing a mapping from atom indices
    to pair indices. This avoids scanning the entire pair list for every atom.

    Parameters
    ----------
        _description_
    coords : np.ndarray
        Array containing the coordinates of each atom.
    cut_off : float
        Cutoff radius for nearest-neighbor determination.
    pbc : bool
        Whether to use or not periodic boundary conditions
    box : np.ndarray
        Simulation box. Only needed if you enable PBC

    Returns
    -------
    list of tuple[np.ndarray, np.ndarray]
        For each atom, a tuple (unique_signatures, counts) representing the unique
        CNA signatures from all pairs involving that atom and their respective counts.
    """
    # Compute CNA signatures and the corresponding pair list

    _, cna, pair_list = calculate_cna_fast(
        coords=coords,
        cut_off=cut_off,
        return_pair=True,
        pbc=pbc,
        box = box
    )
    num_atoms = len(coords)

    # Precompute a mapping from each atom to the indices of pairs that involve it.
    atom_to_pair = [[] for _ in range(num_atoms)]
    for idx, (i, j) in enumerate(pair_list):
        atom_to_pair[i].append(idx)
        atom_to_pair[j].append(idx)

    # Build the per-atom CNA information using the precomputed mapping.
    cna_atom = []
    for i in range(num_atoms):
        pair_indices = atom_to_pair[i]
        if pair_indices:
            # Use NumPy advanced indexing to quickly select the relevant CNA rows.
            cna_i = cna[pair_indices]
            unique_signatures, counts = np.unique(
                cna_i, axis=0, return_counts=True
            )
            cna_atom.append((unique_signatures, counts))
        else:
            # If no pairs include atom i, return empty arrays.
            cna_atom.append((np.array([]), np.array([])))
    return cna_atom

CNAp Index per atom

Usage:

cnap_peratom(index_frame: int, coords: np.ndarray, cut_off: float = None, pbc: bool = False) -> np.ndarray

Computes the CNA patterns per atom and assigns an integer structure ID (see README for mapping).

Parameters

coords : np.ndarray (N, 3) array with atomic coordinates cut_off : float Cutoff radius for neighbor determination pbc : bool Whether to use or not periodic boundary conditions box : np.ndarray Simulation box. Only needed if you enable PBC display_progress: bool Wheter to display a progress bar

Returns

np.ndarray Array of integer structure IDs per atom

Source code in snow/descriptors/cna.py

def cnap_peratom(
    coords: np.ndarray,
    cut_off: float = None,
    pbc: bool = False,
    box: np.ndarray = None,
    display_progress: bool = False,) -> np.ndarray:
    """
    Computes the CNA patterns per atom and assigns an integer structure ID
    (see README for mapping).

    Parameters
    ----------
    coords : np.ndarray
        (N, 3) array with atomic coordinates
    cut_off : float
        Cutoff radius for neighbor determination
    pbc : bool
        Whether to use or not periodic boundary conditions
    box : np.ndarray
        Simulation box. Only needed if you enable PBC
    display_progress: bool
        Wheter to display a progress bar

    Returns
    -------
    np.ndarray
        Array of integer structure IDs per atom
    """
    # Compute CNA info
    cna = cna_peratom(coords, cut_off, pbc=pbc, box=box)
    n_atoms = len(coords)
    cna_atom = np.zeros(n_atoms, dtype=int)

    # --- Define pattern rules as a lookup table ---
    # Each rule is a tuple (required signatures, required counts) -> assigned ID
    PATTERNS = [
        # n_sigs == 1
        ((([5, 5, 5],), (12,)), 5),
        (([[4, 2, 1]], (12,)), 4),
        # n_sigs == 2
        (([[4, 2, 2], [5, 5, 5]], (10, 2)), 3),
        (([[4, 2, 1], [3, 1, 1]], (3, 6)), 15),
        (([[2, 1, 1], [4, 2, 1]], (4, 1)), 11),
        (([[2, 1, 1], [4, 2, 1]], (4, 4)), 12),
        (([[3, 2, 2], [5, 5, 5]], (5, 1)), 14),
        (([[4, 2, 1], [4, 2, 2]], (6, 6)), 16),
        # n_sigs == 3
        (([[1, 0, 0], [2, 1, 1], [4, 2, 2]], (2, 2, 2)), 6),
        (([[2, 0, 0], [3, 1, 1], [4, 2, 1]], (2, 4, 1)), 8),
        (([[2, 1, 1], [3, 1, 1], [4, 2, 1]], (3, 2, 2)), 10),
        (([[3, 1, 1], [3, 2, 2], [4, 2, 2]], (4, 2, 2)), 13),
        (([[2, 1, 1], [3, 1, 1], [4, 2, 1]], (1, 4, 5)), 17),
        # n_sigs == 4
        (([[1, 0, 0], [2, 1, 1], [3, 2, 2], [4, 2, 2]], (1, 2, 1, 1)), 1),
        (([[2, 0, 0], [2, 1, 1], [3, 1, 1], [4, 2, 1]], (1, 2, 2, 1)), 2),
        (([[3, 0, 0], [3, 1, 1], [4, 2, 1], [4, 2, 2]], (2, 4, 2, 2)), 9),
        (([[4, 2, 1], [4, 2, 2], [4, 3, 3], [5, 4, 4]], (4, 4, 2, 2)), 18),
        # n_sigs == 5
        (
            (
                [[2, 0, 0], [3, 0, 0], [3, 1, 1], [3, 2, 2], [4, 2, 2]],
                (2, 1, 2, 1, 1),
            ),
            7,
        ),
    ]

    def match_pattern(sigs, counts):
        """Try to match CNA signatures/counts to a known structure pattern."""
        for (req_sigs, req_counts), struct_id in PATTERNS:
            if len(req_sigs) != len(sigs):
                continue
            # Convert both to sets of tuples for order-insensitive comparison
            sig_dict = {tuple(sig): cnt for sig, cnt in zip(sigs, counts)}
            if all(
                tuple(rs) in sig_dict and sig_dict[tuple(rs)] == rc
                for rs, rc in zip(req_sigs, req_counts)
            ):
                return struct_id
        return 0  # default (unidentified)

    # --- Process atoms ---
    iterator = tqdm(range(n_atoms), desc="Processing CNA patterns") if display_progress \
            else range(n_atoms)
    for i in iterator:
        sigs = np.array(cna[i][0])
        counts = np.array(cna[i][1]).flatten()

        if len(sigs) == 0:
            continue

        cna_atom[i] = match_pattern(sigs, counts)

    return cna_atom