Skip to content

Input Output

Dealing with input

Functions

read_xyz(file, extra_cols_indexes=None)

Wrapper around read_xyz_movie for reading single-frame files, kept mostly for compatibility.

Source code in snow/io/xyz.py
def read_xyz(file, extra_cols_indexes=None):
    """
    Read a single-frame xyz file by delegating to read_xyz_movie.

    The file is parsed as a movie and only its first frame is returned.

    Parameters
    ----------
    file : str
        Path to the xyz file, forwarded to read_xyz_movie.
    extra_cols_indexes : list, optional
        Column indexes of extra per-atom data, forwarded to read_xyz_movie.

    Returns
    -------
    tuple
        (elements, coords) of the first frame, followed by the extra columns
        of the first frame when extra_cols_indexes is given.
    """

    # Guard clause: without extra columns the movie reader returns a pair.
    if extra_cols_indexes is None:
        symbols_per_frame, coords_per_frame = read_xyz_movie(file)
        return symbols_per_frame[0], coords_per_frame[0]

    symbols_per_frame, coords_per_frame, extras_per_frame = read_xyz_movie(file, extra_cols_indexes)
    return symbols_per_frame[0], coords_per_frame[0], extras_per_frame[0]

read_xyz_movie(file_path, extra_cols_indexes=None)

Obtains the coordinates and elements for each frame of an xyz trajectory.

Parameters

file_path : str Path to the xyz file with the structure extra_cols_indexes : list of int Indexes of the extra columns of per-atom data to be extracted from the .xyz file. Consider that the first four columns (indexes 0 to 3) hold the element and the three Cartesian coordinates and are returned by default from the function. Example: if your .xyz file has per-atom information like " El pos1 pos2 pos3 force1 force2 force3 charge ", you can get the extra columns force1, force2, charge by passing extra_cols_indexes=[4, 5, 7]. For now only the parsing of float values is supported.

Returns

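Tuple[list, list] A list with the chemical symbols of each frame and a list of (n_atoms, 3) arrays with the coordinates of each frame. When extra_cols_indexes is given, a third list with one (n_atoms, n_extra_cols) array per frame is also returned.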

Source code in snow/io/xyz.py
def read_xyz_movie(file_path: str, extra_cols_indexes: list = None) -> Tuple[list, np.ndarray]:
    """
    Obtains the coordinates and elements for each frame of an xyz trajectory.

    Each frame is expected to be laid out as: one line with the number of atoms,
    one comment line (ignored), then one line per atom. Blank lines found where
    the atom count of a frame is expected (between frames or at the end of the
    file) are skipped. A final frame with fewer atom lines than announced is
    not returned.

    Parameters
    ----------
    file_path : str
        Path to the xyz file with the structure
    extra_cols_indexes : list of int, optional
        Indexes of the extra columns of per-atom data to be extracted from the .xyz file.
        Column 0 is the element and columns 1-3 are the cartesian coordinates, which are
        always returned.
        Example: if your .xyz file has per-atom information like " El pos1 pos2 pos3 force1 force2 force3 charge ",
        you can get the extra columns force1, force2, charge by passing extra_cols_indexes=[4, 5, 7].
        For now only float values parsing is supported.

    Returns
    -------
    tuple
        (el_list, coords_list): a list with the chemical symbols of each frame and a
        list of (n_atoms, 3) arrays with the coordinates of each frame.
        When extra_cols_indexes is given the tuple has a third item, a list of
        (n_atoms, n_extra_cols) arrays with the requested columns of each frame.

    Raises
    ------
    ValueError
        If an atom count or a requested numeric column cannot be parsed.
    IndexError
        If an atom line has fewer columns than required.
    """

    want_extra = extra_cols_indexes is not None
    n_extra_cols = len(extra_cols_indexes) if want_extra else 0

    el_list = []
    coords_list = []
    extra_cols_list = []

    with open(file_path, "r") as file:

        # 1-based position of the current line inside the current frame
        conf_line_iter = 0

        for line in file:

            conf_line_iter += 1

            # beginning of a frame: number of atoms
            if conf_line_iter == 1:
                header = line.strip()
                if not header:
                    # blank separator / trailing newline: still waiting for a frame
                    conf_line_iter = 0
                    continue

                n_atoms = int(header)
                elements = []
                coords = np.zeros((n_atoms, 3))
                if want_extra:
                    extra_cols = np.zeros((n_atoms, n_extra_cols))
                continue

            # line 2 is the comment/general information line and is not parsed
            if conf_line_iter > 2:
                parts = line.split()

                if len(parts) < 4:
                    print(f'warning this line should have at least 4 values in it but has {len(parts)}')

                elements.append(parts[0])

                atom_index = conf_line_iter - 3
                coords[atom_index] = [float(parts[1]), float(parts[2]), float(parts[3])]

                if want_extra:
                    for i, index in enumerate(extra_cols_indexes):
                        extra_cols[atom_index, i] = float(parts[index])

            # frame is over (for a zero-atom frame this happens on the comment line)
            if conf_line_iter == n_atoms + 2:
                el_list.append(elements)
                coords_list.append(coords)
                if want_extra:
                    extra_cols_list.append(extra_cols)
                conf_line_iter = 0

    if want_extra:
        return el_list, coords_list, extra_cols_list

    return el_list, coords_list

write_phantom_xyz(filename, coords, additional_data=None)

Writes atomic data to an XYZ file in OVITO-compatible format.

Parameters

filename: str Name of the output .xyz file. coords: ndarray Nx3 array of atomic coordinates. additional_data: list or np.ndarray, optional Additional per-atom data, such as coordination numbers. Note that elements is not a parameter of this function: every atom is written with the placeholder symbol 'X'.

Returns:

Type Description

An xyz file containing the elements and coordinates of each atom and any additional per atom data (e.g. coordination number, agcn, strain...)

Source code in snow/io/xyz.py
def write_phantom_xyz(filename, coords, additional_data=None):
    """
    Writes atomic data to an XYZ file in OVITO-compatible format, using the
    placeholder symbol 'X' for every atom.

    Parameters
    ----------
    filename: str
        Name of the output .xyz file. The file is overwritten.
    coords: ndarray
        Nx3 array (or nested list) of atomic coordinates.
    additional_data: list or np.ndarray, optional
        Additional per-atom numeric data, such as coordination numbers. Either a
        1D sequence with one value per atom or a 2D array with one row per atom.

    Returns
    -------
    None
        The xyz file is written to disk: atom count, a fixed comment line, then
        one line per atom with 'X', the coordinates and any additional data.

    Raises
    ------
    ValueError
        If additional_data is not 1D/2D or its number of rows differs from the
        number of atoms.
    """
    # asarray lets nested lists work with the [i, j] indexing used below
    coords = np.asarray(coords)
    n_atoms = len(coords)
    elements = ['X'] * n_atoms

    # Check if additional_data is provided and has the correct shape
    if additional_data is not None:
        additional_data = np.array(additional_data)
        if additional_data.ndim == 1:
            # one value per atom -> single column
            additional_data = additional_data[:, None]
        if additional_data.ndim != 2:
            raise ValueError(f"additional_data must be 1D or 2D, got {additional_data.ndim} dimensions.")
        if additional_data.shape[0] != n_atoms:
            raise ValueError(f"The number of rows in additional_data ({additional_data.shape[0]}) must match the number of atoms ({n_atoms}).")

    with open(filename, 'w') as xyz_file:
        # Write header
        xyz_file.write(f"{n_atoms}\n")
        xyz_file.write("Generated XYZ file with optional properties\n")

        # Write atom data
        for i in range(n_atoms):
            atom_line = f"{elements[i]} {coords[i, 0]:.6f} {coords[i, 1]:.6f} {coords[i, 2]:.6f}"
            if additional_data is not None:
                # Add the additional per-atom data
                atom_line += ' ' + ' '.join(f"{value:.6f}" for value in additional_data[i])
            xyz_file.write(atom_line + "\n")

write_xyz(filename, elements, coords, additional_data=None, box=None, mode='w')

Writes atomic data to an XYZ file in OVITO-compatible format. Currently only accepting numbers as additional data.

Parameters

filename: str Name of the output .xyz file. elements: ndarray List of atomic symbols (e.g., ['Au', 'Au', ...]). coords: ndarray Nx3 array of atomic coordinates. additional_data: list or np.ndarray, optional Additional per-atom data, such as coordination numbers. box: np.ndarray A box to be written to the file. mode: str Mode for writing ('a' -> append, 'w' -> (over)write).

Returns:

Type Description

None

Source code in snow/io/xyz.py
def write_xyz(filename, elements, coords, additional_data=None, box=None, mode='w'):
    """
    Writes atomic data to an XYZ file in OVITO-compatible format. Currently only accepting numbers
    as additional data.

    Parameters
    ----------
    filename: str
        Name of the output .xyz file.
    elements: ndarray
        List of atomic symbols (e.g., ['Au', 'Au', ...]). Its length sets the number of atoms written.
    coords: ndarray
        Nx3 array (or nested list) of atomic coordinates.
    additional_data: list or np.ndarray, optional
        Additional per-atom data, such as coordination numbers. Either a 1D sequence
        with one value per atom or a 2D array / list of lists with one row per atom.
    box: np.ndarray, optional
        A box to be written in the comment line as Lattice="...". Accepted shapes:
        (3, 3) written row by row; (3, 1) or (3,) written as the diagonal of the
        lattice; (3, 2) written as three rows with a trailing 0.0 each.
    mode: str
        mode for writing ('a'->append,  'w'->(over)write)

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If additional_data is neither a list nor a np.ndarray, is not 1D/2D, has a
        number of rows different from the number of atoms, or if box has an
        unsupported shape.
    """

    n_atoms = len(elements)
    # asarray lets nested lists work with the [i, j] indexing used below
    coords = np.asarray(coords)

    # Cast additional data to shape (n_atoms, n_columns) and validate it
    if additional_data is not None:
        if not isinstance(additional_data, (list, np.ndarray)):
            raise ValueError('Please provide additional data as either a list or a np.ndarray')
        additional_data = np.asarray(additional_data)
        if additional_data.ndim == 1:
            # one value per atom -> single column
            additional_data = additional_data[:, None]
        if additional_data.ndim != 2:
            raise ValueError(f"additional_data must be 1D or 2D, got {additional_data.ndim} dimensions.")
        if additional_data.shape[0] != n_atoms:
            raise ValueError(f"The number of rows in additional_data ({additional_data.shape[0]}) must match the number of atoms ({n_atoms}).")

    # Build the comment line before opening the file, so that an invalid box
    # does not leave a partially written file behind.
    comment_line = "Generated XYZ file with optional properties\n"
    if box is not None:
        box = np.asarray(box)
        if box.shape == (3,):
            box = box[:, None]

        if box.shape == (3, 3):
            lattice = ''.join(f'{box[i, j]} ' for i in range(3) for j in range(3))
        elif box.shape == (3, 1):
            lattice = f'{box[0, 0]} 0.0 0.0 0.0 {box[1, 0]} 0.0 0.0 0.0 {box[2, 0]}'
        elif box.shape == (3, 2):
            lattice = (
                f'{box[0, 0]} {box[0, 1]} 0.0 '
                f'{box[1, 0]} {box[1, 1]} 0.0 '
                f'{box[2, 0]} {box[2, 1]} 0.0'
            )
        else:
            raise ValueError('only implemented style for boxes are np.ndarrays with shape (3,3) or (3,2) or (3,1) or (3,).')
        comment_line = f'Lattice="{lattice}" - ' + comment_line

    with open(filename, mode) as xyz_file:
        # Write header
        xyz_file.write(f"{n_atoms}\n")
        xyz_file.write(comment_line)

        # Write atom data
        for i in range(n_atoms):
            atom_line = f"{elements[i]} {coords[i, 0]:.6f} {coords[i, 1]:.6f} {coords[i, 2]:.6f}"
            if additional_data is not None:
                # Add the additional per-atom data
                atom_line += ' ' + ' '.join(f"{value:.6f}" for value in additional_data[i])
            xyz_file.write(atom_line + "\n")

write_xyz_movie(filename, elements_list, coords_list, additional_data_list=None, box_list=None)

Writes an xyz movie by reiterating the usage of write_xyz function.

Parameters

filename: str Name of the output .xyz file. elements_list: list List with the atomic symbols of each frame (e.g., [['Au', 'Au', ...], ...]). coords_list: list List with the Nx3 array of atomic coordinates of each frame. additional_data_list: list, optional List with the additional per-atom data of each frame, such as coordination numbers. box_list: list, optional List with the box of each frame to be written to the file.

Returns:

Type Description

None

Source code in snow/io/xyz.py
def write_xyz_movie(filename, elements_list, coords_list, additional_data_list=None, box_list=None):
    """
    Writes an xyz movie by calling write_xyz once per frame.

    The first frame is written in 'w' mode (overwriting any existing file) and
    every following frame is appended to it.

    Parameters
    ----------
    filename: str
        Name of the output .xyz file.
    elements_list: list
        Atomic symbols of each frame (e.g., [['Au', 'Au', ...], ...]).
    coords_list: list
        Nx3 array of atomic coordinates of each frame.
    additional_data_list: list, optional
        Additional per-atom data of each frame, such as coordination numbers.
        When omitted, no additional data is written for any frame.
    box_list: list, optional
        Box of each frame to be written to file. When omitted, no box is written.

    Returns
    -------
    None
    """

    # Missing optional lists are replaced by one None per frame so they can be zipped
    per_frame_extra = additional_data_list
    if per_frame_extra is None:
        per_frame_extra = [None] * len(elements_list)

    per_frame_box = box_list
    if per_frame_box is None:
        per_frame_box = [None] * len(elements_list)

    frames = zip(elements_list, coords_list, per_frame_extra, per_frame_box)
    for frame_number, (frame_elements, frame_coords, frame_extra, frame_box) in enumerate(frames):
        # only the first frame truncates the file
        write_mode = 'w' if frame_number == 0 else 'a'
        write_xyz(filename, frame_elements, frame_coords, frame_extra, box=frame_box, mode=write_mode)

write_xyz_movie_old(frame, filename, elements, coords, additional_data=None)

Writes atomic data to an XYZ file in OVITO-compatible format.

Parameters

frame: int Frame number. filename: str Name of the output .xyz file. elements: ndarray List of atomic symbols (e.g., ['Au', 'Au', ...]). coords: ndarray Nx3 array of atomic coordinates. additional_data: list or np.ndarray, optional Additional per-atom data, such as coordination numbers.

Returns:

Type Description

An xyz file containing the elements and coordinates of each atom and any additional per atom data (e.g. coordination number, agcn, strain...)

Source code in snow/io/xyz.py
def write_xyz_movie_old(frame, filename, elements, coords, additional_data=None):
    """
    Appends one frame of atomic data to an XYZ file in OVITO-compatible format.

    When frame is 0 any existing file with the same name is removed first, so
    calling this function for frames 0, 1, 2, ... builds a movie file.
    The comment line of each frame is left empty.

    Parameters
    ----------
    frame: int
        Frame number.
    filename: str
        Name of the output .xyz file.
    elements: ndarray
        List of atomic symbols (e.g., ['Au', 'Au', ...]).
    coords: ndarray
        Nx3 array (or nested list) of atomic coordinates. Its length sets the number of atoms written.
    additional_data: list or np.ndarray, optional
        Additional per-atom numeric data, such as coordination numbers. Either a
        1D sequence with one value per atom or a 2D array with one row per atom.

    Returns
    -------
    None
        The frame is appended to the xyz file on disk.

    Raises
    ------
    ValueError
        If additional_data is not 1D/2D or its number of rows differs from the
        number of atoms.
    """

    # frame 0 starts a new movie: drop any previous file
    if frame == 0 and os.path.exists(filename):
        os.remove(filename)

    # asarray lets nested lists work with the [i, j] indexing used below
    coords = np.asarray(coords)
    n_atoms = len(coords)

    # Check if additional_data is provided and has the correct shape
    if additional_data is not None:
        additional_data = np.array(additional_data)
        if additional_data.ndim == 1:
            # one value per atom -> single column
            additional_data = additional_data[:, None]
        if additional_data.ndim != 2:
            raise ValueError(f"additional_data must be 1D or 2D, got {additional_data.ndim} dimensions.")
        if additional_data.shape[0] != n_atoms:
            raise ValueError(
                f"The number of rows in additional_data ({additional_data.shape[0]}) must match the number of atoms ({n_atoms}).")

    with open(filename, 'a') as xyz_file:
        # Write header: atom count followed by an empty comment line
        xyz_file.write(f"{n_atoms}\n\n")

        # Write atom data
        for i in range(n_atoms):
            atom_line = f"{elements[i]} {coords[i, 0]:.6f} {coords[i, 1]:.6f} {coords[i, 2]:.6f}"
            if additional_data is not None:
                # Add the additional per-atom data
                atom_line += ' ' + ' '.join(f"{value:.6f}" for value in additional_data[i])
            xyz_file.write(atom_line + "\n")

write_xyz_movie_with_str(frame, filename, elements, coords, additional_data=None)

Writes atomic data to an XYZ file in OVITO-compatible format.

Parameters

frame: int Frame number. filename: str Name of the output .xyz file. elements: list or ndarray List of atomic symbols (e.g., ['Au', 'Au', ...]). coords: ndarray Nx3 array of atomic coordinates. additional_data: list or np.ndarray, optional Additional per-atom data, such as coordination numbers, site types, etc. Can contain both numeric and string data.

Returns

None

Source code in snow/io/xyz.py
def write_xyz_movie_with_str(frame, filename, elements, coords, additional_data=None):
    """
    Appends one frame of atomic data to an XYZ file in OVITO-compatible format,
    allowing string values in the additional per-atom data.

    When frame is 0 any existing file with the same name is removed first, so
    calling this function for frames 0, 1, 2, ... builds a movie file.
    The comment line of each frame is "Frame <frame>".

    Parameters
    ----------
    frame: int
        Frame number.
    filename: str
        Name of the output .xyz file.
    elements: list or ndarray
        List of atomic symbols (e.g., ['Au', 'Au', ...]).
    coords: ndarray
        Nx3 array (or nested list) of atomic coordinates. Its length sets the number of atoms written.
    additional_data: list or np.ndarray, optional
        Additional per-atom data, such as coordination numbers, site types, etc.
        Can contain both numeric and string data. Either a 1D sequence with one
        value per atom or a 2D array with one row per atom.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If additional_data is not 1D/2D or its number of rows differs from the
        number of atoms.
    """

    # frame 0 starts a new movie: drop any previous file
    if frame == 0 and os.path.exists(filename):
        os.remove(filename)

    # asarray lets nested lists work with the [i, j] indexing used below
    coords = np.asarray(coords)
    n_atoms = len(coords)

    # Convert additional_data to array of objects if provided, so that numbers
    # and strings keep their own Python type
    if additional_data is not None:
        additional_data = np.array(additional_data, dtype=object)
        if additional_data.ndim == 1:
            # one value per atom -> single column
            additional_data = additional_data[:, None]
        if additional_data.ndim != 2:
            raise ValueError(f"additional_data must be 1D or 2D, got {additional_data.ndim} dimensions.")
        if additional_data.shape[0] != n_atoms:
            raise ValueError(
                f"The number of rows in additional_data ({additional_data.shape[0]}) "
                f"must match the number of atoms ({n_atoms})."
            )

    with open(filename, 'a') as xyz_file:
        # Write header
        xyz_file.write(f"{n_atoms}\n")
        xyz_file.write(f"Frame {frame}\n")

        # Write atom data
        for i in range(n_atoms):
            atom_line = f"{elements[i]} {coords[i, 0]:.6f} {coords[i, 1]:.6f} {coords[i, 2]:.6f}"
            if additional_data is not None:
                for val in additional_data[i]:
                    # Numeric values formatted with 6 decimals, anything else as-is
                    if isinstance(val, (int, float, np.number)):
                        atom_line += f" {val:.6f}"
                    else:
                        atom_line += f" {val}"
            xyz_file.write(atom_line + "\n")

read_lammps_data(file_path)

Read structure from a LAMMPS data file at a certain path

Parameters

file_path : str Path to the lammps data file

Returns

Tuple[np.ndarray, np.ndarray] Elements and coordinates array of the system

Source code in snow/io/lammps.py
def read_lammps_data(file_path: str) -> Tuple[np.ndarray, np.ndarray]:
    """Read structure from a LAMMPS data file at a certain path

    Only the "Atoms" section is parsed. Each atom line is read as
    'id type x y z ...': column 1 is taken as the atom type and columns 2-4 as
    the coordinates. Reading stops at the first blank line that follows the
    atom lines, so later sections of the file are never mistaken for atoms.

    Parameters
    ----------
    file_path : str
        Path to the lammps data file

    Returns
    -------
    Tuple[list, np.ndarray]
        List of integer atom types and array of coordinates of the system,
        in the order in which the atoms appear in the file
    """
    coordinates = []
    elements = []
    atoms_section = False
    # becomes True once a non-blank, non-comment line of the section has been seen
    section_started = False

    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()

            if not atoms_section:
                # Detect "Atoms" section
                if line.startswith("Atoms"):
                    atoms_section = True
                continue

            if not line:
                if section_started:
                    # blank line after the atom lines: end of Atoms section
                    break
                # blank line between the "Atoms" header and the first atom
                continue

            if line.startswith("#"):
                continue  # Skip comment lines

            section_started = True

            # Split line and check the format
            parts = line.split()
            if len(parts) < 5:
                continue  # Skip invalid lines

            try:
                atom_type = int(parts[1])
                x, y, z = map(float, parts[2:5])
            except ValueError:
                print(f"Skipping invalid line: {line}")
                continue

            coordinates.append([x, y, z])
            elements.append(atom_type)

    # Convert to numpy array for easier manipulation
    coordinates = np.array(coordinates)

    return elements, coordinates

read_order_lammps_dump(filename, id_index=0, type_index=1, coords_indexes=[2, 3, 4], scaled_coords=True)

Extract a movie ( Tuple[np.ndarray, np.ndarray] ) from a LAMMPS dump file.

Atoms are not written in a consistent order in dump files, so you generally need to reorder them. You can choose the columns to get the information about id, type, and coords from the LAMMPS dump file. The default is the 'atomic' style, which has the shape 'id type xs ys zs'.

Parameters

filename : str filename for the lammps-dump file to extract atoms from. id_index: int index of the column that contains ids of your atoms in the lammps dump - default to 0 type_index: int index of the column that contains the type of atoms in the dump (in lammps these are mapped to numbers) - default to 1 coords_indexes: list of ints list of indexes of the columns that contain the positions of your atoms - default to [2, 3, 4] scaled_coords: bool whether the coordinates are scaled (written in units of the box side lengths). Default to True, which is lammps' default. This could probably be detected automatically by checking whether all positions are between 0 and 1, but that would not be fully general or robust

Returns

Tuple[np.ndarray, np.ndarray] species ids and positions from the lammps dump with consistent ordering of atoms. Here, pos[i] is a Nx3 array with positions of the i-th frame and so on.

Source code in snow/io/lammps.py
def read_order_lammps_dump(filename, id_index: int = 0, type_index: int = 1, coords_indexes: list = [2,3,4], scaled_coords=True):
    """
    Extract a movie ( Tuple[np.ndarray, np.ndarray] ) from a lammps dump file. Atoms are not written in a
    consistent order in dump files, so each frame is reordered by atom id.
    You can choose the columns to get the information about id, type, and coords from the lammps dump file. Default is
    to 'atomic' style, which has the shape 'id type xs ys zs'.

    Parameters
    ----------
    filename : str
        filename for the lammps-dump file to extract atoms from.
    id_index: int
        index of the column that contains ids of your atoms in the lammps dump -  default to 0
    type_index: int
        index of the column that contains the type of atoms in the dump (in lammps these are mapped to numbers)
        - default to 1
    coords_indexes: list of ints
        list of indexes of the columns that contain the positions of your atoms - default to [2,3,4]
    scaled_coords: bool
        whether coordinates are scaled (written in units of the box side lengths). Default to True, which is
        lammps' default. Scaled coordinates are converted with x = xlo + xs * (xhi - xlo) using the first two
        values of each 'ITEM: BOX BOUNDS' line, i.e. an orthogonal box is assumed.

    Returns
    -------
    Tuple[list, list]
        species ids and positions from the lammps dump with consistent ordering of atoms. Here, pos[i] is a Nx3
        array with positions of the i-th frame and so on.
        If the file cannot be opened, an error message is printed and an empty list is returned instead.

    Raises
    ------
    ValueError
        If an atom line cannot be parsed, or if scaled_coords is True and no 'ITEM: BOX BOUNDS' section
        precedes the atoms.
    """

    try:
        with open(filename, 'r') as f:
            lines = f.readlines()
    except Exception as e:
        # best-effort behaviour kept for backward compatibility with existing callers
        print(f"Error opening file: {e}")
        return []

    movie = []
    species = []

    def _store_frame(ids, types, positions):
        # reorder the atoms by id and save the frame
        order = np.argsort(ids)
        movie.append(np.array(positions)[order])
        species.append(np.array(types)[order])

    reading_atoms = False
    box_low = None
    box_len = None
    curr_ids, curr_species, curr_frame = [], [], []

    for i, line in enumerate(lines):

        if line.startswith("ITEM:"):
            # any new ITEM closes the atoms block that was being read
            if reading_atoms:
                _store_frame(curr_ids, curr_species, curr_frame)
                reading_atoms = False

            if line.startswith("ITEM: BOX BOUNDS"):
                # read box lower bounds and side lengths from the next three lines
                bounds = [lines[i + k].split() for k in (1, 2, 3)]
                box_low = [float(b[0]) for b in bounds]
                box_len = [float(b[1]) - float(b[0]) for b in bounds]

            elif line.startswith("ITEM: ATOMS"):
                # atoms coordinates start in the next line
                if scaled_coords and box_len is None:
                    raise ValueError(
                        "scaled_coords=True requires an 'ITEM: BOX BOUNDS' section before 'ITEM: ATOMS'"
                    )
                reading_atoms = True
                curr_ids, curr_species, curr_frame = [], [], []

            continue

        if not reading_atoms:
            continue

        # read atomic coordinates
        parts = line.split()
        if not parts:
            continue  # blank line (e.g. trailing newline at the end of the file)

        try:
            atom_id = int(parts[id_index]) - 1  # lammps has 1-based ids
            position = [float(parts[coords_indexes[k]]) for k in range(3)]
            atom_type = int(parts[type_index])
        except (ValueError, IndexError) as e:
            raise ValueError(
                f"Malformed atom line at line {i}: {line.strip()}"
            ) from e

        if scaled_coords:
            # fractional -> cartesian: shift by the box origin, scale by the box length
            position = [low + frac * length for low, frac, length in zip(box_low, position, box_len)]

        curr_ids.append(atom_id)
        curr_frame.append(position)
        curr_species.append(atom_type)

    # save last frame
    if reading_atoms:
        _store_frame(curr_ids, curr_species, curr_frame)

    return species, movie

read_lammps_data(file_path)

Read structure from a LAMMPS data file at a certain path

Parameters

file_path : str Path to the lammps data file

Returns

Tuple[np.ndarray, np.ndarray] Elements and coordinates array of the system

Source code in snow/io/lammps.py
def read_lammps_data(file_path: str) -> Tuple[np.ndarray, np.ndarray]:
    """Read structure from a LAMMPS data file at a certain path

    Only the "Atoms" section is parsed. Each atom line is read as
    'id type x y z ...': column 1 is taken as the atom type and columns 2-4 as
    the coordinates. Reading stops at the first blank line that follows the
    atom lines, so later sections of the file are never mistaken for atoms.

    Parameters
    ----------
    file_path : str
        Path to the lammps data file

    Returns
    -------
    Tuple[list, np.ndarray]
        List of integer atom types and array of coordinates of the system,
        in the order in which the atoms appear in the file
    """
    coordinates = []
    elements = []
    atoms_section = False
    # becomes True once a non-blank, non-comment line of the section has been seen
    section_started = False

    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()

            if not atoms_section:
                # Detect "Atoms" section
                if line.startswith("Atoms"):
                    atoms_section = True
                continue

            if not line:
                if section_started:
                    # blank line after the atom lines: end of Atoms section
                    break
                # blank line between the "Atoms" header and the first atom
                continue

            if line.startswith("#"):
                continue  # Skip comment lines

            section_started = True

            # Split line and check the format
            parts = line.split()
            if len(parts) < 5:
                continue  # Skip invalid lines

            try:
                atom_type = int(parts[1])
                x, y, z = map(float, parts[2:5])
            except ValueError:
                print(f"Skipping invalid line: {line}")
                continue

            coordinates.append([x, y, z])
            elements.append(atom_type)

    # Convert to numpy array for easier manipulation
    coordinates = np.array(coordinates)

    return elements, coordinates

read_order_lammps_dump(filename, id_index=0, type_index=1, coords_indexes=[2, 3, 4], scaled_coords=True)

Extract a movie ( Tuple[np.ndarray, np.ndarray] ) from a LAMMPS dump file.

Atoms are not written in a consistent order in dump files, so you generally need to reorder them. You can choose the columns to get the information about id, type, and coords from the LAMMPS dump file. The default is the 'atomic' style, which has the shape 'id type xs ys zs'.

Parameters

filename : str filename for the lammps-dump file to extract atoms from. id_index: int index of the column that contains ids of your atoms in the lammps dump - default to 0 type_index: int index of the column that contains the type of atoms in the dump (in lammps these are mapped to numbers) - default to 1 coords_indexes: list of ints list of indexes of the columns that contain the positions of your atoms - default to [2, 3, 4] scaled_coords: bool whether the coordinates are scaled (written in units of the box side lengths). Default to True, which is lammps' default. This could probably be detected automatically by checking whether all positions are between 0 and 1, but that would not be fully general or robust

Returns

Tuple[np.ndarray, np.ndarray] species ids and positions from the lammps dump with consistent ordering of atoms. Here, pos[i] is a Nx3 array with positions of the i-th frame and so on.

Source code in snow/io/lammps.py
def read_order_lammps_dump(filename, id_index: int = 0, type_index: int = 1, coords_indexes: list = [2,3,4], scaled_coords=True):
    """
    Extract a movie ( Tuple[np.ndarray, np.ndarray] ) from a lammps dump file. Atoms are not written in a
    consistent order in dump files, so each frame is reordered by atom id.
    You can choose the columns to get the information about id, type, and coords from the lammps dump file. Default is
    to 'atomic' style, which has the shape 'id type xs ys zs'.

    Parameters
    ----------
    filename : str
        filename for the lammps-dump file to extract atoms from.
    id_index: int
        index of the column that contains ids of your atoms in the lammps dump -  default to 0
    type_index: int
        index of the column that contains the type of atoms in the dump (in lammps these are mapped to numbers)
        - default to 1
    coords_indexes: list of ints
        list of indexes of the columns that contain the positions of your atoms - default to [2,3,4]
    scaled_coords: bool
        whether coordinates are scaled (written in units of the box side lengths). Default to True, which is
        lammps' default. Scaled coordinates are converted with x = xlo + xs * (xhi - xlo) using the first two
        values of each 'ITEM: BOX BOUNDS' line, i.e. an orthogonal box is assumed.

    Returns
    -------
    Tuple[list, list]
        species ids and positions from the lammps dump with consistent ordering of atoms. Here, pos[i] is a Nx3
        array with positions of the i-th frame and so on.
        If the file cannot be opened, an error message is printed and an empty list is returned instead.

    Raises
    ------
    ValueError
        If an atom line cannot be parsed, or if scaled_coords is True and no 'ITEM: BOX BOUNDS' section
        precedes the atoms.
    """

    try:
        with open(filename, 'r') as f:
            lines = f.readlines()
    except Exception as e:
        # best-effort behaviour kept for backward compatibility with existing callers
        print(f"Error opening file: {e}")
        return []

    movie = []
    species = []

    def _store_frame(ids, types, positions):
        # reorder the atoms by id and save the frame
        order = np.argsort(ids)
        movie.append(np.array(positions)[order])
        species.append(np.array(types)[order])

    reading_atoms = False
    box_low = None
    box_len = None
    curr_ids, curr_species, curr_frame = [], [], []

    for i, line in enumerate(lines):

        if line.startswith("ITEM:"):
            # any new ITEM closes the atoms block that was being read
            if reading_atoms:
                _store_frame(curr_ids, curr_species, curr_frame)
                reading_atoms = False

            if line.startswith("ITEM: BOX BOUNDS"):
                # read box lower bounds and side lengths from the next three lines
                bounds = [lines[i + k].split() for k in (1, 2, 3)]
                box_low = [float(b[0]) for b in bounds]
                box_len = [float(b[1]) - float(b[0]) for b in bounds]

            elif line.startswith("ITEM: ATOMS"):
                # atoms coordinates start in the next line
                if scaled_coords and box_len is None:
                    raise ValueError(
                        "scaled_coords=True requires an 'ITEM: BOX BOUNDS' section before 'ITEM: ATOMS'"
                    )
                reading_atoms = True
                curr_ids, curr_species, curr_frame = [], [], []

            continue

        if not reading_atoms:
            continue

        # read atomic coordinates
        parts = line.split()
        if not parts:
            continue  # blank line (e.g. trailing newline at the end of the file)

        try:
            atom_id = int(parts[id_index]) - 1  # lammps has 1-based ids
            position = [float(parts[coords_indexes[k]]) for k in range(3)]
            atom_type = int(parts[type_index])
        except (ValueError, IndexError) as e:
            raise ValueError(
                f"Malformed atom line at line {i}: {line.strip()}"
            ) from e

        if scaled_coords:
            # fractional -> cartesian: shift by the box origin, scale by the box length
            position = [low + frac * length for low, frac, length in zip(box_low, position, box_len)]

        curr_ids.append(atom_id)
        curr_frame.append(position)
        curr_species.append(atom_type)

    # save last frame
    if reading_atoms:
        _store_frame(curr_ids, curr_species, curr_frame)

    return species, movie

read_xyz(file, extra_cols_indexes=None)

Wrapper around read_xyz_movie for reading single-frame files, kept mostly for compatibility.

Source code in snow/io/xyz.py
def read_xyz(file, extra_cols_indexes=None):
    """
    Read a single-frame xyz file - thin wrapper kept mostly for compatibility.

    Delegates to read_xyz_movie and returns only the first frame of each
    list it produces.

    Parameters
    ----------
    file : str
        Path to the xyz file, forwarded to read_xyz_movie.
    extra_cols_indexes : list, optional
        Column indexes of extra per-atom data, forwarded to read_xyz_movie.

    Returns
    -------
    tuple
        (elements, coords) of the first frame, or (elements, coords, extra_cols)
        when extra_cols_indexes is given.
    """

    # plain case first: only elements and coordinates were requested
    if extra_cols_indexes is None:
        species, positions = read_xyz_movie(file)
        return species[0], positions[0]

    species, positions, extras = read_xyz_movie(file, extra_cols_indexes)
    return species[0], positions[0], extras[0]

read_xyz_movie(file_path, extra_cols_indexes=None)

Obtains the coordinates and elements for each frame of an xyz trajectory.

Parameters

file_path : str Path to the xyz file with the structure extra_cols_indexes : list indexes of the extra columns of per-atom data to be extracted from the .xyz file. Consider that the first four 'indexes' (0 to 3) are the element and the three cartesian coordinates and are returned by default from the function. Example: if your .xyz file has per-atom information like " El pos1 pos2 pos3 force1 force2 force3 charge ", you can get the extra columns force1, force2, charge by passing extra_cols_indexes=[4, 5, 7]. For now only float values parsing is supported.

Returns

Tuple[np.ndarray, np.ndarray] list of lists of chemical symbols and a list of (n_atoms, 3) arrays for the coordinates

Source code in snow/io/xyz.py
def read_xyz_movie(file_path: str, extra_cols_indexes: list = None) -> Tuple[list, ...]:
    """
    Obtains the coordinates and elements for each frame of an xyz trajectory.

    Each frame is expected to be laid out as: one line with the number of atoms,
    one comment line (ignored), then one line per atom. Blank lines between
    frames or at the end of the file are skipped. A final frame with fewer atom
    lines than announced is discarded.

    Parameters
    ----------
    file_path : str
        Path to the xyz file with the structure
    extra_cols_indexes : list, optional
        indexes of the extra columns of per-atom data to be extracted from the .xyz file. Consider that the
        first four 'indexes' (0 to 3) are the element and the three cartesian coordinates and are returned
        by default from the function.
        Example: if your .xyz file has per-atom information like " El pos1 pos2 pos3 force1 force2 force3 charge ",
        you can get the extra columns force1, force2, charge by passing extra_cols_indexes=[4, 5, 7].
        For now only float values parsing is supported.

    Returns
    -------
    Tuple[list, ...]
        (el_list, coords_list): a list of lists of chemical symbols and a list of (n_atoms, 3) arrays
        for the coordinates, one entry per frame. When extra_cols_indexes is given a third list of
        (n_atoms, n_extra_cols) arrays is appended: (el_list, coords_list, extra_cols_list).

    Raises
    ------
    ValueError
        If the atom-count line or a requested numeric column cannot be parsed.
    IndexError
        If an atom line has fewer columns than required.
    """

    el_list = []
    coords_list = []

    if extra_cols_indexes is not None:
        n_extra_cols = len(extra_cols_indexes)
        extra_cols_list = []

    with open(file_path, "r") as file:

        # 1-based position of the current line inside the current frame (0 = between frames)
        conf_line_iter = 0

        for line in file:

            # tolerate blank lines between frames and at the end of the file;
            # a blank *comment* line is still consumed below because the counter is 1 there
            if conf_line_iter == 0 and not line.strip():
                continue

            conf_line_iter += 1

            #beginning frame
            if conf_line_iter == 1:
                n_atoms = int(line.strip())

                # fresh containers per frame so stored frames never share memory
                elements = []
                coords = np.zeros((n_atoms, 3))
                if extra_cols_indexes is not None:
                    extra_cols = np.zeros((n_atoms, n_extra_cols))

            elif conf_line_iter == 2:
                #the comment/general information line carries no per-atom data
                if n_atoms == 0:
                    # an empty frame has no atom line to close it: close it here,
                    # otherwise the counter never resets and later frames are lost
                    el_list.append(elements)
                    coords_list.append(coords)
                    if extra_cols_indexes is not None:
                        extra_cols_list.append(extra_cols)
                    conf_line_iter = 0

            elif conf_line_iter <= n_atoms + 2:
                #read elements and positions
                parts = line.strip().split()

                if len(parts) < 4:
                    print(f'warning this line should have at least 4 values in it but has {len(parts)}')

                elements.append(parts[0])

                atom_index = conf_line_iter - 3

                coords[atom_index, 0] = float(parts[1])
                coords[atom_index, 1] = float(parts[2])
                coords[atom_index, 2] = float(parts[3])

                if extra_cols_indexes is not None:
                    for i, index in enumerate(extra_cols_indexes):
                        extra_cols[atom_index, i] = float(parts[index])

                #frame is over
                if conf_line_iter == n_atoms + 2:
                    el_list.append(elements)
                    coords_list.append(coords)
                    if extra_cols_indexes is not None:
                        extra_cols_list.append(extra_cols)
                    conf_line_iter = 0

    if extra_cols_indexes is not None:
        return el_list, coords_list, extra_cols_list

    return el_list, coords_list

write_xyz(filename, elements, coords, additional_data=None, box=None, mode='w')

Writes atomic data to an XYZ file in OVITO-compatible format. Currently only accepting numbers as additional data.

Parameters

filename: str Name of the output .xyz file. elements: ndarray List of atomic symbols (e.g., ['Au', 'Au', ...]). coords: ndarray Nx3 array of atomic coordinates. additional_data: list or np.ndarray, optional Additional per-atom data, such as coordination numbers. box: np.ndarray a box to be written to file mode: str mode for writing ('a'->append, 'w'->(over)write)

Returns:

Type Description

None

Source code in snow/io/xyz.py
def write_xyz(filename, elements, coords, additional_data=None, box=None, mode='w'):
    """
    Writes atomic data to an XYZ file in OVITO-compatible format. Currently only accepting numbers
    as additional data.

    All inputs are validated and the comment line is built before the file is opened,
    so an invalid argument never truncates or partially writes the target file.

    Parameters
    ----------
    filename: str
        Name of the output .xyz file.
    elements: ndarray
        List of atomic symbols (e.g., ['Au', 'Au', ...]).
    coords: ndarray
        Nx3 array (or nested sequence) of atomic coordinates.
    additional_data: list or np.ndarray, optional
        Additional per-atom data, such as coordination numbers. Either one value per atom
        (shape (n_atoms,)) or one row per atom (shape (n_atoms, n_columns)).
    box: np.ndarray
        a box to be written to file as a `Lattice="..."` entry; supported shapes are
        (3,3), (3,2) and (3,1).
    mode: str
        mode for writing ('a'->append,  'w'->(over)write)

    Returns:
        None

    Raises
    ------
    ValueError
        If additional_data is neither a list nor a np.ndarray, is not 1-D or 2-D, or its
        number of rows differs from the number of atoms.
    Exception
        If box has an unsupported shape.
    """

    n_atoms = len(elements)
    coords = np.asarray(coords)

    # cast additional data to shape (n_atoms, n_columns)
    if additional_data is not None:
        if not isinstance(additional_data, (list, np.ndarray)):
            raise ValueError('Please provide additional data as either a list or a np.ndarray')
        additional_data = np.asarray(additional_data)
        if additional_data.ndim == 1:
            # one value per atom -> single column
            additional_data = additional_data[:, None]
        if additional_data.ndim != 2:
            raise ValueError(
                f"additional_data must be 1-D or 2-D, got {additional_data.ndim} dimensions."
            )
        if additional_data.shape[0] != n_atoms:
            raise ValueError(f"The number of rows in additional_data ({additional_data.shape[0]}) must match the number of atoms ({n_atoms}).")

    # build the general info line up front so a bad box fails before touching the file
    info_line = "Generated XYZ file with optional properties\n"
    if box is not None:
        box = np.asarray(box)
        if box.shape == (3, 3):
            # every entry is followed by a space, including the last one
            lattice = ''.join(f'{box[i,j]} ' for i in range(3) for j in range(3))
        elif box.shape == (3, 1):
            lattice = f'{box[0,0]} 0.0 0.0 0.0 {box[1,0]} 0.0 0.0 0.0 {box[2,0]}'
        elif box.shape == (3, 2):
            lattice = (
                f'{box[0,0]} {box[0,1]} 0.0 '
                f'{box[1,0]} {box[1,1]} 0.0 '
                f'{box[2,0]} {box[2,1]} 0.0'
            )
        else:
            raise Exception('only implemented style for boxes are np.ndarrays with shape (3,3) or (3,2) or (3,1).')
        info_line = f'Lattice="{lattice}" - ' + info_line

    with open(filename, mode) as xyz_file:
        # Write header
        xyz_file.write(f"{n_atoms}\n")
        xyz_file.write(info_line)

        # Write atom data
        for i in range(n_atoms):
            atom_line = f"{elements[i]} {coords[i, 0]:.6f} {coords[i, 1]:.6f} {coords[i, 2]:.6f}"
            if additional_data is not None:
                # Add the additional per-atom data
                atom_line += ' ' + ' '.join(f"{value:.6f}" for value in additional_data[i])
            xyz_file.write(atom_line + "\n")

write_xyz_movie(filename, elements_list, coords_list, additional_data_list=None, box_list=None)

Writes an xyz movie by reiterating the usage of write_xyz function.

Parameters

filename: str Name of the output .xyz file. elements_list: list Per-frame lists of atomic symbols (e.g., ['Au', 'Au', ...]). coords_list: list Per-frame Nx3 arrays of atomic coordinates. additional_data_list: list, optional Per-frame additional per-atom data, such as coordination numbers. box_list: list, optional Per-frame boxes to be written to file

Returns:

Type Description

None

Source code in snow/io/xyz.py
def write_xyz_movie(filename, elements_list, coords_list, additional_data_list=None, box_list=None):
    """
    Writes an xyz movie by calling write_xyz once per frame on the same file.

    The first frame is written with mode 'w' (overwriting any existing file) and
    every following frame is appended with mode 'a'. Frames are paired with zip,
    so iteration stops at the shortest of the given lists.

    Parameters
    ----------
    filename: str
        Name of the output .xyz file.
    elements_list: list
        One sequence of atomic symbols (e.g., ['Au', 'Au', ...]) per frame.
    coords_list: list
        One Nx3 array of atomic coordinates per frame.
    additional_data_list: list, optional
        One entry of additional per-atom data per frame; defaults to None for every frame.
    box_list: list, optional
        One box per frame; defaults to None for every frame.

    Returns:
        None
    """

    # missing optional lists become per-frame None placeholders
    if additional_data_list is None:
        additional_data_list = [None] * len(elements_list)
    if box_list is None:
        box_list = [None] * len(elements_list)

    frames = zip(elements_list, coords_list, additional_data_list, box_list)
    for frame_number, (symbols, positions, extra, cell) in enumerate(frames):
        # only the very first frame may truncate the file
        file_mode = 'w' if frame_number == 0 else 'a'
        write_xyz(filename, symbols, positions, extra, box=cell, mode=file_mode)