Source code for pycofbuilder.io_tools

# -*- coding: utf-8 -*-
# Created by Felipe Lopes de Oliveira
# Distributed under the terms of the MIT License.

"""
This module contains tools for input and output file manipulation used by pyCOFBuilder.
"""

import json
import os
import warnings
from datetime import date

import gemmi
import numpy as np
from ase.io import read
from pymatgen.io.cif import CifParser

from pycofbuilder.cjson import ChemJSON
from pycofbuilder.tools import (cell_to_cellpar, cell_to_ibrav,
                                cellpar_to_cell, elements_dict,
                                get_cartesian_to_fractional_matrix,
                                get_fractional_to_cartesian_matrix, get_kgrid,
                                smiles_to_xsmiles)


[docs] def read_xyz(path: str, file_name: str, extxyz=False) -> tuple: """ Reads a file in format `.xyz` from the given `path` and returns a list containg the N atom labels and a Nx3 array contaning the atoms coordinates. Parameters ---------- path : str Path to the file. file_name : str Name of the `xyz` file. Does not neet to contain the `.xyz` extention. extxyz : bool If True, the function will consider the extended xyz file format and use ase library to read the file. Returns ------- atonTypes : list List of strings containing containg the N atom labels. cartPos : numpy array Nx3 array contaning the atoms coordinates cellMatrix : numpy array 3x3 array contaning the cell vectors. """ # Remove the extention if exists file_name = file_name.split(".")[0] # Check if the file exists if not os.path.exists(os.path.join(path, file_name + ".xyz")): raise FileNotFoundError(f"File {file_name} not found!") if extxyz: atoms = read(os.path.join(path, file_name + ".xyz")) atomTypes = atoms.get_chemical_symbols() # type: ignore cartPos = atoms.get_positions() # type: ignore cellMatrix = atoms.get_cell() # type: ignore else: temp_file = open(os.path.join(path, file_name + ".xyz"), "r").readlines() atoms = [i.split() for i in temp_file[2:]] atomTypes = [i[0] for i in atoms if len(i) > 1] cartPos = np.array( [[float(i[1]), float(i[2]), float(i[3])] for i in atoms if len(i) > 1] ) cellMatrix = np.zeros((3, 3)) return atomTypes, cartPos, cellMatrix
[docs] def read_pdb(path, file_name): """ Reads a file in format `.pdb` from the `path` given and returns a list containg the N atom labels and a Nx3 array contaning the atoms coordinates. Parameters ---------- path : str Path to the file. file_name : str Name of the `pdb` file. Does not neet to contain the `.pdb` extention. Returns ------- atonTypes : list List of strings containing containg the N atom labels. cartPos : numpy array Nx3 array contaning the atoms coordinates cellMatrix : numpy array 3x3 array contaning the cell vectors. """ # Remove the extention if exists file_name = file_name.split(".")[0] if not os.path.exists(os.path.join(path, file_name + ".pdb")): raise FileNotFoundError(f"File {file_name} not found!") temp_file = open(os.path.join(path, file_name + ".pdb"), "r").read().splitlines() has_cell = any(["CRYST1" in i for i in temp_file]) if has_cell: cellParameters = np.array( [i.split()[1:] for i in temp_file if "CRYST1" in i][0] ).astype(float) cellMatrix = cellpar_to_cell(cellParameters) else: cellParameters = np.zeros(6) cellMatrix = np.zeros((3, 3)) if any(["ATOM" in i for i in temp_file]): atomTypes = [i.split()[2] for i in temp_file if "ATOM" in i] cartPos = np.array([i.split()[5:8] for i in temp_file if "ATOM" in i]).astype( float ) else: atomTypes = [i.split()[-1] for i in temp_file if "HETATM" in i] cartPos = np.array([i.split()[3:6] for i in temp_file if "HETATM" in i]).astype( float ) return atomTypes, cartPos, cellMatrix
[docs] def read_gjf(path, file_name) -> tuple: """ Reads a file in format `.gjf` from the `path` given and returns a list containg the N atom labels and a Nx3 array contaning the atoms coordinates. Parameters ---------- path : str Path to the file. file_name : str Name of the `gjf` file. Does not neet to contain the `.gjf` extention. Returns ------- atonTypes : list List of strings containing containg the N atom labels. cartPos : numpy array Nx3 array contaning the atoms coordinates cellMatrix : numpy array 3x3 array contaning the cell vectors. """ # Remove the extention if exists file_name = file_name.split(".")[0] if not os.path.exists(os.path.join(path, file_name + ".gjf")): raise FileNotFoundError(f"File {file_name} not found!") temp_file = open(os.path.join(path, file_name + ".gjf"), "r").readlines() temp_file = [i.split() for i in temp_file if i != "\n"] atoms = [i for i in temp_file if i[0] in elements_dict()] atonTypes = [i[0] for i in atoms] cartPos = np.array([[float(i[1]), float(i[2]), float(i[3])] for i in atoms]) cellMatrix = [i for i in temp_file if "Tv" in i] if cellMatrix: cellMatrix = np.array([i[1:] for i in cellMatrix]).astype(float) else: cellMatrix = np.zeros((3, 3)) return atonTypes, cartPos, cellMatrix
[docs] def read_cif(path, file_name, useASE=False, usePymatgen=False): """ Reads a file in format `.cif` from the `path` given and returns a list containg the N atom labels and a Nx3 array contaning the atoms coordinates. Parameters ---------- path : str Path to the file. file_name : str Name of the `cif` file. Does not neet to contain the `.cif` extention. useASE : bool If True, the function will use ASE library to read the file. usePymatgen : bool If True, the function will use Pymatgen library to read the file. Returns ------- atomTypes : list List of strings containing containg the N atom labels. cartPos : numpy array Nx3 array contaning the atoms coordinates cellMatrix : numpy array 3x3 array contaning the cell vectors. partialCharges : list List of floats containing the N atom partial charges. """ # Remove the extention if exists file_name = file_name.split(".")[0] if not os.path.exists(os.path.join(path, file_name + ".cif")): raise FileNotFoundError(f"File {file_name} not found!") # Check if the cif files is in P1 symmetry if not useASE and not usePymatgen: cif = gemmi.cif.read_file(os.path.join(path, file_name + ".cif")).sole_block() # Check if the cif is not with P1 symmetry symm_elements = 0 for loop_name in [ "_symmetry_equiv_pos_as_xyz", "_space_group_symop_operation_xyz", ]: if len(cif.find_loop(loop_name)) != 0: symm_elements = len(cif.find_loop(loop_name)) if symm_elements == 0 or symm_elements > 1: warnings.warn( "The CIF file is not in P1 symmetry. The structure will be read using pyMatGen." ) usePymatgen = True if useASE: atoms = read(os.path.join(path, file_name + ".cif")) atomTypes = atoms.get_chemical_symbols() # type: ignore cartPos = atoms.get_positions() # type: ignore cellMatrix = np.array(atoms.get_cell()) # type: ignore partialCharges = atoms.get_initial_charges() # type: ignore elif usePymatgen: structure = CifParser(os.path.join(path, file_name + ".cif")).get_structures( primitive=False )[0] atomTypes = [str(i) for i in structure.species] cartPos = structure.cart_coords cellMatrix = structure.lattice.matrix partialCharges = [0 for i in range(len(atomTypes))] else: cif = gemmi.cif.read_file(os.path.join(path, file_name + ".cif")).sole_block() a = float(cif.find_value("_cell_length_a").split("(")[0]) b = float(cif.find_value("_cell_length_b").split("(")[0]) c = float(cif.find_value("_cell_length_c").split("(")[0]) beta = float(cif.find_value("_cell_angle_beta").split("(")[0]) gamma = float(cif.find_value("_cell_angle_gamma").split("(")[0]) alpha = float(cif.find_value("_cell_angle_alpha").split("(")[0]) cellMatrix = cellpar_to_cell([a, b, c, alpha, beta, gamma]) atomTypes = list(cif.find_values("_atom_site_type_symbol")) if len(atomTypes) == 0: atomTypes = list(cif.find_values("_atom_site_label")) atomTypes = [i.rstrip("0123456789") for i in atomTypes] atom_site_fract_x = np.array(cif.find_values("_atom_site_fract_x")).astype( float ) atom_site_fract_y = np.array(cif.find_values("_atom_site_fract_y")).astype( float ) atom_site_fract_z = np.array(cif.find_values("_atom_site_fract_z")).astype( float ) cartPos = np.array( [ np.dot(cellMatrix, [i, j, k]) for i, j, k in zip( atom_site_fract_x, atom_site_fract_y, atom_site_fract_z ) ] ) partialCharges = np.array(cif.find_values("_atom_site_charge")).astype(float) return atomTypes, cartPos, cellMatrix, partialCharges
[docs] def save_csv( path: str, file_name: str, data: list, delimiter: str = ",", head: list = [] ) -> None: """ Saves a file in format `.csv`. Parameters ---------- path : str Path to the file. file_name : str Name of the `csv` file. Does not neet to contain the `.csv` extention. data : list Data to be saved. delimiter: str Delimiter of the columns. `,` is the default. head : list Names of the columns. """ # Remove the extention if exists file_name = file_name.split(".")[0] file_name = os.path.join(path, file_name + ".csv") content = [] if len(head) > 0: content.append(delimiter.join(head)) for i in data: content.append(delimiter.join([str(j) for j in i])) with open(file_name, "w") as f: f.write("\n".join(content))
[docs] def save_xsf( path: str, file_name: str, atomTypes: list, atomPos: list, cellMatrix: list, frac_coords=False, **kwargs, ): """ Save a file in format `.xsf` on the `path`. Parameters ---------- path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. atomTypes : list List of strings containing containg the N atom types atomPos : list Nx3 array contaning the atoms coordinates. cellMatrix : numpy array Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. frac_coords : bool If True, the coordinates are in fractional coordinates. """ file_name = file_name.split(".")[0] if len(cellMatrix) == 6: cellMatrix = cellpar_to_cell(cellMatrix) # type: ignore if frac_coords: # Convert to fractional coordinates frac_matrix = get_fractional_to_cartesian_matrix(*cell_to_cellpar(cellMatrix)) atomPos = [np.dot(frac_matrix, [i[0], i[1], i[2]]) for i in atomPos] xsf_file = [] xsf_file.append("CRYSTAL") xsf_file.append("PRIMVEC") for i in range(len(cellMatrix)): xsf_file.append( f" {cellMatrix[i][0]:>15.9f} {cellMatrix[i][1]:>15.9f} {cellMatrix[i][2]:>15.9f}" ) xsf_file.append(" PRIMCOORD") xsf_file.append(f" {len(atomPos)} 1") for i in range(len(atomPos)): xsf_file.append( "{:3s} {:>15.9f} {:>15.9f} {:>15.9f}".format( atomTypes[i], atomPos[i][0], atomPos[i][1], atomPos[i][2] ) ) with open(os.path.join(path, file_name + ".xsf"), "w") as f: f.write("\n".join(xsf_file))
[docs] def save_pqr( path: str, file_name: str, atomTypes: list, atomPos: list, cell: list = [], partialCharges: list = [], frac_coords=False, atomLabels: list = [], bonds: list = [], bond_orders: list = [], ) -> None: """ Save a file in format `.pqr` on the `path`. Parameters ---------- path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. atomTypes : list List of strings containing containg the N atom types atomPos : list Nx3 array contaning the atoms coordinates. cell : list, optional Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. partialCharges : list, optional List of strings containing containg the N atom partial charges. frac_coords : bool, optional If True, the coordinates are in fractional coordinates and will be converted to cartesian. Default is False. atomLabels : list, optional List of strings containing containg the N atom labels. bonds : list, optional List of lists containing the index of the bonded atoms and the bond length. bond_orders : list, optional List of integers containing the bond orders. """ file_name = file_name.split(".")[0] if len(cell) == 3: cell = cell_to_cellpar(cell) # type: ignore if frac_coords: # Convert to fractional coordinates frac_matrix = get_fractional_to_cartesian_matrix(*cell) atomPos = [np.dot(frac_matrix, [i[0], i[1], i[2]]) for i in atomPos] pqr_file = [] pqr_file.append(f"TITLE {file_name}") pqr_file.append("REMARK 4") pqr_file.append(f"REMARK Created by pyCOFBuilder on {date.today()}") if len(cell) > 0: pqr_file.append( "CRYST1{:>9.3f}{:>9.3f}{:>9.3f}{:>7.2f}{:>7.2f}{:>7.2f} P1".format(*cell) ) if len(partialCharges) == 0: partialCharges = [0 for i in range(len(atomTypes))] if len(atomLabels) == 0: atomLabels = atomTypes atom_line = ( "ATOM {:>4} {:>2} MOL A 0 {:>8.3f}{:>8.3f}{:>8.3f}{:>8.5f} {:>15}" ) for i in range(len(atomPos)): pqr_file.append( atom_line.format( i + 1, atomTypes[i], atomPos[i][0], atomPos[i][1], atomPos[i][2], partialCharges[i], atomLabels[i], ) ) if bonds and not bond_orders: bond_orders = [1 for i in range(len(bonds))] if bonds: for i in range(len(bonds)): for j in range(bond_orders[i]): pqr_file.append(f"CONECT {bonds[i][0] + 1:4} {bonds[i][1] + 1:4}") with open(os.path.join(path, file_name + ".pqr"), "w") as f: f.write("\n".join(pqr_file))
[docs] def save_pdb( path: str, file_name: str, atomTypes: list, atomPos: list, cell: list = [], frac_coords=False, atomLabels: list = [], bonds: list = [], bond_orders: list = [], **kwargs, ) -> None: """ Save a file in format `.pdb` on the `path`. path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. atomTypes : list List of strings containing containg the N atom types atomPos : list Nx3 array contaning the atoms coordinates. cell : list, optional Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. frac_coords : bool, optional If True, the coordinates are in fractional coordinates and will be converted to cartesian. Default is False. atomLabels : list, optional List of strings containing containg the N atom labels. bonds : list, optional List of lists containing the index of the bonded atoms and the bond length. bond_orders : list, optional List of integers containing the bond orders. """ file_name = file_name.split(".")[0] if len(cell) == 3: cell = cell_to_cellpar(cell) # type: ignore if frac_coords: # Convert to fractional coordinates frac_matrix = get_fractional_to_cartesian_matrix(*cell) atomPos = [np.dot(frac_matrix, [i[0], i[1], i[2]]) for i in atomPos] pdb_file = [] pdb_file.append(f"TITLE {file_name}") pdb_file.append(f"REMARK Created by pyCOFBuilder on {date.today()}") if len(cell) > 0: pdb_file.append( "CRYST1{:>9.3f}{:>9.3f}{:>9.3f}{:>7.2f}{:>7.2f}{:>7.2f} P1".format(*cell) ) atom_line = ( "ATOM {:>4} {:>2} MOL {:>13.3f}{:>8.3f}{:>8.3f} 1.00 0.00 {:>11}" ) for i in range(len(atomPos)): pdb_file.append( atom_line.format( i + 1, atomTypes[i], atomPos[i][0], atomPos[i][1], atomPos[i][2], atomLabels[i], ) ) if bonds and not bond_orders: bond_orders = [1 for i in range(len(bonds))] if bonds: for i in range(len(bonds)): for j in range(bond_orders[i]): pdb_file.append(f"CONECT {bonds[i][0] + 1:4} {bonds[i][1] + 1:4}") with open(os.path.join(path, file_name + ".pdb"), "w") as f: f.write("\n".join(pdb_file))
[docs] def save_gjf( path: str, file_name: str, atomTypes: list, atomPos: list, cell: list = [], frac_coords=False, **kwargs, ) -> None: """ Save a file in format `.gjf` on the `path`. Parameters ---------- path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. atomTypes : list List of strings containing containg the N atom types atomPos : list Nx3 array contaning the atoms coordinates. cell : list, optional Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. frac_coords : bool, optional If True, the coordinates are in fractional coordinates and will be converted to cartesian. Default is False. """ file_name = file_name.split(".")[0] if len(cell) == 6: cell = cellpar_to_cell(cell) # type: ignore if frac_coords: # Convert to fractional coordinates frac_matrix = get_fractional_to_cartesian_matrix(*cell_to_cellpar(cell)) atomPos = [np.dot(frac_matrix, [i[0], i[1], i[2]]) for i in atomPos] gjf_file = [] gjf_file.append("opt pm6") gjf_file.append("") gjf_file.append(file_name) gjf_file.append("") gjf_file.append("0 1") for i in range(len(atomPos)): gjf_file.append( "{:<5s}{:>15.7f}{:>15.7f}{:>15.7f}\n".format( atomTypes[i], atomPos[i][0], atomPos[i][1], atomPos[i][2] ) ) if len(cell) > 0: for i in cell: gjf_file.append("Tv {:>15.7f}{:>15.7f}{:>15.7f}\n".format(*i)) gjf_file.append("") gjf_file.append("") with open(os.path.join(path, file_name + ".gjf"), "w") as f: f.write("\n".join(gjf_file))
[docs] def save_xyz( path: str, file_name: str, atomTypes: list, atomPos: list, cell: list = [], partialCharges: list = [], frac_coords=False, **kwargs, ) -> None: """ Save a file in format `.pqr` on the `path`. Parameters ---------- path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. atomTypes : list List of strings containing containg the N atom types atomPos : list Nx3 array contaning the atoms coordinates. cell : list, optional Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. partialCharges : list, optional List of strings containing containg the N atom partial charges. frac_coords : bool, optional If True, the coordinates are in fractional coordinates and will be converted to cartesian. Default is False. """ file_name = file_name.split(".")[0] if len(cell) == 3: cell = cell_to_cellpar(cell) # type: ignore if frac_coords: # Convert to fractional coordinates frac_matrix = get_fractional_to_cartesian_matrix(*cell) atomPos = [np.dot(frac_matrix, [i[0], i[1], i[2]]) for i in atomPos] if len(partialCharges) == 0: partialCharges = [0.0 for i in range(len(atomTypes))] xyz_file = [] xyz_file.append(f"{len(atomTypes)}") header = "" if len(cell) == 6: header += 'Lattice="{:>15.7f} {:>15.7f} {:>15.7f} {:>15.7f} {:>15.7f} {:>15.7f}" pbc="T T T"'.format( *cell ) xyz_file.append(header + " species:S:1:pos:R:3:charge:R:1") for i in range(len(atomTypes)): xyz_file.append( "{:<5s}{:>15.7f}{:>15.7f}{:>15.7f}{:>15.7f}".format( atomTypes[i], atomPos[i][0], atomPos[i][1], atomPos[i][2], partialCharges[i], ) ) with open(os.path.join(path, file_name + ".xyz"), "w") as f: f.write("\n".join(xyz_file))
[docs] def save_turbomole( path: str, file_name: str, atomTypes: list, atomPos: list, cell: list = [], frac_coords=False, **kwargs, ) -> None: """ Save the structure in Turbomole .coord format on the `path`. Parameters ---------- path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. atomTypes : list List of strings containing containg the N atom types atomPos : list Nx3 array contaning the atoms coordinates. cell : list, optional Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. frac_coords : bool, optional If True, the coordinates are in fractional coordinates and will be converted to cartesian. Default is False. """ file_name = file_name.split(".")[0] if np.array(cell).shape == (3, 3): cell = cell_to_cellpar(cell) # type: ignore if frac_coords: # Convert to fractional coordinates frac_matrix = get_fractional_to_cartesian_matrix(*cell) atomPos = [np.dot(frac_matrix, [i[0], i[1], i[2]]) for i in atomPos] temp_file = ["$coord angs"] for i in range(len(atomTypes)): temp_file.append( "{:>15.7f}{:>15.7f}{:>15.7f} {:<5s}".format( atomPos[i][0], atomPos[i][1], atomPos[i][2], atomTypes[i] ) ) temp_file.append("$periodic 3") temp_file.append("$cell") temp_file.append("{} {} {} {} {} {}".format(*cell)) temp_file.append("$opt") temp_file.append(" engine=inertial") temp_file.append("$end") with open(os.path.join(path, file_name + ".coord"), "w") as f: f.write("\n".join(temp_file))
[docs] def save_vasp( path: str, file_name: str, atomTypes: list, atomPos: list, cell: list = [], frac_coords=False, **kwargs, ) -> None: """ Save the structure in VASP .vasp format on the `path`. Parameters ---------- path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. atomTypes : list List of strings containing containg the N atom types atomPos : list Nx3 array contaning the atoms coordinates. cell : list, optional Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. frac_coords : bool, optional If True, the coordinates are in fractional coordinates and will be converted to cartesian. Default is False. """ file_name = file_name.split(".")[0] if np.array(cell).shape == 6: cell = cellpar_to_cell(cell) # type: ignore unique_atoms = [] for i in atomTypes: if i not in unique_atoms: unique_atoms.append(i) composition_dict = {i: atomTypes.count(i) for i in unique_atoms} temp_file = [] temp_file.append(file_name) temp_file.append("1.0") for i in range(3): temp_file.append("{:>15.7f}{:>15.7f}{:>15.7f}".format(*cell[i])) temp_file.append(" ".join(composition_dict.keys())) temp_file.append(" ".join([str(i) for i in composition_dict.values()])) if frac_coords: temp_file.append("Direct") else: temp_file.append("Cartesian") for i in range(len(atomTypes)): temp_file.append( "{:>15.7f}{:>15.7f}{:>15.7f} {:<5s}".format( atomPos[i][0], atomPos[i][1], atomPos[i][2], atomPos[i] ) ) with open(os.path.join(path, file_name + ".vasp"), "w") as f: f.write("\n".join(temp_file))
[docs] def save_qe( path: str, file_name: str, cell: list, atomTypes: list, atomLabels: list, atomPos: list, frac_coords=False, calc_type: str = "scf", kspacing: float = 0.3, **kwargs, ) -> None: """ Save the structure in Quantum Espresso .pwscf format. The `input_dict` can be used to specify the input parameters for the QuantumESPRESSO calculation. This dictionary must contain the keys: `control`, `system`, `electrons`, and `ions`. Each of these keys must contain a dictionary with the corresponding input parameters. This dictionary can contain the kpoints item, with the kpoints grid as a list of 3 integers. Additionally, it can contain the kspacing item, with the kpoints spacing as a float. In this case the kpoints grid will be calculated automatically. By default, the kspacing is set to 0.3. Parameters ---------- path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. cell : numpy array Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. atomTypes : list List of strings containing containg the N atom types atomLabels : list List of strings containing containg the N atom labels atomPos : list Nx3 array contaning the atoms coordinates. frac_coords : bool If True, the coordinates are in fractional coordinates. calc_type : str Type of calculation. Can be 'scf', 'vc-relax', 'relax', 'md', 'vc-md', 'vc-tddft', 'tddft'. kspacing : float Kpoints spacing in 1/Angstrom. """ if len(cell) == 6: cell_matrix = cellpar_to_cell(cell) else: cell_matrix = cell ibrav_dict = cell_to_ibrav(cell_matrix) input_dict = {} input_dict["control"] = { "prefix": f"'{file_name}'", "calculation": f"{calc_type}", "restart_mode": "'from_scratch'", "wf_collect": ".true.", "pseudo_dir": "'$PSEUDO_DIR'", "outdir": "'$SCRATCH_DIR'", "verbosity": "'high'", "tstress": ".true.", "tprnfor": ".true.", "etot_conv_thr": "1.0d-5", "forc_conv_thr": "1.0d-6", "nstep": 1000, } input_dict["system"] = { "nat": len(atomTypes), "ntyp": len(set(atomTypes)), "ecutwfc": 40, "ecutrho": 360, "vdw_corr": "'grimme-d3'", "occupations": "'smearing'", **ibrav_dict, } input_dict["electrons"] = { "conv_thr": 1.0e-9, "electron_maxstep": 100, "mixing_beta": 0.3, } if calc_type == "vc-relax": input_dict["ions"] = {"ion_dynamics": "'bfgs'"} input_dict["cell"] = {"cell_dynamics": "'bfgs'", "cell_dofree": "'all'"} # If the kpoints grid is not specified, calculate it automatically if "k_points" not in input_dict.keys(): if "kspacing" not in input_dict.keys(): input_dict["kspacing"] = kspacing input_dict["kpoints"] = get_kgrid(cell_matrix, input_dict["kspacing"]) with open(os.path.join(path, file_name + ".pwscf"), "w") as f: f.write("&CONTROL\n") for key in input_dict["control"]: f.write(f" {key} = {input_dict['control'][key]}\n") f.write("/\n\n") f.write("&SYSTEM\n") for key in input_dict["system"]: f.write(f" {key} = {input_dict['system'][key]}\n") f.write("/\n\n") f.write("&ELECTRONS\n") for key in input_dict["electrons"]: f.write(f" {key} = {input_dict['electrons'][key]}\n") f.write("/\n\n") if calc_type == "vc-relax": f.write("&IONS\n") for key in input_dict["ions"]: f.write(f" {key} = {input_dict['ions'][key]}\n") f.write("/\n\n") f.write("&CELL\n") for key in input_dict["cell"]: f.write(f" {key} = {input_dict['cell'][key]}\n") f.write("/\n\n") f.write("ATOMIC_SPECIES\n") for atom in set(atomTypes): f.write(f" {atom} {elements_dict()[atom]:>9.5f} {atom}.PSEUDO.UPF\n") f.write("\n") # f.write('CELL_PARAMETERS (angstrom) \n') # for v in cell_matrix: # f.write(f'{v[0]:>15.9f} {v[1]:>15.9f} {v[2]:>15.9f}\n') # f.write('\n') if frac_coords: coords_type = "crystal" else: coords_type = "angstrom" f.write(f"ATOMIC_POSITIONS ({coords_type})\n") for i, atom in enumerate(atomPos): f.write( "{:<5s}{:>15.9f}{:>15.9f}{:>15.9f} ! {:5}\n".format( atomTypes[i], atom[0], atom[1], atom[2], atomLabels[i] ) ) f.write("\n") f.write("K_POINTS automatic\n") f.write(" {} {} {} 1 1 1\n".format(*input_dict["kpoints"]))
[docs] def save_cif( path: str, file_name: str, atomTypes: list, atomPos: list, cell: list = [], atomLabels: list = [], partialCharges: list[float] = [], bonds: list = [], bondTypes: list = [], frac_coords=False, renormalize_charges=False, **kwargs, ) -> None: """ Save a file in format `.cif` on the `path`. Parameters ---------- path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. atomTypes : list List of strings containing containg the N atom types atomPos : list Nx3 array contaning the atoms coordinates. cell : numpy array Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. atomLabels : list List of strings containing containg the N atom labels partialCharges : list List of strings containing containg the N atom partial charges. bonds : list List of lists containing the index of the bonded atoms and the bond length. bondTypes : list List of strings containing the bond types. Can be singe (S), double (D), triple (T), or aromatic (A). frac_coords : bool If True, the coordinates are in fractional coordinates. renormalize_charges : bool If True, the charges will be renormalized to the total charge of the system. """ file_name = file_name.split(".")[0] if len(cell) == 3: a, b, c, alpha, beta, gamma = cell_to_cellpar(cell) if len(cell) == 6: a, b, c, alpha, beta, gamma = cell if renormalize_charges and partialCharges: partialCharges = np.array(partialCharges) - np.mean(partialCharges) # type: ignore if not partialCharges: partialCharges = [0 for i in range(len(atomTypes))] if not atomLabels: atomLabels = [f"{atomTypes[i]}{i+1}" for i in range(len(atomTypes))] cif_text = f"""\ data_{file_name} _audit_creation_date {date.today().strftime("%Y-%d-%m")} _audit_creation_method pyCOFBuilder _chemical_name_common '{file_name}' _cell_length_a {a:>10.6f} _cell_length_b {b:>10.6f} _cell_length_c {c:>10.6f} _cell_angle_alpha {alpha:>6.2f} _cell_angle_beta {beta:>6.2f} _cell_angle_gamma {gamma:>6.2f} _space_group_name_H-M_alt 'P 1' _space_group_IT_number 1 loop_ _symmetry_equiv_pos_as_xyz 'x, y, z' loop_ _atom_site_label _atom_site_type_symbol _atom_site_fract_x _atom_site_fract_y _atom_site_fract_z _atom_site_charge """ if not frac_coords: # Convert to fractional coordinates frac_matrix = get_cartesian_to_fractional_matrix(a, b, c, alpha, beta, gamma) atomPos = [np.dot(frac_matrix, [i[0], i[1], i[2]]) for i in atomPos] for i, pos in enumerate(atomPos): u, v, w = pos[0], pos[1], pos[2] cif_text += "{:<15} {} {:>15.9f} {:>15.9f} {:>15.9f} {:>10.5f}\n".format( atomLabels[i], atomTypes[i], u, v, w, partialCharges[i] ) if bonds: cif_text += "\nloop_\n" cif_text += "_geom_bond_atom_site_label_1\n" cif_text += "_geom_bond_atom_site_label_2\n" cif_text += "_geom_bond_distance\n" cif_text += "_geom_bond_site_symmetry_2\n" cif_text += "_ccdc_geom_bond_type\n" if not bondTypes: bondTypes = ["S" for i in range(len(bonds))] for i, bond in enumerate(bonds): cif_text += f"{atomLabels[bond[0]]:10} {atomLabels[bond[1]]:10} {bond[2]:.5f} . {bondTypes[i]}\n" with open(os.path.join(path, file_name + ".cif"), "w") as f: f.write(cif_text)
[docs] def save_chemjson( path: str, file_name: str, cell: list, atomTypes: list, atomLabels: list, atomPos: list, partialCharges: list = [], bonds: list = [], frac_coords=False, ): """ Save a file in format `.json` on the `path`. Parameters ---------- path : str Path to the save the file. file_name : str Name of the file. Does not neet to contain the extention. cell : numpy array Can be a 3x3 array contaning the cell vectors or a list with the 6 cell parameters. atomTypes : list List of strings containing containg the N atom types atomLabels : list List of strings containing containg the N atom labels atomPos : list Nx3 array contaning the atoms coordinates. partialCharges : list List of floats containing containg the N atom partial charges. bonds : list List of lists containing the index of the bonded atoms and the bond length. frac_coords : bool If True, the coordinates are in fractional coordinates. """ file_name = file_name.split(".")[0] data = ChemJSON() if len(cell) == 6: data.set_cell_parameters(cell) elif len(cell) == 3: data.set_cell_matrix(cell) else: raise ValueError("Cell parameters not provided.") data.set_atomic_types(atomTypes) if frac_coords: data.set_fractional_positions(atomPos) else: data.set_cartesian_positions(atomPos) if partialCharges: pass # TODO: Add partial charges if atomLabels: pass # TODO: Add atom labels if bonds: pass # TODO: Add bonds data.write_cjson(path, file_name + ".cjson")
[docs] def generate_mol_dict(path, file_name, name, code, smiles): xsmiles, xsmiles_label, composition = smiles_to_xsmiles(smiles) if file_name.endswith("gjf"): atom_types, atom_pos = read_gjf(path, file_name) elif file_name.endswith("xyz"): atom_types, atom_pos = read_xyz(path, file_name) mol_dict = { "name": name, "smiles": smiles, "code": code, "xsmiles": xsmiles, "xsmiles_label": xsmiles_label, "formula": composition, "atoms": { "elements": {"elementType": atom_types}, "coords": {"3d": atom_pos.tolist()}, }, } print(mol_dict) # Write the dictionary to a json file with open(os.path.join(path, file_name.split(".")[0] + ".json"), "w") as f: json.dump(mol_dict, f, indent=4)