# -*- coding: utf-8 -*-
# Created by Felipe Lopes de Oliveira
# Distributed under the terms of the MIT License.
"""
The CJSON package implements functions to read, create and manipulate Chemical JSON objects.
"""
import os
import gemmi
import numpy as np
import simplejson
from ase.cell import Cell
from pycofbuilder.tools import elements_dict
[docs]
class ChemJSON:
"""
Class to read, create and manipulate ChemJSON files.
Attributes
----------
file_name : str
The name of the file.
name : str
The name of the structure.
cell_parameters : list
The cell parameters of the structure as a (1,6) list.
cell_matrix : list
The cell matrix of the structure as a (3,3) list.
cartesian_positions : list
The cartesian positions of the structure as a (n,3) list.
fractional_positions : list
The fractional positions of the structure as a (n,3) list.
atomic_numbers : list
The atomic numbers of the structure as a (n,1) list.
atomic_types : list
The atomic types of the structure as a (n,1) list.
atomic_labels : list
The atomic labels of the structure as a (n,1) list.
formula : str
The formula of the structure.
properties : dict
The properties of the structure.
partial_charges : dict
A dictionary containing the partial charges of the atoms on the structure.
Example: {'DDEC': [0.1, 0.2, 0.15], 'EQeq': [0.05, 0.15, 0.19]}
"""
def __init__(self):
    """
    Creates an empty structure: no name, no cell, no atoms and no bonds.
    """
    # Identification
    self.name = ""
    self.file_name = ""

    # Unit cell; the entries stay ``None`` until a cell is provided
    self.cell_parameters: list = [None for _ in range(6)]  # [a, b, c, alpha, beta, gamma]
    self.cell_matrix: list = [[None, None, None] for _ in range(3)]  # 3x3 matrix

    # Atoms
    self.atomic_types: list = []  # list of atomic types
    self.atomic_numbers: list = []  # list of atomic numbers
    self.atomic_labels: list = []  # list of atomic labels
    self.cartesian_positions: list = []  # list of [x, y, z]
    self.fractional_positions: list = []  # list of [x, y, z]
    self.formula: str = ""

    # Connectivity
    self.bonds: list = []  # list of bond indexes
    self.bond_orders: list = []  # list of bond orders

    # Additional data attached to the structure
    self.partial_charges: dict = {}  # charge type -> list of values
    self.properties: dict = {}  # dictionary of properties
    self.results: list = []  # list of results
# Create a custom representation of the class
def __repr__(self):
    """
    Returns a one-line representation of the structure containing its
    name, its formula and its number of atoms.
    """
    # The closing parenthesis keeps the text balanced, matching the
    # summary line produced by ``__str__``.
    return "ChemJSON(name='{}', formula='{}', number of atoms={})".format(
        self.name, self.formula, len(self.atomic_types)
    )
# Create a custom print of the class
def __str__(self):
    """
    Returns a human-readable, multi-line description of the structure.

    The text starts with a one-line summary (name, formula and number of
    atoms) followed by the cell parameters and cell matrix, when a cell is
    defined, and by the cartesian and fractional positions of the atoms.
    """
    lines = [
        "ChemJSON(name='{}', formula='{}', number of atoms={})".format(
            self.name, self.formula, len(self.atomic_types)
        )
    ]

    # A newly created object stores ``None`` placeholders for the cell.
    # Formatting them as floats raises a TypeError, so the cell section is
    # only written once every cell parameter has a value.
    cell_defined = (
        self.cell_parameters is not None and None not in self.cell_parameters
    )
    if cell_defined:
        parameter_rows = zip(
            ("a", "b", "c", "α", "β", "γ"),
            self.cell_parameters,
            ("Å", "Å", "Å", "°", "°", "°"),
        )
        lines.append("Cell parameters:")
        lines.extend(
            f"{label} = {value:>12.7f} {unit}"
            for label, value, unit in parameter_rows
        )
        lines.append("Cell matrix:")
        lines.extend(
            f"{label} {row[0]:>12.7f} {row[1]:>12.7f} {row[2]:>12.7f}"
            for label, row in zip("ABC", self.cell_matrix)
        )

    sections = (
        ("Cartesian positions:", self.cartesian_positions),
        ("Fractional positions:", self.fractional_positions),
    )
    for title, positions in sections:
        if positions is None:
            continue
        lines.append(title)
        for i, position in enumerate(positions):
            lines.append(
                " {:3} {:>9.5f} {:>9.5f} {:>9.5f}".format(
                    self.atomic_types[i], position[0], position[1], position[2]
                )
            )

    # Every line, including the last one, is terminated by a newline.
    return "\n".join(lines) + "\n"
[docs]
def set_properties(self, properties: dict):
    """
    Stores the given dictionary as the properties of the structure.

    Parameters
    ----------
    properties : dict
        The properties of the structure. Any properties stored before
        are replaced; the dictionary is kept as given, not copied.
    """
    self.properties = properties
[docs]
def set_results(self, results):
    """
    Stores the given object as the results of the structure.

    Parameters
    ----------
    results : list
        The results of the structure. Any results stored before are
        replaced; the object is kept as given, not copied.
    """
    self.results = results
[docs]
def set_cell_parameters(self, cell_parameters):
    """
    Sets the cell parameters of the structure. The cell
    matrix will be calculated and also updated.

    Parameters
    ----------
    cell_parameters : list
        The cell parameters in the format [a, b, c, alpha, beta, gamma].
    """
    self.cell_parameters = cell_parameters

    # The matrix is built by ASE from the six parameters and stored as a
    # plain nested list.
    aseCell = Cell.fromcellpar(cell_parameters)
    self.cell_matrix = np.array(aseCell).tolist()
[docs]
def set_cell_matrix(self, cell_matrix):
    """
    Sets the cell matrix of the structure. The cell
    parameters will be calculated and also updated.

    Parameters
    ----------
    cell_matrix : list
        The three cell vectors as a (3,3) list or array.
    """
    # Store a plain nested list of floats whatever the input type is
    # (``from_cjson`` passes a numpy array), so that the attribute has the
    # same type as the one produced by ``set_cell_parameters``.
    matrix = np.array(cell_matrix).astype(float)
    self.cell_matrix = matrix.tolist()

    aseCell = Cell(matrix)
    self.cell_parameters = aseCell.cellpar().tolist()
[docs]
def set_cartesian_positions(self, cartesian_positions):
    """
    Sets the cartesian positions of the structure. When a cell is defined
    the fractional positions will be calculated and also updated.

    Parameters
    ----------
    cartesian_positions : list
        The cartesian positions as a (n,3) list or array.
    """
    positions = np.array(cartesian_positions).astype(float)
    self.cartesian_positions = positions.tolist()

    # Without a cell there is nothing to convert to.
    if None in self.cell_parameters:
        return

    # No atoms: avoid handing an empty array to the linear solver.
    if positions.size == 0:
        self.fractional_positions = []
        return

    # Prefer the stored cell matrix: rebuilding the cell from the six
    # parameters gives a standard orientation, which may differ from the
    # orientation the cartesian coordinates refer to.
    if all(value is not None for row in self.cell_matrix for value in row):
        aseCell = Cell(np.array(self.cell_matrix, dtype=float))
    else:
        aseCell = Cell.fromcellpar(self.cell_parameters)

    self.fractional_positions = aseCell.scaled_positions(positions).tolist()
[docs]
def set_fractional_positions(self, fractional_positions):
    """
    Sets the fractional positions of the structure. When a cell is defined
    the cartesian positions will be calculated and also updated.

    Parameters
    ----------
    fractional_positions : list
        The fractional positions as a (n,3) list or array.
    """
    positions = np.array(fractional_positions).astype(float)
    self.fractional_positions = positions.tolist()

    # Without a cell there is nothing to convert to.
    if None in self.cell_parameters:
        return

    # No atoms: avoid multiplying an empty array by the cell matrix.
    if positions.size == 0:
        self.cartesian_positions = []
        return

    # Prefer the stored cell matrix: rebuilding the cell from the six
    # parameters gives a standard orientation, which may differ from the
    # orientation of the stored cell vectors.
    if all(value is not None for row in self.cell_matrix for value in row):
        aseCell = Cell(np.array(self.cell_matrix, dtype=float))
    else:
        aseCell = Cell.fromcellpar(self.cell_parameters)

    self.cartesian_positions = aseCell.cartesian_positions(positions).tolist()
[docs]
def set_atomic_types(self, atomic_types):
    """
    Sets the atomic types of the structure. The atomic labels, atomic
    numbers and formula will be calculated and also updated.

    Parameters
    ----------
    atomic_types : list
        The type of each atom, used as key of
        ``elements_dict("atomic_number")``. A type missing from that
        dictionary raises a KeyError.
    """
    types = list(atomic_types)
    self.atomic_types = types

    # Labels are the atomic type followed by the 1-based atom index.
    self.atomic_labels = [f"{atom}{i+1}" for i, atom in enumerate(types)]

    symbol_dict = elements_dict("atomic_number")
    self.atomic_numbers = [symbol_dict[atom] for atom in types]

    # Count every type in a single pass.
    counts = {}
    for atom in types:
        counts[atom] = counts.get(atom, 0) + 1

    # Sorted types make the formula reproducible: the iteration order of a
    # set of strings can change from one interpreter run to the next.
    self.formula = "".join(f"{atom}{counts[atom]}" for atom in sorted(counts))
[docs]
def set_atomic_numbers(self, atomic_numbers):
    """
    Sets the atomic numbers of the structure. The atomic types, atomic
    labels and formula will be calculated and also updated.

    Parameters
    ----------
    atomic_numbers : list
        The atomic number of each atom. Each one must be a value of
        ``elements_dict("atomic_number")``; otherwise a KeyError is raised.
    """
    self.atomic_numbers = atomic_numbers

    # Invert the type -> number dictionary to look the types up by number.
    symbol_dict = elements_dict("atomic_number")
    number_dict = {number: symbol for symbol, number in symbol_dict.items()}
    types = [number_dict[number] for number in atomic_numbers]
    self.atomic_types = types

    # Labels are the atomic type followed by the 1-based atom index.
    self.atomic_labels = [f"{atom}{i+1}" for i, atom in enumerate(types)]

    # Count every type in a single pass.
    counts = {}
    for atom in types:
        counts[atom] = counts.get(atom, 0) + 1

    # Sorted types make the formula reproducible: the iteration order of a
    # set of strings can change from one interpreter run to the next.
    self.formula = "".join(f"{atom}{counts[atom]}" for atom in sorted(counts))
[docs]
def set_bonds(self, bond_indexes: list[list], bond_orders: list = None):
    """
    Sets the bonds of the structure.

    Parameters
    ----------
    bond_indexes : list[list], required
        A list of lists containing the indexes of the atoms that are bonded.
        Example: [[0, 1], [1, 2], [2, 3]]
    bond_orders : list | None, optional
        A list of integers containing the bond orders of the bonds.
        Example: [1, 2, 1]
        When omitted (``None`` or ``[None]``) every bond gets order 1.
    """
    # ``[None]`` is still understood as "not given" for callers that pass
    # it explicitly; ``None`` avoids a mutable default argument.
    if bond_orders is None or list(bond_orders) == [None]:
        bond_orders = [1] * len(bond_indexes)

    self.bonds = bond_indexes
    self.bond_orders = bond_orders
[docs]
def from_cjson(self, path, file_name):
    """
    Reads a ChemJSON file from a given path and file_name.

    Only the sections found in the file are loaded: "name", "unitCell",
    "atoms", "bonds", "properties", "results" and "partialCharges".

    Parameters
    ----------
    path : str
        The directory containing the file.
    file_name : str
        The name of the file. The text after the first "." is discarded
        and the ".cjson" extension is appended.
    """
    self.file_name = os.path.join(path, file_name.split(".")[0] + ".cjson")

    with open(self.file_name, "r") as file:
        cjson_data = simplejson.load(file)

    if "name" in cjson_data:
        self.name = cjson_data["name"]

    # The cell must be set before the positions so that the missing set of
    # coordinates (cartesian or fractional) can be calculated.
    if "unitCell" in cjson_data:
        unit_cell = cjson_data["unitCell"]
        cell_keys = ("a", "b", "c", "alpha", "beta", "gamma")
        if "cellVectors" in unit_cell:
            # The explicit vectors keep the orientation of the cell, which
            # is lost when the matrix is rebuilt from the six parameters.
            self.set_cell_matrix(np.array(unit_cell["cellVectors"]).reshape(3, 3))
            if "a" in unit_cell:
                # Keep the parameters exactly as written in the file.
                self.cell_parameters = [unit_cell[key] for key in cell_keys]
        elif "a" in unit_cell:
            self.set_cell_parameters([unit_cell[key] for key in cell_keys])

    if "atoms" in cjson_data:
        atoms = cjson_data["atoms"]

        if "coords" in atoms:
            coords = atoms["coords"]
            if "3d" in coords:
                self.set_cartesian_positions(np.array(coords["3d"]).reshape(-1, 3))
            elif "3dFractional" in coords:
                self.set_fractional_positions(
                    np.array(coords["3dFractional"]).reshape(-1, 3)
                )

        if "elements" in atoms:
            elements = atoms["elements"]
            if "type" in elements:
                self.set_atomic_types(elements["type"])
            elif "number" in elements:
                self.set_atomic_numbers(elements["number"])

            # Labels stored in the file take precedence over generated ones.
            if "label" in elements:
                self.atomic_labels = elements["label"]
            else:
                self.atomic_labels = [
                    f"{atom}{i+1}" for i, atom in enumerate(self.atomic_types)
                ]

    if "bonds" in cjson_data:
        bonds = cjson_data["bonds"]
        connections = bonds.get("connections", {})
        # A "connections" entry without "index" holds no bond to read.
        if "index" in connections:
            self.set_bonds(
                np.array(connections["index"]).astype(int).reshape(-1, 2).tolist()
            )
            if "order" in bonds:
                self.bond_orders = np.array(bonds["order"]).tolist()

    if "properties" in cjson_data:
        self.set_properties(cjson_data["properties"])

    if "results" in cjson_data:
        self.set_results(cjson_data["results"])

    if "partialCharges" in cjson_data:
        self.partial_charges = cjson_data["partialCharges"]
[docs]
def from_xyz(self, path, file_name):
    """
    Reads a XYZ file from a given path and file_name.

    The first line of the file must hold the number of atoms and the second
    line is skipped as a comment. Each of the following lines holds one
    atom: its type followed by the x, y and z coordinates.

    Parameters
    ----------
    path : str
        The directory containing the file.
    file_name : str
        The name of the file. The text after the first "." is discarded
        and the ".xyz" extension is appended. The remaining text is also
        used as the name of the structure.
    """
    base_name = file_name.split(".")[0]
    self.file_name = os.path.join(path, base_name + ".xyz")
    self.name = base_name

    with open(self.file_name, "r") as file:
        xyz_data = file.read().splitlines()

    n_atoms = int(xyz_data[0])

    # Line 0 is the atom count and line 1 the comment, so the atoms are on
    # lines 2 to n_atoms + 1. Reading one line more would pick up a
    # trailing blank line or the start of a following frame.
    atom_lines = xyz_data[2:n_atoms + 2]

    atomic_types = []
    cartesian_positions = []
    for line in atom_lines:
        fields = line.split()
        atomic_types.append(fields[0])
        # Only x, y and z are positions; further columns are ignored.
        cartesian_positions.append([float(value) for value in fields[1:4]])

    self.set_atomic_types(atomic_types)
    self.set_cartesian_positions(np.array(cartesian_positions))
[docs]
def from_gjf(self, path, file_name):
    """
    Reads a Gaussian input file from a given path and file_name.

    A line is read as an atom when its first field is a key of
    ``elements_dict("atomic_number")`` and as a cell vector when its first
    field is "Tv". In both cases the next three fields must be numbers;
    other lines are ignored.

    Parameters
    ----------
    path : str
        The directory containing the file.
    file_name : str
        The name of the file. The text after the first "." is discarded
        and the ".gjf" extension is appended. The remaining text is also
        used as the name of the structure.
    """
    base_name = file_name.split(".")[0]
    self.file_name = os.path.join(path, base_name + ".gjf")
    self.name = base_name

    with open(self.file_name, "r") as file:
        gjf_data = file.read().splitlines()

    # Split every line once; empty and whitespace-only lines have no fields.
    records = [line.split() for line in gjf_data if line.strip()]

    # Looked up once instead of once per line.
    known_types = elements_dict("atomic_number")

    atomic_types = []
    cartesian_positions = []
    cell_matrix = []

    for fields in records:
        tag = fields[0]
        if tag != "Tv" and tag not in known_types:
            continue

        # A free-text line (e.g. a title) may start with an element symbol
        # without holding coordinates: skip it instead of failing.
        try:
            values = [float(value) for value in fields[1:4]]
        except ValueError:
            continue
        if len(values) != 3:
            continue

        if tag == "Tv":
            cell_matrix.append(values)
        else:
            atomic_types.append(tag)
            cartesian_positions.append(values)

    # The cell has to be set before the positions so that the fractional
    # positions can be calculated.
    if cell_matrix != []:
        self.set_cell_matrix(np.array(cell_matrix))

    self.set_atomic_types(atomic_types)
    self.set_cartesian_positions(np.array(cartesian_positions))
[docs]
def from_cif(self, path, file_name):
    """
    Reads a CIF file from a given path and file_name.

    The cell parameters, the atomic types (``_atom_site_type_symbol``) and
    the fractional positions are read from the only data block of the
    file. When the file has a ``_atom_site_charge`` column with numeric
    values, they are stored as partial charges under the "DDEC" key.

    Parameters
    ----------
    path : str
        The directory containing the file.
    file_name : str
        The name of the file. The text after the first "." is discarded
        and the ".cif" extension is appended.
    """
    base_name = file_name.split(".")[0]
    cif_filename = os.path.join(path, base_name + ".cif")

    # Keep track of the source file, as the other readers do. An existing
    # structure name is left untouched.
    self.file_name = cif_filename
    if not self.name:
        self.name = base_name

    cif = gemmi.cif.read_file(cif_filename).sole_block()

    def as_float(text):
        # CIF numbers may end with an uncertainty, e.g. "1.234(5)".
        return float(text.split("(")[0])

    cell_tags = (
        "_cell_length_a",
        "_cell_length_b",
        "_cell_length_c",
        "_cell_angle_alpha",
        "_cell_angle_beta",
        "_cell_angle_gamma",
    )
    cell_parameters = [as_float(cif.find_value(tag)) for tag in cell_tags]

    atomic_types = list(cif.find_values("_atom_site_type_symbol"))

    fractional_positions = [
        [as_float(x), as_float(y), as_float(z)]
        for x, y, z in zip(
            cif.find_values("_atom_site_fract_x"),
            cif.find_values("_atom_site_fract_y"),
            cif.find_values("_atom_site_fract_z"),
        )
    ]

    # Charges are optional. Non-numeric values (such as "?") mean that no
    # usable charges are available.
    try:
        charges = [as_float(value) for value in cif.find_values("_atom_site_charge")]
    except ValueError:
        charges = []

    self.set_cell_parameters(cell_parameters)
    self.set_atomic_types(atomic_types)
    self.set_fractional_positions(np.array(fractional_positions).reshape(-1, 3))

    # Stored as a plain list so the charges can be written to JSON; an
    # absent column leaves the partial charges unchanged.
    if charges:
        self.partial_charges = {"DDEC": charges}
[docs]
def as_dict(self) -> dict:
    """
    Returns the structure as a dictionary.

    The "unitCell" section and the "3dFractional" coordinates are only
    present when all the cell parameters are defined, "bonds" only when the
    structure has bonds and "partialCharges" only when charges are stored.
    Positions, cell vectors and bond indexes are written as flat lists.

    Returns
    -------
    dict
        The dictionary representation of the structure.
    """
    has_cell = None not in self.cell_parameters

    structure_dict = {
        "chemical json": 1,
        "name": self.name,
        "formula": self.formula,
    }

    if has_cell:
        structure_dict["unitCell"] = {
            "a": self.cell_parameters[0],
            "b": self.cell_parameters[1],
            "c": self.cell_parameters[2],
            "alpha": self.cell_parameters[3],
            "beta": self.cell_parameters[4],
            "gamma": self.cell_parameters[5],
            "cellVectors": np.array(self.cell_matrix).flatten().tolist(),
        }

    coords = {"3d": np.array(self.cartesian_positions).flatten().tolist()}
    if has_cell:
        coords["3dFractional"] = (
            np.array(self.fractional_positions).flatten().tolist()
        )

    structure_dict["atoms"] = {
        "elements": {
            "type": self.atomic_types,
            "number": self.atomic_numbers,
        },
        "coords": coords,
    }

    if len(self.bonds) > 0:
        structure_dict["bonds"] = {
            "connections": {"index": np.array(self.bonds).flatten().tolist()},
            "order": self.bond_orders,
        }

    if len(self.partial_charges) > 0:
        # Charges may be stored as numpy arrays (``from_cif`` does so);
        # plain lists are needed for the dictionary to be written as JSON.
        structure_dict["partialCharges"] = {
            charge_type: np.array(values).tolist()
            for charge_type, values in self.partial_charges.items()
        }

    structure_dict["properties"] = self.properties
    structure_dict["results"] = self.results

    return structure_dict
[docs]
def write_cjson(self, path, file_name):
    """
    Saves the structure as a ChemJSON file in the given path.

    Parameters
    ----------
    path : str
        The directory where the file is written.
    file_name : str
        The name of the file. The text after the first "." is discarded
        and the ".cjson" extension is appended.
    """
    stem = file_name.split(".")[0]
    self.file_name = os.path.join(path, stem + ".cjson")

    content = self.as_dict()
    with open(self.file_name, "w") as output:
        simplejson.dump(content, output, indent=4)