Source code for pycofbuilder.cjson

# -*- coding: utf-8 -*-
# Created by Felipe Lopes de Oliveira
# Distributed under the terms of the MIT License.

"""
The CJSON package implements functions to read, create and manipulate Chemical JSON objects.
"""

import os
import simplejson
import numpy as np

from pycofbuilder.tools import elements_dict

import gemmi
from ase.cell import Cell


[docs] class ChemJSON: ''' Class to read, create and manupulate ChemJSON files. Attributes ---------- file_name : str The name of the file. name : str The name of the structure. cell_parameters : list The cell parameters of the structure as a (1,6) list. cell_matrix : list The cell matrix of the structure as a (3,3) list. cartesian_positions : list The cartesian positions of the structure as a (n,3) list. fractional_positions : list The fractional positions of the structure as a (n,3) list. atomic_numbers : list The atomic numbers of the structure as a (n,1) list. atomic_types : list The atomic types of the structure as a (n,1) list. atomic_labels : list The atomic labels of the structure as a (n,1) list. formula : str The formula of the structure. properties : dict The properties of the structure. partial_charges : dict A dictionary contaning the partial charges of the atoms on the structure. Example: {'DDEC': [0.1, 0.2, 0.15], 'EQeq': [0.05, 0.15, 0.19]} ''' def __init__(self): self.file_name = '' self.name = '' # Structure properties self.cell_parameters = None self.cell_matrix = None self.cartesian_positions = None self.fractional_positions = None self.atomic_numbers = None self.atomic_types = None self.atomic_labels = None self.formula = '' self.partial_charges = None self.properties = None self.results = [] # Create a custom representation of the class def __repr__(self): ''' Returns a custom representation of the class. ''' repr_string = "ChemJSON(name='{}', formula='{}', number of atoms={}".format(self.name, self.formula, len(self.atomic_types)) return repr_string # Create a custom print of the class def __str__(self): ''' Returns a custom print of the class. ''' string_string = "ChemJSON(name='{}', formula='{}', number of atoms={})\n".format(self.name, self.formula, len(self.atomic_types)) if self.cell_parameters is not None: string_string += f"""Cell parameters: a = {self.cell_parameters[0]:>12.7f} Å b = {self.cell_parameters[1]:>12.7f} Å c = {self.cell_parameters[2]:>12.7f} Å α = {self.cell_parameters[3]:>12.7f} ° β = {self.cell_parameters[4]:>12.7f} ° γ = {self.cell_parameters[5]:>12.7f} ° Cell matrix: A {self.cell_matrix[0][0]:>12.7f} {self.cell_matrix[0][1]:>12.7f} {self.cell_matrix[0][2]:>12.7f} B {self.cell_matrix[1][0]:>12.7f} {self.cell_matrix[1][1]:>12.7f} {self.cell_matrix[1][2]:>12.7f} C {self.cell_matrix[2][0]:>12.7f} {self.cell_matrix[2][1]:>12.7f} {self.cell_matrix[2][2]:>12.7f} """ if self.cartesian_positions is not None: string_string += "Cartesian positions:\n" for i, position in enumerate(self.cartesian_positions): string_string += " {:3} {:>9.5f} {:>9.5f} {:>9.5f}\n".format(self.atomic_types[i], position[0], position[1], position[2] ) if self.fractional_positions is not None: string_string += "Fractional positions:\n" for i, position in enumerate(self.fractional_positions): string_string += " {:3} {:>9.5f} {:>9.5f} {:>9.5f}\n".format(self.atomic_types[i], position[0], position[1], position[2]) return string_string
[docs] def set_properties(self, properties): ''' Sets the properties of the structure. ''' self.properties = properties
[docs] def set_results(self, results): ''' Sets the results of the structure. ''' self.results = results
[docs] def set_cell_parameters(self, cell_parameters): ''' Sets the cell parameters of the structure. ''' self.cell_parameters = cell_parameters aseCell = Cell.fromcellpar(cell_parameters) self.cell_matrix = np.array(aseCell)
[docs] def set_cell_matrix(self, cell_matrix): ''' Sets the cell matrix of the structure. The cell parameters will be calculated and also updated. ''' self.cell_matrix = cell_matrix aseCell = Cell(cell_matrix) self.cell_parameters = aseCell.cellpar()
[docs] def set_cartesian_positions(self, cartesian_positions): ''' Sets the cartesian positions of the structure. The fractional positions will be calculated and also updated. ''' self.cartesian_positions = np.array(cartesian_positions).astype(float) if self.cell_parameters is not None: aseCell = Cell.fromcellpar(self.cell_parameters) self.fractional_positions = aseCell.scaled_positions(cartesian_positions)
[docs] def set_fractional_positions(self, fractional_positions): ''' Sets the fractional positions of the structure. The cartesian positions will be calculated and also updated. ''' self.fractional_positions = np.array(fractional_positions).astype(float) if self.cell_parameters is not None: aseCell = Cell.fromcellpar(self.cell_parameters) self.cartesian_positions = aseCell.cartesian_positions(fractional_positions)
[docs] def set_atomic_types(self, atomic_types): ''' Sets the atomic labels of the structure. ''' self.atomic_types = atomic_types self.atomic_labels = [f"{atom}{i+1}" for i, atom in enumerate(self.atomic_types)] symbol_dict = elements_dict('atomic_number') self.atomic_numbers = [symbol_dict[i] for i in atomic_types] self.formula = ''.join([f'{atom}{self.atomic_types.count(atom)}' for atom in set(self.atomic_types)])
[docs] def set_atomic_numbers(self, atomic_numbers): ''' Sets the atomic numbers of the structure. The atomic types and formula will be calculated and also updated. ''' self.atomic_numbers = atomic_numbers symbol_dict = elements_dict('atomic_number') number_dict = {j: i for i, j in zip(symbol_dict.keys(), symbol_dict.values())} self.atomic_types = [number_dict[i] for i in atomic_numbers] self.atomic_labels = [f"{atom}{i+1}" for i, atom in enumerate(self.atomic_types)] self.formula = ''.join([f'{atom}{self.atomic_types.count(atom)}' for atom in set(self.atomic_types)])
[docs] def from_cjson(self, path, file_name): ''' Reads a ChemJSON file from a given path and file_name. ''' self.file_name = os.path.join(path, file_name.split('.')[0] + '.cjson') with open(self.file_name, 'r') as file: cjson_data = simplejson.load(file) if "name" in cjson_data: self.name = cjson_data['name'] if "unitCell" in cjson_data: if 'a' in cjson_data['unitCell']: self.set_cell_parameters( [cjson_data['unitCell'][i] for i in ['a', 'b', 'c', 'alpha', 'beta', 'gamma']] ) elif 'cellVectors' in cjson_data['unitCell']: self.set_cell_matrix( np.array(cjson_data['unitCell']['cellVectors']).reshape(3, 3) ) if "atoms" in cjson_data: if 'coords' in cjson_data['atoms']: if '3d' in cjson_data['atoms']['coords']: self.set_cartesian_positions( np.array(cjson_data['atoms']['coords']['3d']).reshape(-1, 3) ) elif '3dFractional' in cjson_data['atoms']['coords']: self.set_fractional_positions( np.array(cjson_data['atoms']['coords']['3dFractional']).reshape(-1, 3) ) if "elements" in cjson_data['atoms']: if 'type' in cjson_data['atoms']['elements']: self.set_atomic_types(cjson_data['atoms']['elements']['type']) elif 'number' in cjson_data['atoms']['elements']: self.set_atomic_numbers(cjson_data['atoms']['elements']['number']) if 'label' in cjson_data['atoms']['elements']: self.atomic_labels = cjson_data['atoms']['elements']['label'] else: self.atomic_labels = [f"{atom}{i+1}" for i, atom in enumerate(self.atomic_types)] if 'properties' in cjson_data: self.set_properties(cjson_data['properties']) if 'results' in cjson_data: self.set_results(cjson_data['results']) if 'partialCharges' in cjson_data: self.partial_charges = cjson_data['partialCharges']
[docs] def from_xyz(self, path, file_name): ''' Reads a XYZ file from a given path and file_name. ''' self.file_name = os.path.join(path, file_name.split('.')[0] + '.xyz') self.name = file_name.split('.')[0] with open(self.file_name, 'r') as file: xyz_data = file.read().splitlines() n_atoms = int(xyz_data[0]) atomic_types = [] cartesian_positions = [] for line in xyz_data[2: n_atoms + 3]: atomic_types.append(line.split()[0]) cartesian_positions.append([float(i) for i in line.split()[1:]]) self.set_atomic_types(atomic_types) self.set_cartesian_positions(np.array(cartesian_positions))
[docs] def from_gjf(self, path, file_name): ''' Reads a Gaussian input file from a given path and file_name. ''' self.file_name = os.path.join(path, file_name.split('.')[0] + '.gjf') self.name = file_name.split('.')[0] with open(self.file_name, 'r') as file: gjf_data = file.read().splitlines() # Remove empty lines gjf_data = [line for line in gjf_data if line != ''] atomic_types = [] cartesian_positions = [] for line in gjf_data: if line.split()[0] in elements_dict('atomic_number').keys(): atomic_types.append(line.split()[0]) cartesian_positions.append([float(i) for i in line.split()[1:4]]) cell_matrix = [] for line in gjf_data: if line.split()[0] == 'Tv': cell_matrix.append([float(i) for i in line.split()[1:4]]) if cell_matrix != []: self.set_cell_matrix(np.array(cell_matrix)) self.set_atomic_types(atomic_types) self.set_cartesian_positions(np.array(cartesian_positions))
[docs] def from_cif(self, path, file_name): ''' Reads a CIF file from a given path and file_name. ''' # Read the cif file and get the lattice parameters and atomic positions cif_filename = os.path.join(path, file_name.split('.')[0] + '.cif') cif = gemmi.cif.read_file(cif_filename).sole_block() a = float(cif.find_value('_cell_length_a').split('(')[0]) b = float(cif.find_value('_cell_length_b').split('(')[0]) c = float(cif.find_value('_cell_length_c').split('(')[0]) beta = float(cif.find_value('_cell_angle_beta').split('(')[0]) gamma = float(cif.find_value('_cell_angle_gamma').split('(')[0]) alpha = float(cif.find_value('_cell_angle_alpha').split('(')[0]) CellParameters = [a, b, c, alpha, beta, gamma] AtomicTypes = list(cif.find_values('_atom_site_type_symbol')) PosX = np.array(cif.find_values('_atom_site_fract_x')).astype(float) PosY = np.array(cif.find_values('_atom_site_fract_y')).astype(float) PosZ = np.array(cif.find_values('_atom_site_fract_z')).astype(float) try: charges = np.array(cif.find_values('_atom_site_charge')).astype(float) charge_type = 'DDEC' except Exception: charges = None charge_type = None self.set_cell_parameters(CellParameters) self.set_atomic_types(AtomicTypes) self.set_fractional_positions(np.array([PosX, PosY, PosZ]).T) if charges is not None: self.partial_charges = {charge_type: charges}
[docs] def as_dict(self): ''' Returns the structure as a dictionary. ''' structure_dict = { 'chemical json': 1, 'name': self.name, 'formula': self.formula, } if self.cell_parameters is not None: structure_dict['unit cell'] = { 'a': self.cell_parameters[0], 'b': self.cell_parameters[1], 'c': self.cell_parameters[2], 'alpha': self.cell_parameters[3], 'beta': self.cell_parameters[4], 'gamma': self.cell_parameters[5], 'cellVectors': self.cell_matrix.flatten().tolist() } structure_dict['atoms'] = { 'elements': { 'type': self.atomic_types, 'number': self.atomic_numbers, }, 'coords': { '3d': self.cartesian_positions.flatten().tolist(), } } if self.cell_parameters is not None: structure_dict['atoms']['coords']['3dFractional'] = self.fractional_positions.flatten().tolist() if self.partial_charges is not None: structure_dict['partialCharges'] = self.partial_charges structure_dict['properties'] = self.properties structure_dict['results'] = self.results return structure_dict
[docs] def write_cjson(self, path, file_name): ''' Writes a ChemJSON file to a given path and file_name. ''' self.file_name = os.path.join(path, file_name.split('.')[0] + '.cjson') with open(self.file_name, 'w') as file: simplejson.dump(self.as_dict(), file, indent=4)