Source code for rdkit2ase.smiles2x

import ase
import numpy as np
from rdkit import Chem
from rdkit.Chem import rdDistGeom


def get_pf6() -> ase.Atoms:
    directions = np.array(
        [
            [1, 0, 0],
            [-1, 0, 0],
            [0, 1, 0],
            [0, -1, 0],
            [0, 0, 1],
            [0, 0, -1],
        ]
    )
    p_position = np.zeros(3, dtype=float)
    f_positions = directions * 1.57

    positions = [p_position] + list(f_positions)
    symbols = ["P"] + ["F"] * 6

    atoms = ase.Atoms(symbols=symbols, positions=positions)

    atoms.info["connectivity"] = [(0, i, 1.0) for i in range(1, 7)]
    atoms.info["smiles"] = "F[P-](F)(F)(F)(F)F"
    atoms.set_initial_charges([-1] + [0] * 6)

    return atoms


[docs] def smiles2atoms(smiles: str, seed: int = 42) -> ase.Atoms: """Convert a SMILES string to an ASE Atoms object. Parameters ---------- smiles : str The SMILES string to convert. seed : int, optional Random seed for conformer generation (default is 42). Returns ------- ase.Atoms The generated Atoms object (first conformer). Notes ----- This is a convenience wrapper around smiles2conformers that returns just the first conformer. Examples -------- >>> from rdkit2ase import smiles2atoms >>> import ase >>> atoms = smiles2atoms("CCO") >>> isinstance(atoms, ase.Atoms) True """ return smiles2conformers(smiles, 1, seed)[0]
[docs] def smiles2conformers( smiles: str, numConfs: int, # noqa N803 randomSeed: int = 42, # noqa N803 maxAttempts: int = 1000, # noqa N803 ) -> list[ase.Atoms]: """Generate multiple molecular conformers from a SMILES string. Parameters ---------- smiles : str The SMILES string to convert. numConfs : int Number of conformers to generate. randomSeed : int, optional Random seed for conformer generation (default is 42). maxAttempts : int, optional Maximum number of embedding attempts (default is 1000). Returns ------- list[ase.Atoms] List of generated conformers as ASE Atoms objects. Notes ----- Special handling is included for PF6- (hexafluorophosphate) which is treated as a special case. Each Atoms object in the returned list includes: - Atomic positions - Atomic numbers - SMILES string in the info dictionary - Connectivity information in the info dictionary - Formal charges when present Examples -------- >>> from rdkit2ase import smiles2conformers >>> import ase >>> frames = smiles2conformers("CCO", numConfs=3) >>> len(frames) 3 >>> all(isinstance(atoms, ase.Atoms) for atoms in frames) True """ mol = Chem.MolFromSmiles(smiles) if Chem.MolToSmiles(mol, canonical=True) == "F[P-](F)(F)(F)(F)F": return [get_pf6()] * numConfs mol = Chem.AddHs(mol) rdDistGeom.EmbedMultipleConfs( mol, numConfs=numConfs, randomSeed=randomSeed, maxAttempts=maxAttempts, ) images: list[ase.Atoms] = [] charges = [atom.GetFormalCharge() for atom in mol.GetAtoms()] if not any(charge != 0 for charge in charges): charges = None # collect the bond information bonds = [] for bond in mol.GetBonds(): bonds.append( ( bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), bond.GetBondTypeAsDouble(), ) ) for conf in mol.GetConformers(): atoms = ase.Atoms( positions=conf.GetPositions(), numbers=[atom.GetAtomicNum() for atom in mol.GetAtoms()], ) atoms.info["smiles"] = smiles atoms.info["connectivity"] = bonds if charges is not None: atoms.set_initial_charges(charges) images.append(atoms) return images