def match_substructure(
    atoms: ase.Atoms,
    smiles: str | None = None,
    smarts: str | None = None,
    mol: Chem.Mol | None = None,
    fragment: ase.Atoms | None = None,
    **kwargs,
) -> tuple[tuple[int, ...], ...]:
    """
    Find all matches of a substructure pattern in a given ASE Atoms object.

    Exactly one of ``smiles``, ``smarts``, ``mol`` or ``fragment`` must be
    given; it defines the pattern searched for in ``atoms``.

    Parameters
    ----------
    atoms : ase.Atoms
        The molecule or structure in which to search for substructure matches.
    smiles : str, optional
        A SMILES string representing the substructure pattern to match.
        Explicit hydrogens are added to the parsed pattern before matching.
    smarts : str, optional
        A SMARTS string representing the substructure pattern to match.
    mol : Chem.Mol, optional
        An RDKit Mol object representing the substructure pattern to match.
        The object is copied before sanitization, so the caller's instance
        is left untouched.
    fragment : ase.Atoms, optional
        An ASE Atoms object representing the substructure pattern to match.
        It is converted to an RDKit Mol object with `ase2rdkit`.
    **kwargs
        Additional keyword arguments passed to `ase2rdkit`, both for the
        conversion of ``atoms`` and (if given) of ``fragment``.

    Returns
    -------
    tuple of tuple of int
        A tuple of atom index tuples, each corresponding to one match of the
        pattern.

    Raises
    ------
    ValueError
        If no pattern or more than one pattern is specified, or if the given
        SMILES / SMARTS string cannot be parsed.
    """
    # Validate the argument combination before doing any parsing so that the
    # caller always gets the argument error rather than a parser error.
    n_given = sum(
        option is not None for option in (smiles, smarts, mol, fragment)
    )
    if n_given == 0:
        raise ValueError("Must specify a pattern")
    if n_given > 1:
        raise ValueError("Can only specify one pattern")

    if smiles is not None:
        pattern = Chem.MolFromSmiles(smiles)
        # RDKit signals a parse failure by returning None, not by raising.
        if pattern is None:
            raise ValueError(f"Could not parse SMILES string: {smiles!r}")
        # Ensure hydrogens are added for matching
        pattern = Chem.AddHs(pattern)
    elif smarts is not None:
        pattern = Chem.MolFromSmarts(smarts)
        if pattern is None:
            raise ValueError(f"Could not parse SMARTS string: {smarts!r}")
    elif mol is not None:
        # Work on a copy: SanitizeMol below modifies the molecule in place.
        pattern = Chem.Mol(mol)
    else:
        pattern = ase2rdkit(fragment, **kwargs)

    Chem.SanitizeMol(pattern)

    target = ase2rdkit(atoms, **kwargs)
    return target.GetSubstructMatches(pattern)
def get_substructures(
    atoms: ase.Atoms,
    **kwargs,
) -> list[ase.Atoms]:
    """
    Extract all matched substructures from an ASE Atoms object.

    The pattern is not a named parameter of this function; it is supplied
    through ``**kwargs``, which are forwarded unchanged to
    `match_substructure`.

    Parameters
    ----------
    atoms : ase.Atoms
        The structure to search in.
    **kwargs
        Keyword arguments passed to `match_substructure`. This is where the
        pattern is specified (``smiles``, ``smarts``, ``mol`` or
        ``fragment``); any further keywords are handled by
        `match_substructure` as well.

    Returns
    -------
    list of ase.Atoms
        One entry per match, obtained by indexing ``atoms`` with the atom
        indices of that match.
    """
    index_groups = match_substructure(atoms, **kwargs)

    extracted = []
    for indices in index_groups:
        # Indexing with a tuple of atom indices selects those atoms.
        extracted.append(atoms[indices])
    return extracted
def iter_fragments(atoms: ase.Atoms) -> list[ase.Atoms]:
    """
    Iterate over connected molecular fragments in an ASE Atoms object.

    This is a generator function: fragments are yielded one at a time
    rather than returned as a list.

    If a 'connectivity' field is present in `atoms.info`, it is handed to
    `find_connected_components` and each resulting component is used to
    index ``atoms``. Otherwise the fragments come from `separate`.

    Parameters
    ----------
    atoms : ase.Atoms
        A structure that may contain one or more molecular fragments.

    Yields
    ------
    ase.Atoms
        Each connected component (fragment) in the input structure.
    """
    if "connectivity" not in atoms.info:
        # No stored bond information: fall back to `separate`.
        for fragment in separate(atoms):
            yield fragment
        return

    # connectivity is a list of tuples (i, j, bond_type)
    bonds = atoms.info["connectivity"]
    for atom_indices in find_connected_components(bonds):
        yield atoms[list(atom_indices)]