...
We can verify this by inspecting the training dataset to see which molecules need t49 (results are shown in this notebook
View file | ||
---|---|---|
|
...
Therefore, ost aromatic N atoms are treated by t84, but t49 is not redundant, as it applies to aromatic charged N.
Code Block | ||
---|---|---|
| ||
from openff.toolkit import Molecule, ForceField, Topology forcefield = ForceField("openff-2.1.0.offxml") import tqdm from qcportal.client import FractalClient from openff.qcsubmit.results import TorsionDriveResultCollection # Create a client which allows us to connect to the main QCArchive server. qcarchive_client = FractalClient() td_result_collection = TorsionDriveResultCollection.parse_file( "sage-2.1.0/inputs-and-outputs/data-sets/td-set-for-fitting-2.1.0.json" ) records_and_molecules = td_result_collection.to_records() use_t49 = [] # list of molecules that use t49 for _, molecule in tqdm.tqdm(records_and_molecules, desc="checking"): all_labels = forcefield.label_molecules(molecule.to_topology()) for mol_idx, mol_forces in enumerate(all_labels): for force_tag, force_dict in mol_forces.items(): for atom_indices, parameter in force_dict.items(): atomstr = "" for idx in atom_indices: atomstr += "%3s" % idx # for some reason this adds each molecule 4 times for each appearance of t49 if parameter.id == 't49': use_t49.append(molecule.to_smiles()) # Create unique list of molecules that use t49 set_list = set(use_t49) use_t49_unique = list(set_list) # Visualize the molecules Molecule.from_smiles(use_t49_unique[0]) Molecule.from_smiles(use_t49_unique[1]) Molecule.from_smiles(use_t49_unique[2]) |
...