Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

We can verify this by inspecting the training dataset to see which molecules use t49 (results are shown in the attached notebook, "Redundant parameters.html"):

Code Block
languagepy
# Find every molecule in the Sage 2.1.0 torsion-drive training set that is
# assigned the parameter with id "t49" by the openff-2.1.0 force field.
from openff.toolkit import Molecule, ForceField, Topology
import tqdm
from qcportal.client import FractalClient
from openff.qcsubmit.results import TorsionDriveResultCollection

forcefield = ForceField("openff-2.1.0.offxml")

# Create a client which allows us to connect to the main QCArchive server.
# NOTE(review): this client is not referenced again in this snippet; it is
# kept so the snippet's side effects stay the same as before.
qcarchive_client = FractalClient()

td_result_collection = TorsionDriveResultCollection.parse_file(
    "sage-2.1.0/inputs-and-outputs/data-sets/td-set-for-fitting-2.1.0.json"
)

records_and_molecules = td_result_collection.to_records()

use_t49 = []  # SMILES of molecules that use t49 (one entry per record)

for _, molecule in tqdm.tqdm(records_and_molecules, desc="checking"):
    all_labels = forcefield.label_molecules(molecule.to_topology())
    # A molecule usually contains several atom tuples that are assigned the
    # same parameter; appending inside the innermost loop therefore recorded
    # the molecule once per match. `any` records it once and stops scanning
    # at the first hit.
    uses_t49 = any(
        parameter.id == "t49"
        for mol_forces in all_labels
        for force_dict in mol_forces.values()
        for parameter in force_dict.values()
    )
    if uses_t49:
        use_t49.append(molecule.to_smiles())

# Create unique list of molecules that use t49. Several records can map to
# the same SMILES; sorting makes the order (and so the molecules picked
# below) reproducible between runs.
use_t49_unique = sorted(set(use_t49))

# Visualize the molecules.
# NOTE(review): presumably run in a notebook, where only the last bare
# expression of a cell is rendered -- put each line in its own cell to see
# all three. Indexing assumes at least three unique molecules were found.
Molecule.from_smiles(use_t49_unique[0])
Molecule.from_smiles(use_t49_unique[1])
Molecule.from_smiles(use_t49_unique[2])

...

We can verify this by checking whether any molecules in the training set use this parameter (results are shown in the attached notebook, "Redundant parameters.html"):

Code Block
# Check whether any molecule in the Sage 2.1.0 torsion-drive training set is
# assigned the parameter with id "t123" by the openff-2.1.0 force field.
from openff.toolkit import Molecule, ForceField, Topology
import tqdm
from qcportal.client import FractalClient
from openff.qcsubmit.results import TorsionDriveResultCollection

forcefield = ForceField("openff-2.1.0.offxml")

# Create a client which allows us to connect to the main QCArchive server.
# NOTE(review): this client is not referenced again in this snippet; it is
# kept so the snippet's side effects stay the same as before.
qcarchive_client = FractalClient()

td_result_collection = TorsionDriveResultCollection.parse_file(
    "sage-2.1.0/inputs-and-outputs/data-sets/td-set-for-fitting-2.1.0.json"
)
records_and_molecules = td_result_collection.to_records()

use_t123 = []  # SMILES of molecules that use t123 (one entry per record)

for _, molecule in tqdm.tqdm(records_and_molecules, desc="checking"):
    all_labels = forcefield.label_molecules(molecule.to_topology())
    # Record each molecule at most once: appending inside the innermost loop
    # would add it again for every atom tuple assigned this parameter.
    uses_t123 = any(
        parameter.id == "t123"
        for mol_forces in all_labels
        for force_dict in mol_forces.values()
        for parameter in force_dict.values()
    )
    if uses_t123:
        use_t123.append(molecule.to_smiles())

print(use_t123) # empty list

...