...
We can verify this by inspecting the training dataset to see which molecules need t49 (results are shown in this notebook
View file |
---|
name | Redundant parameters.htmlpdf |
---|
|
):
Code Block |
---|
|
import tqdm
from openff.qcsubmit.results import TorsionDriveResultCollection
from openff.toolkit import ForceField, Molecule, Topology
from qcportal.client import FractalClient


def _uses_parameter(labels, parameter_id):
    """Return True if any parameter assigned in *labels* has id *parameter_id*.

    *labels* is the value returned by ``ForceField.label_molecules``: one
    mapping per molecule, keyed by force tag, whose values map atom-index
    tuples to the parameter assigned to those atoms.
    """
    return any(
        parameter.id == parameter_id
        for mol_forces in labels
        for force_dict in mol_forces.values()
        for parameter in force_dict.values()
    )


forcefield = ForceField("openff-2.1.0.offxml")

# Create a client which allows us to connect to the main QCArchive server.
# NOTE(review): the client is not referenced again in this script; confirm
# whether to_records() needs it before removing this line.
qcarchive_client = FractalClient()

td_result_collection = TorsionDriveResultCollection.parse_file(
    "sage-2.1.0/inputs-and-outputs/data-sets/td-set-for-fitting-2.1.0.json"
)
records_and_molecules = td_result_collection.to_records()

# SMILES of every training-set molecule that is assigned t49. Each molecule
# is recorded once, no matter how many of its torsions match t49 (appending
# inside the per-torsion loop is what produced the repeated entries before).
use_t49 = []
for _, molecule in tqdm.tqdm(records_and_molecules, desc="checking"):
    all_labels = forcefield.label_molecules(molecule.to_topology())
    if _uses_parameter(all_labels, "t49"):
        use_t49.append(molecule.to_smiles())

# Create unique list of molecules that use t49. Sorting keeps the order (and
# therefore the indices used for visualization below) stable between runs.
use_t49_unique = sorted(set(use_t49))
# Visualize the molecules
Molecule.from_smiles(use_t49_unique[0])
Molecule.from_smiles(use_t49_unique[1])
Molecule.from_smiles(use_t49_unique[2]) |
...
We can verify by checking whether any molecules in the training set use this parameter (results in this notebook
View file |
---|
name | Redundant parameters.htmlpdf |
---|
|
):
Code Block |
---|
import tqdm
from openff.qcsubmit.results import TorsionDriveResultCollection
from openff.toolkit import ForceField, Molecule, Topology
from qcportal.client import FractalClient


def _uses_parameter(labels, parameter_id):
    """Return True if any parameter assigned in *labels* has id *parameter_id*.

    *labels* is the value returned by ``ForceField.label_molecules``: one
    mapping per molecule, keyed by force tag, whose values map atom-index
    tuples to the parameter assigned to those atoms.
    """
    return any(
        parameter.id == parameter_id
        for mol_forces in labels
        for force_dict in mol_forces.values()
        for parameter in force_dict.values()
    )


forcefield = ForceField("openff-2.1.0.offxml")

# Create a client which allows us to connect to the main QCArchive server.
# NOTE(review): the client is not referenced again in this script; confirm
# whether to_records() needs it before removing this line.
qcarchive_client = FractalClient()

td_result_collection = TorsionDriveResultCollection.parse_file(
    "sage-2.1.0/inputs-and-outputs/data-sets/td-set-for-fitting-2.1.0.json"
)
records_and_molecules = td_result_collection.to_records()

# SMILES of every training-set molecule that is assigned t123. Each molecule
# is recorded at most once, no matter how many of its torsions match t123.
use_t123 = []
for _, molecule in tqdm.tqdm(records_and_molecules, desc="checking"):
    all_labels = forcefield.label_molecules(molecule.to_topology())
    if _uses_parameter(all_labels, "t123"):
        use_t123.append(molecule.to_smiles())
print(use_t123) # empty list |
...