-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Molecular input #234
Molecular input #234
Changes from all commits
fff267b
8f40ee7
cab7db2
050a526
4121c59
29356d2
6df508b
dfe64f2
208af95
225ee76
e252160
2114de5
6c6d8cd
bc62b87
fca6a94
06f4844
fd12a81
7d49979
189fcae
781f0dd
22441fa
857be19
1254648
ed83a3c
a139854
49013c3
c734620
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,9 +22,11 @@ | |
ContinuousOutput, | ||
DiscreteInput, | ||
Input, | ||
MolecularInput, | ||
Output, | ||
TInputTransformSpecs, | ||
) | ||
from bofire.data_models.molfeatures.api import MolFeatures | ||
from bofire.data_models.objectives.api import AbstractObjective, Objective | ||
|
||
FeatureSequence = Union[List[AnyFeature], Tuple[AnyFeature]] | ||
|
@@ -348,6 +350,18 @@ def _get_transform_info( | |
[f"{feat.key}{_CAT_SEP}{d}" for d in feat.descriptors] | ||
) | ||
counter += len(feat.descriptors) | ||
elif isinstance(specs[feat.key], MolFeatures): | ||
assert isinstance(feat, MolecularInput) | ||
descriptor_names = specs[ | ||
feat.key | ||
].get_descriptor_names() # type: ignore | ||
features2idx[feat.key] = tuple( | ||
(np.array(range(len(descriptor_names))) + counter).tolist() | ||
) | ||
features2names[feat.key] = tuple( | ||
[f"{feat.key}{_CAT_SEP}{d}" for d in descriptor_names] | ||
) | ||
counter += len(descriptor_names) | ||
return features2idx, features2names | ||
|
||
def transform( | ||
|
@@ -383,6 +397,9 @@ def transform( | |
elif specs[feat.key] == CategoricalEncodingEnum.DESCRIPTOR: | ||
assert isinstance(feat, CategoricalDescriptorInput) | ||
transformed.append(feat.to_descriptor_encoding(s)) | ||
elif isinstance(specs[feat.key], MolFeatures): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you also include this in the tests? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Testing for this can be found in |
||
assert isinstance(feat, MolecularInput) | ||
transformed.append(feat.to_descriptor_encoding(specs[feat.key], s)) # type: ignore | ||
return pd.concat(transformed, axis=1) | ||
|
||
def inverse_transform( | ||
|
@@ -420,6 +437,7 @@ def inverse_transform( | |
elif specs[feat.key] == CategoricalEncodingEnum.DESCRIPTOR: | ||
assert isinstance(feat, CategoricalDescriptorInput) | ||
transformed.append(feat.from_descriptor_encoding(experiments)) | ||
|
||
return pd.concat(transformed, axis=1) | ||
|
||
def _validate_transform_specs(self, specs: TInputTransformSpecs): | ||
|
@@ -429,24 +447,47 @@ def _validate_transform_specs(self, specs: TInputTransformSpecs): | |
specs (TInputTransformSpecs): Transform specs to be validated. | ||
""" | ||
# first check that the keys in the specs dict are correct also correct feature keys | ||
if len(set(specs.keys()) - set(self.get_keys(CategoricalInput))) > 0: | ||
if ( | ||
len( | ||
set(specs.keys()) | ||
- set(self.get_keys(CategoricalInput)) | ||
- set(self.get_keys(MolecularInput)) | ||
) | ||
> 0 | ||
): | ||
raise ValueError("Unknown features specified in transform specs.") | ||
# next check that all values are of type CategoricalEncodingEnum | ||
# next check that all values are of type CategoricalEncodingEnum or MolFeatures | ||
if not ( | ||
all(isinstance(enc, CategoricalEncodingEnum) for enc in specs.values()) | ||
all( | ||
isinstance(enc, (CategoricalEncodingEnum, MolFeatures)) | ||
for enc in specs.values() | ||
) | ||
): | ||
raise ValueError("Unknown transform specified.") | ||
# next check that only Categoricalwithdescriptor have the value DESCRIPTOR | ||
descriptor_keys = [ | ||
key | ||
for key, value in specs.items() | ||
if value == CategoricalEncodingEnum.DESCRIPTOR | ||
] | ||
# next check that only CategoricalDescriptorInput can have the value DESCRIPTOR | ||
descriptor_keys = [] | ||
for key, value in specs.items(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure, is this also raisig an error if one assigns a molfeatures transform to a categoricaldescriptorinput? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you then also write tests for the addtions in this method? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Normally the validate input_processing_specs would have caught what you describe I think But nonetheless, def _validate_transform_specs has been improved to make sure that it will solve these types of issues too. It has been changed from how I had it before so that checking of CategoricalEncodingEnum.DESCRIPTOR and MolFeatures is separate to avoid a bug that can occur in case of user errors that can happen when there are multiple categorical variables inputs with various mistakes in transform type. Furthermore, MolecularInputs require a MolFeatures in the transform specs. Hopefully that's fine with you too. |
||
if value == CategoricalEncodingEnum.DESCRIPTOR: | ||
descriptor_keys.append(key) | ||
if ( | ||
len(set(descriptor_keys) - set(self.get_keys(CategoricalDescriptorInput))) | ||
> 0 | ||
): | ||
raise ValueError("Wrong features types assigned to DESCRIPTOR transform.") | ||
# next check if MolFeatures have been assigned to feature types other than MolecularInput | ||
molfeature_keys = [] | ||
for key, value in specs.items(): | ||
if isinstance(value, MolFeatures): | ||
molfeature_keys.append(key) | ||
if len(set(molfeature_keys) - set(self.get_keys(MolecularInput))) > 0: | ||
raise ValueError("Wrong features types assigned to MolFeatures transforms.") | ||
# next check that all MolecularInput have MolFeatures transforms | ||
for feat in self.get(includes=[MolecularInput]): | ||
mol_encoding = specs.get(feat.key) | ||
if mol_encoding is None: | ||
raise ValueError("No transform assigned to MolecularInput.") | ||
elif not isinstance(mol_encoding, MolFeatures): | ||
raise ValueError("Incorrect transform assigned to MolecularInput.") | ||
return specs | ||
|
||
def get_bounds( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from typing import Literal | ||
|
||
from bofire.data_models.kernels.kernel import Kernel | ||
|
||
|
||
class MolecularKernel(Kernel): | ||
pass | ||
|
||
|
||
class TanimotoKernel(MolecularKernel): | ||
type: Literal["TanimotoKernel"] = "TanimotoKernel" | ||
ard: bool = True |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from typing import Union | ||
|
||
from bofire.data_models.molfeatures.molfeatures import ( # BagOfCharacters | ||
Fingerprints, | ||
FingerprintsFragments, | ||
Fragments, | ||
MolFeatures, | ||
MordredDescriptors, | ||
) | ||
|
||
AbstractMolFeatures = MolFeatures | ||
|
||
AnyMolFeatures = Union[ | ||
Fingerprints, | ||
Fragments, | ||
FingerprintsFragments, | ||
# BagOfCharacters, | ||
MordredDescriptors, | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you also include this in the tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added MolecularInput to test_inputs_get_transform_info