Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose 'submission_date' and 'update_date' (#2562, #3310) #3026

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,38 @@ Static methods
higher order concerns than the one about whether a method's body currently
references self or not.

Error messages
--------------

* We avoid the use of f-strings when composing error messages for exceptions and
hannes-ucsc marked this conversation as resolved.
Show resolved Hide resolved
for use with ``require()`` or ``reject()``. If an error message is included,
it should be short, descriptive of the error encountered, and optionally
followed by the relevant value(s) involved::

raise KeyError(key)

raise ValueError('Unknown file type', file_type)

* Requirement errors should always have a message, since they are intended for
clients/users::

require(delay >= 0, 'Delay value must be non-negative')

require(url.scheme == 'https', "Unexpected URL scheme (should be 'https')", url.scheme)

reject(entity_id is None, 'Must supply an entity ID')

reject(file_path.endswith('/'), 'Path must not end in slash', file_path)

* Assertions are usually self-explanatory. Error messages should only be
included when they are not::

assert not debug

assert isinstance(x, list), type(x)
hannes-ucsc marked this conversation as resolved.
Show resolved Hide resolved

assert x == y, ('Misreticulated splines', x, y)

Catching exceptions
-------------------

Expand Down
19 changes: 12 additions & 7 deletions scripts/cgm_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import csv
from datetime import (
datetime,
timezone,
)
import json
import logging
Expand Down Expand Up @@ -63,6 +64,10 @@
from azul.logging import (
configure_script_logging,
)
from azul.time import (
format_dcp2_datetime,
parse_dcp2_datetime,
)
from azul.types import (
JSON,
)
Expand Down Expand Up @@ -235,7 +240,7 @@ def _parse_args(cls, argv):
default=None,
help='(Optional) Use the specified value for the '
'file versions instead of the current date. '
'Example: ' + datetime.now().strftime(cls.date_format))
'Example: ' + format_dcp2_datetime(datetime.now(timezone.utc)))
parser.add_argument('--catalog', '-c',
default=None,
help='(Optional) Only process projects that exist '
Expand All @@ -258,20 +263,20 @@ def _parse_args(cls, argv):
# TODO: parametrize `generation` variable
generation = 0

date_format = '%Y-%m-%dT%H:%M:%S.%fZ'

def __init__(self, argv: List[str]) -> None:
super().__init__()
self.args = self._parse_args(argv)
self.file_errors: MutableMapping[str, str] = {}
self.rows_completed: List[int] = []
self.validation_exceptions: MutableMapping[str, BaseException] = {}
if self.args.version is None:
self.timestamp = datetime.now().strftime(self.date_format)
self.timestamp = format_dcp2_datetime(datetime.now(timezone.utc))
else:
version = datetime.strptime(self.args.version, self.date_format)
assert self.args.version == version.strftime(self.date_format)
self.timestamp = self.args.version
d = parse_dcp2_datetime(self.args.version)
version_string = format_dcp2_datetime(d)
require(self.args.version == version_string,
f'{self.args.version!r} does not have correct syntax.')
self.timestamp = version_string
self.gcs = gcs.Client()
self.src_bucket, self.src_path = self._parse_gcs_url(self.args.source_area)
self.dst_bucket, self.dst_path = self._parse_gcs_url(self.args.staging_area)
Expand Down
7 changes: 5 additions & 2 deletions scripts/dss_v2_apdapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@
from azul.logging import (
configure_script_logging,
)
from azul.time import (
format_dcp2_datetime,
)
from azul.types import (
AnyJSON,
JSON,
Expand Down Expand Up @@ -390,7 +393,6 @@ def get_prefix_list(cls, prefix: str = None, start_prefix: str = None):


class BundleConverter:
file_version_format = '%Y-%m-%dT%H:%M:%S.%fZ'

def __init__(self,
bundle_fqid: BundleFQID,
def _format_file_version(self, old_version: str) -> str:
    """
    Convert a file version string from the old DSS syntax to the new
    DCP/2 syntax.
    """
    # Reject malformed versions before attempting the conversion
    azul.dss.validate_version(old_version)
    parsed = datetime.strptime(old_version, azul.dss.version_format)
    return format_dcp2_datetime(parsed)

def links_json_new_name(self) -> str:
"""
Expand Down
28 changes: 18 additions & 10 deletions src/azul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1136,28 +1136,36 @@ class RequirementError(RuntimeError):

def require(condition: bool, *args, exception: type = RequirementError):
    """
    Raise a RequirementError, or an instance of the given exception class, if
    the given condition is False.

    :param condition: The boolean condition to be required.

    :param args: Optional positional arguments to be passed to the exception
                 constructor. Typically this should be a string containing a
                 textual description of the requirement, and optionally one or
                 more values involved in the required condition.

    :param exception: A custom exception class to be instantiated and raised if
                      the condition does not hold.
    """
    reject(not condition, *args, exception=exception)


def reject(condition: bool, *args, exception: type = RequirementError):
    """
    Raise a RequirementError, or an instance of the given exception class, if
    the given condition is True.

    :param condition: The boolean condition to be rejected.

    :param args: Optional positional arguments to be passed to the exception
                 constructor. Typically this should be a string containing a
                 textual description of the rejected condition, and optionally
                 one or more values involved in the rejected condition.

    :param exception: A custom exception class to be instantiated and raised if
                      the condition occurs.
    """
    if condition:
        raise exception(*args)
Expand Down
15 changes: 12 additions & 3 deletions src/azul/indexer/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,11 +352,20 @@ class EntityAggregator(ABC):
def _transform_entity(self, entity: JSON) -> JSON:
    # Default transformation is the identity; subclasses may override.
    return entity

def _get_accumulator(self, field: str) -> Optional[Accumulator]:
    """
    Return the Accumulator instance to be used for the given field or None
    if the field should not be accumulated.
    """
    # The earliest submission date and the latest update date win across
    # the aggregated entities; everything else uses the default.
    if field == 'submission_date':
        return MinAccumulator()
    elif field == 'update_date':
        return MaxAccumulator()
    else:
        return self._get_default_accumulator()

def _get_default_accumulator(self) -> Optional[Accumulator]:
    # Fall-back accumulator for fields without special handling
    return SetAccumulator(max_size=100)

@abstractmethod
def aggregate(self, entities: Entities) -> Entities:
Expand Down
28 changes: 28 additions & 0 deletions src/azul/indexer/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
dataclass,
field,
)
from datetime import (
datetime,
timezone,
)
from enum import (
Enum,
auto,
Expand Down Expand Up @@ -40,6 +44,10 @@
SimpleSourceSpec,
SourceRef,
)
from azul.time import (
format_dcp2_datetime,
parse_dcp2_datetime,
)
from azul.types import (
AnyJSON,
AnyMutableJSON,
Expand Down Expand Up @@ -339,6 +347,26 @@ def from_index(self, value: Number) -> Optional[bool]:

null_bool: NullableBool = NullableBool()


class NullableDateTime(FieldType[Optional[datetime], str]):
    """
    A field type for optional datetime values. ``None`` is represented in
    the index by a sentinel string so the field is always populated.
    """
    # Index mapping type for this field
    es_type = 'date'
    # Sentinel representing None: the earliest representable datetime
    # (year 1, UTC) — presumably chosen so nulls sort before any real
    # timestamp; TODO confirm
    null = format_dcp2_datetime(datetime(1, 1, 1, tzinfo=timezone.utc))

    def to_index(self, value: Optional[datetime]) -> str:
        # None maps to the sentinel, any other value is formatted as a
        # DCP/2 datetime string
        if value is None:
            return self.null
        else:
            return format_dcp2_datetime(value)

    def from_index(self, value: str) -> Optional[datetime]:
        # Inverse of to_index: the sentinel maps back to None
        if value == self.null:
            return None
        else:
            return parse_dcp2_datetime(value)


null_datetime: NullableDateTime = NullableDateTime()

FieldTypes4 = Union[Mapping[str, FieldType], Sequence[FieldType], FieldType]
FieldTypes3 = Union[Mapping[str, FieldTypes4], Sequence[FieldType], FieldType]
FieldTypes2 = Union[Mapping[str, FieldTypes3], Sequence[FieldType], FieldType]
Expand Down
42 changes: 23 additions & 19 deletions src/azul/plugins/metadata/hca/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ def _transform_entity(self, entity: JSON) -> JSON:
is_intermediate=entity['is_intermediate'],
count=(fqid, 1),
content_description=entity['content_description'],
matrix_cell_count=(fqid, entity.get('matrix_cell_count')))
matrix_cell_count=(fqid, entity.get('matrix_cell_count')),
submission_date=entity['submission_date'],
update_date=entity['update_date'])

def _group_keys(self, entity) -> Tuple[Any, ...]:
return (
Expand All @@ -88,19 +90,18 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
elif field in ('size', 'count', 'matrix_cell_count'):
return DistinctAccumulator(SumAccumulator())
else:
return None
return super()._get_accumulator(field)

def _get_default_accumulator(self) -> Optional[Accumulator]:
return None

class SampleAggregator(SimpleAggregator):

def _get_accumulator(self, field) -> Optional[Accumulator]:
return SetAccumulator(max_size=100)
class SampleAggregator(SimpleAggregator):
pass


class SpecimenAggregator(SimpleAggregator):

def _get_accumulator(self, field) -> Optional[Accumulator]:
return SetAccumulator(max_size=100)
pass


class CellSuspensionAggregator(GroupingAggregator):
Expand All @@ -118,13 +119,11 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
if field == 'total_estimated_cells':
return DistinctAccumulator(SumAccumulator())
else:
return SetAccumulator(max_size=100)
return super()._get_accumulator(field)


class CellLineAggregator(SimpleAggregator):

def _get_accumulator(self, field) -> Optional[Accumulator]:
return SetAccumulator(max_size=100)
pass


class DonorOrganismAggregator(SimpleAggregator):
Expand All @@ -147,13 +146,11 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
elif field == 'donor_count':
return UniqueValueCountAccumulator()
else:
return SetAccumulator(max_size=100)
return super()._get_accumulator(field)


class OrganoidAggregator(SimpleAggregator):

def _get_accumulator(self, field) -> Optional[Accumulator]:
return SetAccumulator(max_size=100)
pass


class ProjectAggregator(SimpleAggregator):
Expand All @@ -167,7 +164,7 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
'publications'):
return None
else:
return SetAccumulator(max_size=100)
return super()._get_accumulator(field)


class ProtocolAggregator(SimpleAggregator):
Expand All @@ -178,12 +175,19 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
elif field == 'assay_type':
return FrequencySetAccumulator(max_size=100)
else:
return SetAccumulator()
return super()._get_accumulator(field)

def _get_default_accumulator(self) -> Optional[Accumulator]:
return SetAccumulator()


class SequencingInputAggregator(SimpleAggregator):
    # Uses the default accumulation behavior inherited from SimpleAggregator
    pass


class SequencingProcessAggregator(SimpleAggregator):

    def _get_default_accumulator(self) -> Optional[Accumulator]:
        # Cap the number of distinct values retained for any field
        return SetAccumulator(max_size=10)


Expand Down
Loading