Skip to content

Commit

Permalink
[r] Expose 'submission_date' and 'update_date' (#2562, #3310, PR #3026)
Browse files Browse the repository at this point in the history
  • Loading branch information
jessebrennan authored and hannes-ucsc committed Aug 24, 2021
2 parents 4520816 + 54f948d commit 384e116
Show file tree
Hide file tree
Showing 21 changed files with 1,561 additions and 342 deletions.
32 changes: 32 additions & 0 deletions CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,38 @@ Static methods
higher order concerns than the one about whether a method's body currently
references self or not.

Error messages
--------------

* We avoid the use of f-strings when composing error messages for exceptions and
for use with ``require()`` or ``reject()``. If an error message is included,
it should be short, descriptive of the error encountered, and optionally
followed by the relevant value(s) involved::

raise KeyError(key)

raise ValueError('Unknown file type', file_type)

* Requirement errors should always have a message, since they are intended for
clients/users::

require(delay >= 0, 'Delay value must be non-negative')

require(url.scheme == 'https', "Unexpected URL scheme (should be 'https')", url.scheme)

reject(entity_id is None, 'Must supply an entity ID')

reject(file_path.endswith('/'), 'Path must not end in slash', path)

* Assertions are usually self-explanatory. Error messages should only be
included when they are not::

assert not debug

assert isinstance(x, list), type(x)

assert x == y, ('Misreticulated splines', x, y)

Catching exceptions
-------------------

Expand Down
19 changes: 12 additions & 7 deletions scripts/cgm_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import csv
from datetime import (
datetime,
timezone,
)
import json
import logging
Expand Down Expand Up @@ -63,6 +64,10 @@
from azul.logging import (
configure_script_logging,
)
from azul.time import (
format_dcp2_datetime,
parse_dcp2_datetime,
)
from azul.types import (
JSON,
)
Expand Down Expand Up @@ -235,7 +240,7 @@ def _parse_args(cls, argv):
default=None,
help='(Optional) Use the specified value for the '
'file versions instead of the current date. '
'Example: ' + datetime.now().strftime(cls.date_format))
'Example: ' + format_dcp2_datetime(datetime.now(timezone.utc)))
parser.add_argument('--catalog', '-c',
default=None,
help='(Optional) Only process projects that exist '
Expand All @@ -258,20 +263,20 @@ def _parse_args(cls, argv):
# TODO: parametrize `generation` variable
generation = 0

date_format = '%Y-%m-%dT%H:%M:%S.%fZ'

def __init__(self, argv: List[str]) -> None:
super().__init__()
self.args = self._parse_args(argv)
self.file_errors: MutableMapping[str, str] = {}
self.rows_completed: List[int] = []
self.validation_exceptions: MutableMapping[str, BaseException] = {}
if self.args.version is None:
self.timestamp = datetime.now().strftime(self.date_format)
self.timestamp = format_dcp2_datetime(datetime.now(timezone.utc))
else:
version = datetime.strptime(self.args.version, self.date_format)
assert self.args.version == version.strftime(self.date_format)
self.timestamp = self.args.version
d = parse_dcp2_datetime(self.args.version)
version_string = format_dcp2_datetime(d)
require(self.args.version == version_string,
f'{self.args.version!r} does not have correct syntax.')
self.timestamp = version_string
self.gcs = gcs.Client()
self.src_bucket, self.src_path = self._parse_gcs_url(self.args.source_area)
self.dst_bucket, self.dst_path = self._parse_gcs_url(self.args.staging_area)
Expand Down
7 changes: 5 additions & 2 deletions scripts/dss_v2_apdapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@
from azul.logging import (
configure_script_logging,
)
from azul.time import (
format_dcp2_datetime,
)
from azul.types import (
AnyJSON,
JSON,
Expand Down Expand Up @@ -390,7 +393,6 @@ def get_prefix_list(cls, prefix: str = None, start_prefix: str = None):


class BundleConverter:
file_version_format = '%Y-%m-%dT%H:%M:%S.%fZ'

def __init__(self,
bundle_fqid: BundleFQID,
Expand Down Expand Up @@ -585,7 +587,8 @@ def _format_file_version(self, old_version: str) -> str:
Convert the old file version syntax to the new syntax
"""
azul.dss.validate_version(old_version)
return datetime.strptime(old_version, azul.dss.version_format).strftime(self.file_version_format)
d = datetime.strptime(old_version, azul.dss.version_format)
return format_dcp2_datetime(d)

def links_json_new_name(self) -> str:
"""
Expand Down
28 changes: 18 additions & 10 deletions src/azul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1136,28 +1136,36 @@ class RequirementError(RuntimeError):

def require(condition: bool, *args, exception: type = RequirementError):
"""
Raise a RequirementError, or an instance of the given exception class, if the given condition is False.
Raise a RequirementError, or an instance of the given exception class, if
the given condition is False.
:param condition: the boolean condition to be required
:param condition: The boolean condition to be required.
:param args: optional positional arguments to be passed to the exception constructor. Typically only one such
argument should be provided: a string containing a textual description of the requirement.
:param args: optional positional arguments to be passed to the exception
constructor. Typically this should be a string containing a
textual description of the requirement, and optionally one or
more values involved in the required condition.
:param exception: a custom exception class to be instantiated and raised if the condition does not hold
:param exception: A custom exception class to be instantiated and raised if
the condition does not hold.
"""
reject(not condition, *args, exception=exception)


def reject(condition: bool, *args, exception: type = RequirementError):
"""
Raise a RequirementError, or an instance of the given exception class, if the given condition is True.
Raise a RequirementError, or an instance of the given exception class, if
the given condition is True.
:param condition: the boolean condition to be rejected
:param condition: The boolean condition to be rejected.
:param args: optional positional arguments to be passed to the exception constructor. Typically only one such
argument should be provided: a string containing a textual description of the rejected condition.
:param args: Optional positional arguments to be passed to the exception
constructor. Typically this should be a string containing a
textual description of the rejected condition, and optionally
one or more values involved in the rejected condition.
:param exception: a custom exception class to be instantiated and raised if the condition occurs
:param exception: A custom exception class to be instantiated and raised if
the condition occurs.
"""
if condition:
raise exception(*args)
Expand Down
15 changes: 12 additions & 3 deletions src/azul/indexer/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,11 +352,20 @@ class EntityAggregator(ABC):
def _transform_entity(self, entity: JSON) -> JSON:
return entity

def _get_accumulator(self, field) -> Optional[Accumulator]:
def _get_accumulator(self, field: str) -> Optional[Accumulator]:
"""
Return the Accumulator instance to be used for the given field or None if the field should not be accumulated.
Return the Accumulator instance to be used for the given field or None
if the field should not be accumulated.
"""
return SetAccumulator()
if field == 'submission_date':
return MinAccumulator()
elif field == 'update_date':
return MaxAccumulator()
else:
return self._get_default_accumulator()

def _get_default_accumulator(self) -> Optional[Accumulator]:
return SetAccumulator(max_size=100)

@abstractmethod
def aggregate(self, entities: Entities) -> Entities:
Expand Down
28 changes: 28 additions & 0 deletions src/azul/indexer/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
dataclass,
field,
)
from datetime import (
datetime,
timezone,
)
from enum import (
Enum,
auto,
Expand Down Expand Up @@ -40,6 +44,10 @@
SimpleSourceSpec,
SourceRef,
)
from azul.time import (
format_dcp2_datetime,
parse_dcp2_datetime,
)
from azul.types import (
AnyJSON,
AnyMutableJSON,
Expand Down Expand Up @@ -339,6 +347,26 @@ def from_index(self, value: Number) -> Optional[bool]:

null_bool: NullableBool = NullableBool()


class NullableDateTime(FieldType[Optional[datetime], str]):
es_type = 'date'
null = format_dcp2_datetime(datetime(1, 1, 1, tzinfo=timezone.utc))

def to_index(self, value: Optional[datetime]) -> str:
if value is None:
return self.null
else:
return format_dcp2_datetime(value)

def from_index(self, value: str) -> Optional[datetime]:
if value == self.null:
return None
else:
return parse_dcp2_datetime(value)


null_datetime: NullableDateTime = NullableDateTime()

FieldTypes4 = Union[Mapping[str, FieldType], Sequence[FieldType], FieldType]
FieldTypes3 = Union[Mapping[str, FieldTypes4], Sequence[FieldType], FieldType]
FieldTypes2 = Union[Mapping[str, FieldTypes3], Sequence[FieldType], FieldType]
Expand Down
42 changes: 23 additions & 19 deletions src/azul/plugins/metadata/hca/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ def _transform_entity(self, entity: JSON) -> JSON:
is_intermediate=entity['is_intermediate'],
count=(fqid, 1),
content_description=entity['content_description'],
matrix_cell_count=(fqid, entity.get('matrix_cell_count')))
matrix_cell_count=(fqid, entity.get('matrix_cell_count')),
submission_date=entity['submission_date'],
update_date=entity['update_date'])

def _group_keys(self, entity) -> Tuple[Any, ...]:
return (
Expand All @@ -88,19 +90,18 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
elif field in ('size', 'count', 'matrix_cell_count'):
return DistinctAccumulator(SumAccumulator())
else:
return None
return super()._get_accumulator(field)

def _get_default_accumulator(self) -> Optional[Accumulator]:
return None

class SampleAggregator(SimpleAggregator):

def _get_accumulator(self, field) -> Optional[Accumulator]:
return SetAccumulator(max_size=100)
class SampleAggregator(SimpleAggregator):
pass


class SpecimenAggregator(SimpleAggregator):

def _get_accumulator(self, field) -> Optional[Accumulator]:
return SetAccumulator(max_size=100)
pass


class CellSuspensionAggregator(GroupingAggregator):
Expand All @@ -118,13 +119,11 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
if field == 'total_estimated_cells':
return DistinctAccumulator(SumAccumulator())
else:
return SetAccumulator(max_size=100)
return super()._get_accumulator(field)


class CellLineAggregator(SimpleAggregator):

def _get_accumulator(self, field) -> Optional[Accumulator]:
return SetAccumulator(max_size=100)
pass


class DonorOrganismAggregator(SimpleAggregator):
Expand All @@ -147,13 +146,11 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
elif field == 'donor_count':
return UniqueValueCountAccumulator()
else:
return SetAccumulator(max_size=100)
return super()._get_accumulator(field)


class OrganoidAggregator(SimpleAggregator):

def _get_accumulator(self, field) -> Optional[Accumulator]:
return SetAccumulator(max_size=100)
pass


class ProjectAggregator(SimpleAggregator):
Expand All @@ -167,7 +164,7 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
'publications'):
return None
else:
return SetAccumulator(max_size=100)
return super()._get_accumulator(field)


class ProtocolAggregator(SimpleAggregator):
Expand All @@ -178,12 +175,19 @@ def _get_accumulator(self, field) -> Optional[Accumulator]:
elif field == 'assay_type':
return FrequencySetAccumulator(max_size=100)
else:
return SetAccumulator()
return super()._get_accumulator(field)

def _get_default_accumulator(self) -> Optional[Accumulator]:
return SetAccumulator()


class SequencingInputAggregator(SimpleAggregator):
pass


class SequencingProcessAggregator(SimpleAggregator):

def _get_accumulator(self, field) -> Optional[Accumulator]:
def _get_default_accumulator(self) -> Optional[Accumulator]:
return SetAccumulator(max_size=10)


Expand Down
Loading

0 comments on commit 384e116

Please sign in to comment.