Skip to content

Commit

Permalink
Drop references to removed media_type_parameters (#662)
Browse files: browse the repository at this point in the history
We removed this functionality from the database in edgi-govdata-archiving/web-monitoring-db#752.
  • Loading branch information
Mr0grog authored Nov 13, 2020
1 parent c2494d9 commit 06e3e51
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
11 changes: 10 additions & 1 deletion web_monitoring/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@
'application/x-pdf',
))

# Identifies a bare media type (that is, one without parameters)
MEDIA_TYPE_EXPRESSION = re.compile(r'^\w+/\w[\w+_\-.]+$')


# These functions lump together library code into monolithic operations for the
# CLI. They also print. To access this functionality programmatically, it is
Expand Down Expand Up @@ -387,7 +390,6 @@ def format_memento(self, memento, cdx_record, maintainers, tags):
capture_time=iso_date,
uri=cdx_record.raw_url,
media_type=media_type or None,
media_type_parameters=media_type_parameters or None,
version_hash=utils.hash_content(memento.content),
source_type='internet_archive',
source_metadata=metadata,
Expand All @@ -398,6 +400,13 @@ def get_memento_media(self, memento):
"""Extract media type and media type parameters from a memento."""
media, *parameters = memento.headers.get('Content-Type', '').split(';')

# Clean up media type
media = media.strip().lower()
if not MEDIA_TYPE_EXPRESSION.match(media):
original = memento.history[0] if memento.history else memento
logger.info('Unknown media type "%s" for "%s"', media, original.memento_url)
media = ''

# Clean up whitespace, remove empty parameters, etc.
clean_parameters = (param.strip() for param in parameters)
parameters = [param for param in clean_parameters if param]
Expand Down
11 changes: 4 additions & 7 deletions web_monitoring/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ def _time_range_string(start_date, end_date):


def _build_version(*, page_id, uuid, capture_time, uri, hash, source_type,
title, source_metadata=None, media_type=None,
media_type_parameters=None):
title, source_metadata=None, media_type=None):
"""
Build a Version dict from parameters, performing some validation.
"""
Expand All @@ -93,16 +92,15 @@ def _build_version(*, page_id, uuid, capture_time, uri, hash, source_type,
'source_type': str(source_type),
'title': str(title),
'source_metadata': source_metadata,
'media_type': media_type,
'media_type_parameters': media_type_parameters}
'media_type': media_type}
return version


def _build_importable_version(*, page_url, uuid=None, capture_time, uri,
version_hash, source_type, title,
page_maintainers=None, page_tags=None,
source_metadata=None, status=None,
media_type=None, media_type_parameters=None):
media_type=None):
"""
Build a Version dict from parameters, performing some validation.
Expand All @@ -124,8 +122,7 @@ def _build_importable_version(*, page_url, uuid=None, capture_time, uri,
'status': str(status),
'page_maintainers': page_maintainers,
'page_tags': page_tags,
'media_type': media_type,
'media_type_parameters': media_type_parameters}
'media_type': media_type}
return version


Expand Down

0 comments on commit 06e3e51

Please sign in to comment.