Skip to content

Commit

Permalink
Make download_hash.sha1 optional
Browse files Browse the repository at this point in the history
  • Loading branch information
HebaruSan committed Oct 1, 2024
1 parent ce75185 commit 26e8393
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 3 deletions.
14 changes: 13 additions & 1 deletion netkan/netkan/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from hashlib import sha1
import uuid
import urllib.parse
from string import Template
from typing import Optional, List, Tuple, Union, Any, Dict, TYPE_CHECKING
from ruamel.yaml import YAML
import dateutil.parser
Expand Down Expand Up @@ -320,6 +321,7 @@ def __str__(self) -> str:
# Presumably the property names whose values are parsed as ISO date-times;
# the code that consumes this list is not shown here — TODO confirm.
ISODATETIME_PROPERTIES = [
'release_date'
]
# Layout of a mirrored file name. mirror_filename() fills in the four
# placeholders: prefix, identifier, version and extension.
MIRROR_FILENAME_TEMPLATE = Template('$prefix-$identifier-$version.$extension')

def __init__(self, filename: Optional[Union[str, Path]] = None, contents: Optional[str] = None) -> None:
if filename:
Expand Down Expand Up @@ -438,7 +440,11 @@ def redistributable(self) -> bool:
def mirror_filename(self, with_epoch: bool = True) -> Optional[str]:
    """Return the file name used for this module's mirrored download.

    The name has the form ``<prefix>-<identifier>-<version>.<extension>``.
    The prefix comes from ``_mirror_prefix()``, the version from
    ``_format_version(with_epoch)``, and the extension is looked up in
    ``Ckan.MIME_TO_EXTENSION`` by the download's content type.

    Args:
        with_epoch: Forwarded unchanged to ``_format_version``.

    Returns:
        The file name, or None when the metadata has no ``download_hash``.
    """
    if 'download_hash' not in self._raw:
        return None
    # Build the prefix through _mirror_prefix() instead of indexing
    # download_hash["sha1"] directly, which raises KeyError when the
    # metadata carries no sha1 entry.
    return self.MIRROR_FILENAME_TEMPLATE.safe_substitute(
        prefix=self._mirror_prefix(),
        identifier=self.identifier,
        version=self._format_version(with_epoch),
        extension=Ckan.MIME_TO_EXTENSION[self.download_content_type])

def mirror_download(self, with_epoch: bool = True) -> Optional[str]:
filename = self.mirror_filename(with_epoch)
Expand All @@ -450,6 +456,12 @@ def mirror_item(self, with_epoch: bool = True) -> str:
return self._ia_bucket_sanitize(
f'{self.identifier}-{self._format_version(with_epoch)}')

def _mirror_prefix(self) -> str:
    """Return the first eight characters of the download hash.

    The ``sha1`` entry of ``download_hash`` is used when present;
    otherwise the ``sha256`` entry is used.
    """
    hashes = self.download_hash
    key = 'sha1' if 'sha1' in hashes else 'sha256'
    return hashes[key][:8]

# InternetArchive says:
# Bucket names should be valid archive identifiers;
# try someting matching this regular expression:
Expand Down
25 changes: 23 additions & 2 deletions netkan/netkan/mirrorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,22 @@ def mirrored(self, iarchive: internetarchive.session.ArchiveSession) -> bool:
return False
if not item.exists:
return False
sha1 = self.download_hash['sha1'].lower()
return any(file['sha1'].lower() == sha1 for file in item.files if 'sha1' in file)
sha1 = self._sha1()
if sha1 is None:
return False
return any(file['sha1'].lower() == sha1
for file in item.files
if 'sha1' in file)

def _sha1(self) -> Optional[str]:
    """Return this download's SHA-1 as a lowercase hex string, or None.

    The ``sha1`` entry of ``download_hash`` is preferred. When it is
    absent, the hash is calculated from the stream returned by
    ``open_download()``; if that returns None, so does this method.

    The result is always lowercase: ``mirrored`` compares it against
    lowercased hashes, whereas ``large_file_sha1`` yields uppercase hex.
    """
    if 'sha1' in self.download_hash:
        # Use hash from metadata if set
        return self.download_hash['sha1'].lower()
    dl_io = self.open_download()
    if dl_io is None:
        return None
    # Calculate hash from file if found, closing the stream afterwards
    # so the handle opened here is not leaked.
    with dl_io:
        return self.large_file_sha1(dl_io).lower()

def license_urls(self) -> List[str]:
return [self.LICENSE_URLS[lic]
Expand Down Expand Up @@ -158,6 +172,13 @@ def large_file_sha256(file: BinaryIO, block_size: int = 8192) -> str:
sha.update(block)
return sha.hexdigest().upper()

@staticmethod
def large_file_sha1(file: BinaryIO, block_size: int = 8192) -> str:
sha = hashlib.sha1()
for block in iter(lambda: file.read(block_size), b''):
sha.update(block)
return sha.hexdigest().upper()

def open_if_hash_match(self, path: Path) -> Optional[BinaryIO]:
"""Check whether the file located at the given path matches our sha256.
Expand Down

0 comments on commit 26e8393

Please sign in to comment.