Skip to content

Commit

Permalink
Get rid of etags
Browse files Browse the repository at this point in the history
Etags are meant for transient usage in browsers and are not meant for
any long term usage.
Fixes: aboutcode-org#321

Signed-off-by: Hritik Vijay <hritikxx8@gmail.com>
  • Loading branch information
Hritik14 committed Jan 26, 2022
1 parent 90fd963 commit 457c91f
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 63 deletions.
28 changes: 0 additions & 28 deletions vulnerabilities/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,34 +67,6 @@ def fetch_yaml(url):
return saneyaml.load(response.content)


# FIXME: this is NOT how etags work.
# We should instead send the proper HTTP header
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match
# and integrate this finely in the processing, as this typically needs to use
# stream=True requests and proper handling of the HTTP return code.
# In all cases this ends up being a single request, not a HEAD followed
# by another real request.
def create_etag(data_src, url, etag_key):
    """
    Record the etag currently served for `url` and tell whether it is new.

    A HEAD request is sent to `url` and the response header named `etag_key`
    is read. Return False only when that value equals the etag already stored
    for `url` in `data_src.config.etags`, i.e. this response was seen before
    and processing can be skipped. Return True in every other case; when the
    header carries a value, it is first saved in `data_src.config.etags`
    under `url`.
    """
    current_etag = requests.head(url).headers.get(etag_key)
    if not current_etag:
        # Nothing to compare against: report the URL as needing processing
        # and leave the stored etags untouched.
        return True

    known_etags = data_src.config.etags
    if url in known_etags and known_etags[url] == current_etag:
        return False

    # First sighting of this URL, or its etag changed: remember the new one.
    known_etags[url] = current_etag
    return True


def contains_alpha(string):
"""
Return True if the input 'string' contains any alphabet
Expand Down
36 changes: 1 addition & 35 deletions vulnerabilities/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,43 +20,9 @@
# VulnerableCode is a free software tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import dataclasses
from unittest import TestCase
from unittest.mock import patch
from unittest.mock import MagicMock

from vulnerabilities.data_source import DataSource
from vulnerabilities.helpers import create_etag


@dataclasses.dataclass
class DummyDataSourceConfiguration:
    """Minimal configuration used by the tests: it only holds an etags mapping."""

    # Mapping of url -> etag, as read and updated by create_etag.
    etags: dict


class DummyDataSource(DataSource):
    """Bare DataSource subclass that uses the dummy configuration class."""

    CONFIG_CLASS = DummyDataSourceConfiguration


class TestHelpers(TestCase):
    """Tests for the functions in vulnerabilities.helpers."""

    @classmethod
    def setUpClass(cls):
        # A single data source with an empty etag store, shared by the class.
        cls.data_source = DummyDataSource(config={"etags": {}})

    def test_create_etag(self):
        """First call stores the etag and returns True; a repeat returns False."""
        target_url = "https://example.org"
        assert self.data_source.config.etags == {}

        # Stand-in for the HEAD response; only its headers are consulted.
        head_response = MagicMock()
        head_response.headers = {"ETag": "0x1234"}

        with patch("vulnerabilities.helpers.requests.head", return_value=head_response):
            first_result = create_etag(
                data_src=self.data_source, url=target_url, etag_key="ETag"
            )
            assert first_result is True
            assert self.data_source.config.etags == {"https://example.org": "0x1234"}

            # Same url, same etag: reported as already seen.
            second_result = create_etag(
                data_src=self.data_source, url=target_url, etag_key="ETag"
            )
            assert second_result is False
...

0 comments on commit 457c91f

Please sign in to comment.