From 0ff199bd382bcbbcb0d731a6418783ce3d6d94b6 Mon Sep 17 00:00:00 2001 From: kkaris Date: Tue, 13 Aug 2024 05:59:56 -0700 Subject: [PATCH] Fix DGI (#45) * Update to handle dgi v5 new homepage * Remove unused import --- src/bioversions/sources/dgi.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/bioversions/sources/dgi.py b/src/bioversions/sources/dgi.py index 93019b00..83c26d15 100644 --- a/src/bioversions/sources/dgi.py +++ b/src/bioversions/sources/dgi.py @@ -2,14 +2,13 @@ """A getter for the `Drug Gene Interaction Database (DGI-DB) `_.""" -import os - import bs4 +import dateutil.parser import requests from bioversions.utils import Getter, VersionType -DOWNLOADS_PAGE = "https://www.dgidb.org/downloads" +GITHUB_PAGE = "https://github.com/dgidb/dgidb-v5" class DGIGetter(Getter): @@ -21,14 +20,14 @@ class DGIGetter(Getter): def get(self): """Get the latest DGI version number.""" - res = requests.get(DOWNLOADS_PAGE) - soup = bs4.BeautifulSoup(res.content, parser="lxml", features="lxml") - cells = list(soup.select("table#tsv_downloads tbody tr:first-child td:nth-child(2) a")) - if 1 != len(cells): + res = requests.get(GITHUB_PAGE) + soup = bs4.BeautifulSoup(res.content) + time_tag = soup.find("relative-time") + if time_tag is None: raise ValueError - cell = cells[0] - href = cell["href"] - version = os.path.dirname(os.path.relpath(href, "data/monthly_tsvs")) + datetime_str = time_tag.attrs["datetime"] + dt_obj = dateutil.parser.parse(datetime_str) + version = dt_obj.strftime(self.date_version_fmt) return version