Skip to content

Commit

Permalink
Add API to get date when state is released (#31)
Browse files Browse the repository at this point in the history
* change url

* change url

* tests

* fix tests failing and formatting

* open cv limit version

* data updated at

* date updated at remove dependency

* change variable names update_date to release_dates

* examples for get release date

* integration test change

* change example readme location for release_dates

* annotate response and return release date keys as State objects

* colab and readme update with release date keys as objects

* docker update with release date keys as objects

* Update SICAR/tests/unit/sicar.py

* Update SICAR/tests/integration/sicar.py

* Update SICAR/sicar.py

* Update SICAR/sicar.py

* Update SICAR/sicar.py

---------

Co-authored-by: Gilson Urbano <guf@hotmail.com.br>
  • Loading branch information
rupestre-campos and urbanogilson authored Aug 7, 2024
1 parent b79e613 commit 440685d
Show file tree
Hide file tree
Showing 11 changed files with 169 additions and 23 deletions.
43 changes: 23 additions & 20 deletions .github/interrogate_badge.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ car = Sicar()

# Download APPS polygon for the PA state
car.download_state(State.PA, Polygon.APPS)

# Get the release dates for all states as a dict keyed by State
release_dates = car.get_release_dates()
print(release_dates.get(State.PA))
# '03/08/2024'
```

### OCR drivers
Expand Down
17 changes: 17 additions & 0 deletions SICAR/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
StateCodeNotValidException: Exception raised when an invalid state code is encountered.
FailedToDownloadCaptchaException: Exception raised when downloading a captcha fails.
FailedToDownloadPolygonException: Exception raised when downloading a polygon fails.
FailedToGetReleaseDateException: Exception raised when getting the release dates fails.
"""


Expand Down Expand Up @@ -107,3 +108,19 @@ def __init__(self):
None
"""
super().__init__("Failed to download polygon!")


class FailedToGetReleaseDateException(Exception):
    """Exception raised when the release date cannot be retrieved."""

    def __init__(self):
        """
        Build the exception with its fixed, human-readable message.

        Parameters:
            None

        Returns:
            None
        """
        message = "Failed to get release date!"
        super().__init__(message)
49 changes: 48 additions & 1 deletion SICAR/sicar.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import random
import httpx
from PIL import Image, UnidentifiedImageError
from bs4 import BeautifulSoup
from tqdm import tqdm
from typing import Dict
from pathlib import Path
Expand All @@ -28,6 +29,7 @@
StateCodeNotValidException,
FailedToDownloadCaptchaException,
FailedToDownloadPolygonException,
FailedToGetReleaseDateException,
)


Expand Down Expand Up @@ -62,6 +64,36 @@ def __init__(
self._create_session(headers=headers)
self._initialize_cookies()

def _parse_release_dates(self, response: bytes) -> Dict:
    """
    Parse the raw HTML of the release date page into a state -> date mapping.

    Each ``div.listagem-estados`` block is expected to hold a download button
    carrying the state code in its ``data-estado`` attribute and a
    ``div.data-disponibilizacao`` element carrying the release date text.
    Blocks missing either element, with an empty date, or with a state code
    that is not a valid ``State`` are skipped.

    Parameters:
        response (bytes): The request content as a UTF-8 encoded byte string containing the HTML page from SICAR with release dates per state.

    Returns:
        Dict: A dict with State members as keys and the release date text found on the page as values.
    """
    html_content = response.decode("utf-8")
    soup = BeautifulSoup(html_content, "html.parser")

    state_dates = {}

    for state_block in soup.find_all("div", class_="listagem-estados"):
        button_tag = state_block.find(
            "button", class_="btn-abrir-modal-download-base-poligono"
        )
        date_tag = state_block.find("div", class_="data-disponibilizacao")
        if button_tag is None or date_tag is None:
            continue

        date = date_tag.get_text(strip=True)
        if not date:
            continue

        # Look the code up by value instead of scanning the enum with
        # `in iter(State)`: this does not depend on State members comparing
        # equal to plain strings, and a missing or unknown code is rejected
        # with ValueError.
        try:
            state = State(button_tag.get("data-estado"))
        except ValueError:
            continue

        state_dates[state] = date

    return state_dates

def _create_session(self, headers: Dict = None):
"""
Create a new session for making HTTP requests.
Expand Down Expand Up @@ -194,7 +226,6 @@ def _download_polygon(

if content_length == 0 or not content_type.startswith("application/zip"):
raise FailedToDownloadPolygonException()

path = Path(
os.path.join(folder, f"{state.value}_{polygon.value}")
).with_suffix(".zip")
Expand Down Expand Up @@ -325,3 +356,19 @@ def download_country(
debug=debug,
chunk_size=chunk_size,
)

def get_release_dates(self) -> Dict:
    """
    Fetch the SICAR release date page and return the release date of each state.

    Returns:
        Dict: A dict containing state sign as keys and release date as string in dd/mm/yyyy format.

    Raises:
        FailedToGetReleaseDateException: If the page with release date fails to load.
    """
    # Only the HTTP request is guarded; parsing happens once the page is in hand.
    try:
        page = self._get(self._RELEASE_DATE)
    except UrlNotOkException as error:
        raise FailedToGetReleaseDateException() from error

    return self._parse_release_dates(page.content)
3 changes: 3 additions & 0 deletions SICAR/tests/integration/sicar.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ def test_download_state(self):
self._car.download_state(State.RR, Polygon.AREA_FALL, debug=True),
Path,
)

def test_get_release_dates(self):
    """The release dates returned by the live service come back as a dict."""
    release_dates = self._car.get_release_dates()
    self.assertIsInstance(release_dates, dict)
6 changes: 6 additions & 0 deletions SICAR/tests/unit/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
StateCodeNotValidException,
FailedToDownloadCaptchaException,
FailedToDownloadPolygonException,
FailedToGetReleaseDateException,
)


Expand Down Expand Up @@ -36,3 +37,8 @@ def test_failed_to_download_polygon_exception(self):
with self.assertRaises(FailedToDownloadPolygonException) as context:
raise FailedToDownloadPolygonException()
self.assertEqual(str(context.exception), "Failed to download polygon!")

def test_failed_to_get_release_dates_exception(self):
    """The exception can be raised and carries its fixed message."""
    expected_message = "Failed to get release date!"

    with self.assertRaises(FailedToGetReleaseDateException) as raised:
        raise FailedToGetReleaseDateException()

    self.assertEqual(str(raised.exception), expected_message)
31 changes: 31 additions & 0 deletions SICAR/tests/unit/sicar.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
StateCodeNotValidException,
FailedToDownloadCaptchaException,
FailedToDownloadPolygonException,
FailedToGetReleaseDateException,
)


Expand Down Expand Up @@ -370,3 +371,33 @@ def test_download_country(self, mock_mkdir, mock_download_state):

mock_mkdir.assert_has_calls(expected_calls["path"])
mock_download_state.assert_has_calls(expected_calls["download_state"])

def test_get_release_dates_success(self):
    """A page with one state block is parsed into a dict keyed by State."""
    # Well-formed fixture: attributes are separated by whitespace and the
    # outer block is closed, so the test does not rely on parser leniency.
    html_content = (
        b'<div class="listagem-estados">'
        b'<div class="data-disponibilizacao"><i>04/08/2024</i></div>'
        b'<button type="button" class="btn-abrir-modal-download-base-poligono" '
        b'data-estado="AC" data-nome-estado="Acre"></button>'
        b"</div>"
    )

    mock_response = MagicMock(status_code=httpx.codes.OK, content=html_content)
    sicar = Sicar(driver=self.mocked_captcha)
    sicar._get = MagicMock(return_value=mock_response)

    release_dates = sicar.get_release_dates()

    sicar._get.assert_called_once_with(
        "https://consultapublica.car.gov.br/publico/estados/downloads"
    )

    self.assertEqual(release_dates, {State.AC: "04/08/2024"})

def test_get_release_dates_failure(self):
    """A NOT_FOUND response makes get_release_dates raise FailedToGetReleaseDateException."""
    not_found_response = MagicMock(status_code=httpx.codes.NOT_FOUND)
    sicar = Sicar(driver=self.mocked_captcha)
    sicar._session.get = MagicMock(return_value=not_found_response)

    with self.assertRaises(FailedToGetReleaseDateException):
        sicar.get_release_dates()

    # The release date page must have been requested exactly once.
    sicar._session.get.assert_called_once()
1 change: 1 addition & 0 deletions SICAR/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ class Url:
_INDEX = f"{_BASE}/imoveis/index"
_DOWNLOAD_BASE = f"{_BASE}/estados/downloadBase"
_RECAPTCHA = f"{_BASE}/municipios/ReCaptcha"
# Page requested by get_release_dates and parsed for the per-state release dates.
_RELEASE_DATE = f"{_BASE}/estados/downloads"
31 changes: 30 additions & 1 deletion examples/colab.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 2,
"metadata": {
"id": "mh1l468ry2pk"
},
Expand All @@ -93,6 +93,35 @@
"car = Sicar()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get release dates for each state"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Release date for RR is: 03/07/2024\n"
]
}
],
"source": [
"# Get release dates\n",
"release_dates = car.get_release_dates()\n",
"# Get the release date of a single state\n",
"print(f\"Release date for RR is: {release_dates.get(State.RR)}\")\n",
"\n",
"# print(f\"Release date for RR is: {release_dates.get('RR')}\") # Also valid to use string 'RR'"
]
},
{
"cell_type": "markdown",
"metadata": {
Expand Down
5 changes: 4 additions & 1 deletion examples/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
car = Sicar(driver=Tesseract)
# car = Sicar(driver=Paddle)

# Download APPS polygons for the Roraima state
# Download APPS polygons for the Roraima state
car.download_state(state=State.RR, polygon=Polygon.APPS, folder="data/Roraima", debug=True)

# Download APPS polygons for all states in Brazil
# car.download_country(polygon=Polygon.APPS, folder="/Brazil")

# Get release date for all states
release_dates = car.get_release_dates()
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies = [
"numpy>=1.22.4, <2",
"tqdm>=4.65.0",
"matplotlib>=3.7.1",
"beautifulsoup4>=4.0.0, <4.20.0"
]

[project.optional-dependencies]
Expand Down

0 comments on commit 440685d

Please sign in to comment.