Skip to content

Commit

Permalink
tests: add setup_resources_exceptions in conftest and fix test_exception_analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
bolinocroustibat committed Sep 3, 2024
1 parent a9fd037 commit 86ff5bd
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 12 deletions.
14 changes: 12 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import hashlib
import logging
import os
import uuid
from datetime import datetime
Expand All @@ -22,11 +23,14 @@

DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5433/postgres")
RESOURCE_ID = "c4e3a9fb-4415-488e-ba57-d05269b27adf"
EXCEPTION_RESOURCE_ID = "d4e3a9fb-4415-488e-ba57-d05269b27adf"
DATASET_ID = "601ddcfc85a59c3a45c2435a"
pytestmark = pytest.mark.asyncio

nest_asyncio.apply()

log = logging.getLogger("udata-hydra")


def dummy(return_value=None):
"""
Expand Down Expand Up @@ -135,8 +139,14 @@ def setup_catalog(catalog_content, rmock):


@pytest.fixture
async def setup_resources_exceptions():
await ResourceException.insert(resource_id="c4e3a9fb-4415-488e-ba57-d05269b27adf")
async def setup_resources_exceptions(setup_catalog):
try:
await Resource.insert(
dataset_id=DATASET_ID, resource_id=EXCEPTION_RESOURCE_ID, url="http://example.com/"
)
except asyncpg.exceptions.UniqueViolationError:
log.warning("Resource already exists")
await ResourceException.insert(resource_id=EXCEPTION_RESOURCE_ID)


@pytest.fixture
Expand Down
18 changes: 8 additions & 10 deletions tests/test_csv_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@
from tempfile import NamedTemporaryFile

import pytest
from asyncpg import Record
from asyncpg.exceptions import UndefinedTableError
from yarl import URL

from udata_hydra import config
from udata_hydra.analysis.csv import analyse_csv, csv_to_db
from udata_hydra.db.resource import Resource
from udata_hydra.db.resource_exception import ResourceException

from .conftest import RESOURCE_ID
from .conftest import EXCEPTION_RESOURCE_ID, RESOURCE_ID

pytestmark = pytest.mark.asyncio

Expand Down Expand Up @@ -86,18 +88,14 @@ async def test_analyse_csv_big_file(setup_catalog, rmock, db, fake_check, produc
assert profile["total_lines"] == expected_count


async def test_exception_analysis(
setup_catalog, setup_resources_exceptions, rmock, db, fake_check, produce_mock
):
async def test_exception_analysis(setup_resources_exceptions, rmock, db, fake_check, produce_mock):
"""
Tests that exception resources (files that are too large to be normally processed) are indeed processed.
"""
save_config = config.MAX_FILESIZE_ALLOWED
config.override(MAX_FILESIZE_ALLOWED={"csv": 5000})
await db.execute(
f"UPDATE catalog SET resource_id = '{config.LARGE_RESOURCES_EXCEPTIONS[0]}' WHERE id=1"
)
check = await fake_check(resource_id=config.LARGE_RESOURCES_EXCEPTIONS[0])
exception: Record = await ResourceException.get_by_resource_id(EXCEPTION_RESOURCE_ID)
check = await fake_check(resource_id=exception["resource_id"])
filename, expected_count = ("20190618-annuaire-diagnostiqueurs.csv", 45522)
url = check["url"]
table_name = hashlib.md5(url.encode("utf-8")).hexdigest()
Expand All @@ -106,14 +104,14 @@ async def test_exception_analysis(
rmock.get(url, status=200, body=data)

# Check resource status before analysis
resource = await Resource.get(config.LARGE_RESOURCES_EXCEPTIONS[0])
resource = await Resource.get(exception["resource_id"])
assert resource["status"] is None

# Analyse the CSV
await analyse_csv(check_id=check["id"])

# Check resource status after analysis
resource = await Resource.get(config.LARGE_RESOURCES_EXCEPTIONS[0])
resource = await Resource.get(exception["resource_id"])
assert resource["status"] is None

count = await db.fetchrow(f'SELECT count(*) AS count FROM "{table_name}"')
Expand Down

0 comments on commit 86ff5bd

Please sign in to comment.