diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml index ca08f6bb..72d90ed9 100644 --- a/.github/workflows/build-and-release.yml +++ b/.github/workflows/build-and-release.yml @@ -33,8 +33,8 @@ jobs: - name: Install python dependencies run: | - python -m pip install --upgrade pip - pip install setuptools wheel coveralls check-manifest + python -m pip install pip==24.0 + pip install setuptools wheel check-manifest pip install -e .[tests] - name: Show python dependencies @@ -55,22 +55,6 @@ jobs: run: | docker-compose -f docker-compose.test.py2.yml run --rm ${{ matrix.suite }} - - name: Coveralls - uses: AndreMiras/coveralls-python-action@v20201129 - with: - parallel: true - github-token: ${{ secrets.github_token }} - flag-name: run-${{ matrix.test_number }} - - Coveralls: - needs: Test - runs-on: ubuntu-latest - steps: - - name: Coveralls Finished - uses: AndreMiras/coveralls-python-action@v20201129 - with: - github-token: ${{ secrets.github_token }} - parallel-finished: true Release: if: ${{ github.event_name == 'push' }} @@ -85,7 +69,7 @@ jobs: - name: Install python dependencies run: | pip install --user --upgrade pip - pip --no-cache-dir install --user setuptools wheel coveralls check-manifest + pip --no-cache-dir install --user setuptools wheel check-manifest pip --no-cache-dir install --user -e .[tests] - name: Build package diff --git a/.github/workflows/test-py3.yml b/.github/workflows/test-py3.yml new file mode 100644 index 00000000..2d0c4acb --- /dev/null +++ b/.github/workflows/test-py3.yml @@ -0,0 +1,38 @@ +name: Test python 3 + +on: + push: + branches: [celery-5] + pull_request: + branches: [celery-5] + + +jobs: + Test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.11", "3.12"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install python dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[all] + + - name: Show python dependencies + run: | + pip freeze + + - name: Run tests + run: | + py.test -vv tests/unit diff --git a/README.rst b/README.rst index e1194a3d..658db8bf 100644 --- a/README.rst +++ b/README.rst @@ -14,9 +14,6 @@ .. image:: https://img.shields.io/travis/inspirehep/hepcrawl.svg :target: https://travis-ci.org/inspirehep/hepcrawl -.. image:: https://img.shields.io/coveralls/inspirehep/hepcrawl.svg - :target: https://coveralls.io/r/inspirehep/hepcrawl - .. image:: https://img.shields.io/github/tag/inspirehep/hepcrawl.svg :target: https://github.com/inspirehep/hepcrawl/releases diff --git a/docker-compose.deps.py3.yml b/docker-compose.deps.py3.yml deleted file mode 100644 index 42741a19..00000000 --- a/docker-compose.deps.py3.yml +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of hepcrawl. -# Copyright (C) 2017 CERN. -# -# hepcrawl is a free software; you can redistribute it and/or modify it -# under the terms of the Revised BSD License; see LICENSE file for -# more details. - -version: '2.1' - -services: - hepcrawl_testing_image: - build: - context: ./ - dockerfile: tests/Dockerfile.hepcrawl_py3 - image: hepcrawl_py3 - command: "true" - volumes: - - .:/code/ diff --git a/docker-compose.test.py3.yml b/docker-compose.test.py3.yml deleted file mode 100644 index 77924785..00000000 --- a/docker-compose.test.py3.yml +++ /dev/null @@ -1,204 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of hepcrawl. -# Copyright (C) 2017 CERN. -# -# hepcrawl is a free software; you can redistribute it and/or modify it -# under the terms of the Revised BSD License; see LICENSE file for -# more details. - -version: '2.1' - -services: - service_base: &service_base - image: hepcrawl_py3 - environment: - - APP_BROKER_URL=pyamqp://guest:guest@rabbitmq:5672// - - APP_CELERY_RESULT_BACKEND=redis://redis:6379/1 - - APP_CRAWLER_HOST_URL=http://scrapyd:6800 - - APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT=hepcrawl.testlib.tasks.submit_results - - APP_FILES_STORE=/tmp/file_urls - - APP_LAST_RUNS_PATH=/code/.scrapy/last_runs - - APP_CRAWL_ONCE_PATH=/code/.scrapy - - COVERAGE_PROCESS_START=/code/.coveragerc - tty: true - volumes: - - .:/code/ - user: "${UID:-1000}:${GID:-1000}" - - functional_wsp: - <<: *service_base - command: py.test -vv tests/functional/wsp - depends_on: - scrapyd: - condition: service_healthy - ftp_server: - condition: service_healthy - - functional_desy: - <<: *service_base - command: py.test -vv tests/functional/desy - depends_on: - scrapyd: - condition: service_healthy - ftp_server: - condition: service_healthy - - functional_arxiv: - <<: *service_base - command: py.test -vv tests/functional/arxiv - depends_on: - scrapyd: - condition: service_healthy - arxiv-http-server.local: - condition: service_healthy - - functional_pos: - <<: *service_base - command: py.test -vv tests/functional/pos - depends_on: - scrapyd: - condition: service_healthy - http-server.local: - condition: service_healthy - - unit: - <<: *service_base - command: py.test -vv tests/unit - links: [] - - celery: - <<: *service_base - command: celery worker --events --app hepcrawl.testlib.tasks --loglevel=debug - depends_on: - rabbitmq: - condition: service_healthy - redis: - condition: service_healthy - - scrapyd: - <<: *service_base - command: scrapyd --pidfile=/tmp/scrapyd.pid - networks: - default: - ftp: - depends_on: - celery: - condition: service_started - healthcheck: - timeout: 5s - interval: 5s - retries: 5 - test: "curl -k http://localhost:6800/listprojects.json" - - scrapyd-deploy: - <<: *service_base - command: "true" # do nothing, as the real thing currently fails on Python 3, and not needed for unit - # command: bash -c "scrapyd-deploy" - # depends_on: - # scrapyd: - # condition: service_healthy - - ftp_server: - image: stilliard/pure-ftpd:hardened - environment: - - PUBLICHOST=1.2.3.4 - networks: - ftp: - ipv4_address: 1.2.3.4 - volumes: - - ${PWD}/tests/functional/desy/fixtures/ftp_server/FFT:/home/ftpusers/bob/FFT - - ${PWD}/tests/functional/desy/fixtures/ftp_server/DESY:/home/ftpusers/bob/DESY - - ${PWD}/tests/functional/wsp/fixtures/ftp_server/WSP:/home/ftpusers/bob/WSP - - ${PWD}/tests/functional/wsp/fixtures/ftp_server/pureftpd.passwd:/etc/pure-ftpd/passwd/pureftpd.passwd - healthcheck: - timeout: 5s - interval: 5s - retries: 5 - test: "ls -l /var/run/pure-ftpd.pid" - - http-server.local: - image: nginx:stable-alpine - volumes: - - ${PWD}/tests/functional/pos/fixtures/https_server/conf/proxy.conf:/etc/nginx/conf.d/default.conf - - ${PWD}/tests/functional/pos/fixtures/https_server/conf/ssl:/etc/nginx/ssl - - ${PWD}/tests/functional/pos/fixtures/https_server/records:/etc/nginx/html/ - ports: - - 443:443 - healthcheck: - timeout: 5s - interval: 5s - retries: 5 - test: "curl -k https://localhost:443/187.html" - - functional_cds: - <<: *service_base - command: py.test -vv tests/functional/cds - depends_on: - scrapyd: - condition: service_healthy - cds-http-server.local: - condition: service_healthy - - arxiv-http-server.local: - image: nginx:stable-alpine - volumes: - - ${PWD}/tests/functional/arxiv/fixtures/http_server/conf/proxy.conf:/etc/nginx/conf.d/default.conf - - ${PWD}/tests/functional/arxiv/fixtures/http_server/records:/etc/nginx/html/ - ports: - - 80:80 - healthcheck: - timeout: 5s - interval: 5s - retries: 5 - test: "curl -k http://localhost:80/arxiv-physics-hep-th.xml" - - cds-http-server.local: - image: nginx:stable-alpine - volumes: - - ${PWD}/tests/functional/cds/fixtures/http_server/conf/proxy.conf:/etc/nginx/conf.d/default.conf - - ${PWD}/tests/functional/cds/fixtures/http_server/records:/etc/nginx/html/ - ports: - - 80:80 - healthcheck: - timeout: 5s - interval: 5s - retries: 5 - test: "curl -k http://localhost:80/cds-single.xml" - - rabbitmq: - image: rabbitmq - healthcheck: - timeout: 5s - interval: 5s - retries: 5 - test: "rabbitmqctl status" - - redis: - image: redis:3.2.3 - healthcheck: - timeout: 5s - interval: 5s - retries: 5 - test: "redis-cli -h 127.0.0.1 ping| grep PONG" - - localstack: - image: localstack/localstack:latest - ports: - - '4572:4572' - - '4566:4566' - environment: - - SERVICES=s3 - - DEBUG=1 - - DATA_DIR=/home/localstack/data - healthcheck: - timeout: 5s - interval: 5s - retries: 5 - test: "curl -k localhost:4566|grep running" - -networks: - ftp: - ipam: - config: - - subnet: 1.0.0.0/8 diff --git a/docs/index.rst b/docs/index.rst index e4d8a945..101c906f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,10 +13,6 @@ travis-ci badge - - coveralls.io badge -

diff --git a/setup.py b/setup.py index 268121ba..5f04a9f2 100644 --- a/setup.py +++ b/setup.py @@ -17,15 +17,17 @@ install_requires = [ 'automat==20.2.0', - 'amqp~=2.0,>2.2.0,!=2.3.0', + 'amqp', 'autosemver~=0.2', "backports.tempfile==1.0", 'boto3~=1.14', + 'dojson==1.4.0', 'inspire-schemas~=61.5', 'inspire-dojson~=63.0', 'inspire-utils~=3.0,>=3.0.0', # newer scrapy is incompatible with old scrapyd 'Scrapy~=1.6,<1.7.0', + 'parsel<=1.8.0', 'scrapy-crawl-once~=0.1,>=0.1.1', 'scrapy-sentry~=0.0,>=0.8.0', # TODO: unpin once they support wheel building again, needed for Python 3 @@ -44,13 +46,14 @@ 'harvestingkit>=0.6.12', 'Sickle~=0.6,>=0.6.2', # newer versions seem incompatible with required scrapyd version - 'Twisted~=18.0,>=18.9.0', + 'Twisted~=18.0,>=18.9.0; python_version == "2.7"', + 'Twisted>=21.2.0; python_version >= "3"', #latex parsing 'pylatexenc~=2.9', - 'queuelib==1.5.0', - 'sentry-sdk==1.3.0', - 'structlog==20.1.0', - 'python-logstash==0.4.8', + 'queuelib>=1.5.0', + 'sentry-sdk>=0.10.0', + 'structlog>=19.0.0', + 'python-logstash>=0.4.8', ] tests_require = [ @@ -60,7 +63,7 @@ 'freezegun>=0.3.9', 'isort==4.2.2', 'mock~=2.0,>=2.0.0', - 'pytest>=2.8.0', + 'pytest>=2.8.0, <8.0.0', 'pytest-cov>=2.1.0', 'pytest-pep8>=1.0.6', 'requests-mock>=1.3.0', diff --git a/tests/Dockerfile.hepcrawl_py3 b/tests/Dockerfile.hepcrawl_py3 deleted file mode 100644 index 5dc73059..00000000 --- a/tests/Dockerfile.hepcrawl_py3 +++ /dev/null @@ -1,14 +0,0 @@ -FROM python:3 - -WORKDIR /usr/src/app - -ENV PYTHONDONTWRITEBYTECODE=1 -RUN mkdir /code /var/lib/scrapy /venv - -copy . /code - -ENV PATH="/home/test/.local/bin:${PATH}" - -WORKDIR /code -RUN pip install --upgrade wheel setuptools idutils rfc3987 bleach jsonschema inspire-utils -RUN pip install --no-cache-dir -e .[all] diff --git a/tests/unit/test_desy.py b/tests/unit/test_desy.py index d9a6853c..0cdb66d7 100644 --- a/tests/unit/test_desy.py +++ b/tests/unit/test_desy.py @@ -10,7 +10,7 @@ from __future__ import absolute_import, division, print_function import os - +import sys import mock import pytest from deepdiff import DeepDiff @@ -106,6 +106,8 @@ def override_generated_fields(record): ] ) def test_pipeline(generated_records, expected_records): + if sys.version_info[0] >= 3: + unicode = str clean_generated_records = [ override_generated_fields(generated_record) for generated_record in generated_records @@ -126,6 +128,10 @@ def test_invalid_jsonll(): response.meta = {"s3_subdirectory": 'invalid_record'} result = list(spider.parse(response)) - assert result[0].exception.startswith('ValueError') + exception = result[0].exception + if exception.startswith('ValueError') or exception.startswith('JSONDecodeError'): + assert True + else: + assert False assert result[0].traceback is not None assert result[0].source_data == "This is not actually JSONL" diff --git a/tests/unit/test_settings.py b/tests/unit/test_settings.py index 44967ba7..4ec42585 100644 --- a/tests/unit/test_settings.py +++ b/tests/unit/test_settings.py @@ -8,7 +8,7 @@ # more details. import logging - +import sys from scrapy.utils.project import get_project_settings from scrapy.utils.log import (configure_logging, logger) @@ -21,4 +21,7 @@ def test_log_settings(): configure_logging(settings=settings) assert any(isinstance(handler, logging.StreamHandler) for handler in logger.root.handlers) - assert not any(isinstance(handler, logging.FileHandler) for handler in logger.root.handlers) + if sys.version_info[0] >= 3: + assert any(isinstance(handler, logging.FileHandler) and handler.level == logging.NOTSET for handler in logger.root.handlers) + else: + assert not any(isinstance(handler, logging.FileHandler) for handler in logger.root.handlers) \ No newline at end of file