diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml new file mode 100644 index 0000000..1846258 --- /dev/null +++ b/.github/workflows/check.yml @@ -0,0 +1,48 @@ +name: check +on: + pull_request: + +concurrency: + group: check-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + name: test ${{ matrix.py }} - ${{ matrix.os }} + runs-on: ${{ matrix.os }}-latest + strategy: + fail-fast: false + matrix: + os: + - Ubuntu + - MacOs + py: + - 3.11 + - '3.10' + - 3.9 + steps: + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - uses: actions/checkout@v4 + - name: Install tox and dev dependencies + run: python -m pip install -r dev-requirements.txt + - name: Setup Micromamba + uses: mamba-org/setup-micromamba@v1 + with: + micromamba-version: "latest" + - name: Pick environment to run + run: | + import subprocess; import json; import os + major, minor, impl = json.loads(subprocess.check_output(["python", "-c", "import json; import sys; import platform; print(json.dumps([sys.version_info[0], sys.version_info[1], platform.python_implementation()]));"], universal_newlines=True)) + with open(os.environ['GITHUB_ENV'], 'a') as file_handler: + file_handler.write('TOXENV=' + ("py" if impl == "CPython" else "pypy") + ("{}{}".format(major, minor) if impl == "CPython" else "3") + "\n") + shell: python + - name: Setup test suite + run: tox -vv --notest + - name: Run test suite + run: tox --skip-pkg-install + env: + CI_RUN: "yes" + DIFF_AGAINST: HEAD \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..56e026a --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,7 @@ +include README.md +graft ShortSeq +prune ShortSeq/tests/testdata/ +prune *.egg-info/ +prune **/__pycache__ +global-exclude .DS_Store +global-exclude *.so \ No newline at end of file diff --git a/ShortSeq/__init__.pxd b/ShortSeq/__init__.pxd index f1848c4..66f0e15 100644 --- a/ShortSeq/__init__.pxd +++ b/ShortSeq/__init__.pxd @@ -1,6 +1,6 @@ from .short_seq cimport * -from .fast_read cimport * -from .short_seq_util cimport * from .short_seq_var cimport * from .short_seq_128 cimport * from .short_seq_64 cimport * +from .fast_read cimport * +from .util cimport * diff --git a/ShortSeq/short_seq.pxd b/ShortSeq/short_seq.pxd index cfcd104..dd1ec01 100644 --- a/ShortSeq/short_seq.pxd +++ b/ShortSeq/short_seq.pxd @@ -9,11 +9,13 @@ from cpython.unicode cimport PyUnicode_AsASCIIString from libcpp.vector cimport vector -from .short_seq_util cimport * from .short_seq_var cimport * from .short_seq_128 cimport * from .short_seq_64 cimport * from .fast_read cimport * +from .util cimport * + +from ShortSeq import MAX_64_NT, MAX_128_NT, MAX_VAR_NT """ Private dictionary fast-path methods not currently offered by the Cython wrapper diff --git a/ShortSeq/short_seq_128.pxd b/ShortSeq/short_seq_128.pxd index adbedcc..9eacbc3 100644 --- a/ShortSeq/short_seq_128.pxd +++ b/ShortSeq/short_seq_128.pxd @@ -1,4 +1,4 @@ -from .short_seq_util cimport * +from .util cimport * # Constants cdef size_t MIN_128_NT diff --git a/ShortSeq/short_seq_64.pxd b/ShortSeq/short_seq_64.pxd index cbeebc2..2042b90 100644 --- a/ShortSeq/short_seq_64.pxd +++ b/ShortSeq/short_seq_64.pxd @@ -1,4 +1,4 @@ -from .short_seq_util cimport * +from .util cimport * # Constants cdef size_t MIN_64_NT diff --git a/ShortSeq/short_seq_var.pxd b/ShortSeq/short_seq_var.pxd index 81cee70..1697ba6 100644 --- a/ShortSeq/short_seq_var.pxd +++ b/ShortSeq/short_seq_var.pxd @@ -1,4 +1,4 @@ -from .short_seq_util cimport * +from .util cimport * from cpython.mem cimport PyObject_Calloc, PyObject_Free from libc.math cimport ceil from libc.string cimport memcmp diff --git a/ShortSeq/tests/unit_tests_main.py b/ShortSeq/tests/unit_tests_main.py index 44f4ed6..614950f 100644 --- a/ShortSeq/tests/unit_tests_main.py +++ b/ShortSeq/tests/unit_tests_main.py @@ -1,7 +1,4 @@ import unittest -import time - -from pympler.asizeof import asizeof from ShortSeq import ShortSeq, ShortSeq64, ShortSeq128, ShortSeqVar from ShortSeq import MIN_VAR_NT, MAX_VAR_NT, MIN_64_NT, MAX_64_NT, MIN_128_NT, MAX_128_NT @@ -158,16 +155,6 @@ def test_slice(self): self.assertEqual(sq[i:], sample[i:]) self.assertEqual(sq[-i:], sample[-i:]) - def test_gzip_size_comparison(self): - sample = rand_sequence(MAX_64_NT, no_range=True) - import gzip - - gz_bytes = gzip.compress(sample.encode()) - print(asizeof(gz_bytes)) - print(asizeof(sample.encode())) - print(asizeof(ShortSeq.from_str(sample))) - - class ShortSeqVarTests(unittest.TestCase): """These tests address the variable length ShortSeq variant (ShortSeqVar)""" diff --git a/ShortSeq/umi/umi.pxd b/ShortSeq/umi/umi.pxd index 1bd7484..1ac7a2b 100644 --- a/ShortSeq/umi/umi.pxd +++ b/ShortSeq/umi/umi.pxd @@ -1,4 +1,4 @@ -from ShortSeq.short_seq_util cimport * +from ShortSeq.util cimport * from cpython.mem cimport PyObject_Calloc, PyObject_Free from cpython.exc cimport PyErr_NoMemory from libc.math cimport ceil, floor diff --git a/ShortSeq/short_seq_util.pxd b/ShortSeq/util.pxd similarity index 98% rename from ShortSeq/short_seq_util.pxd rename to ShortSeq/util.pxd index 87fcf3f..891f254 100644 --- a/ShortSeq/short_seq_util.pxd +++ b/ShortSeq/util.pxd @@ -44,6 +44,7 @@ For SIMD operations. cdef extern from "x86intrin.h" nogil: uint64_t _pext_u64 (uint64_t __X, uint64_t __Y) uint32_t _pext_u32 (uint32_t __X, uint32_t __Y) + uint64_t _popcnt64(uint64_t __X) """ A little bit of hackery to allow fast access to the packed hash field of both diff --git a/ShortSeq/short_seq_util.pyx b/ShortSeq/util.pyx similarity index 100% rename from ShortSeq/short_seq_util.pyx rename to ShortSeq/util.pyx diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 0000000..8aae0c8 --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,3 @@ +tox<4 +tox-conda +cython \ No newline at end of file diff --git a/setup.py b/setup.py index 953d161..ed2c54c 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ URL = 'https://github.com/AlexTate/ShortSeq' AUTHOR = 'Alex Tate' PLATFORM = 'Unix' -REQUIRES_PYTHON = '>=3.10.0' +REQUIRES_PYTHON = '>3.8, <3.12' VERSION = '0.0.1' @@ -33,8 +33,8 @@ sources=['ShortSeq/short_seq_64.pyx'], extra_compile_args=short_seq_common_compile_args, language='c++'), - Extension("ShortSeq.short_seq_util", - sources=['ShortSeq/short_seq_util.pyx'], + Extension("ShortSeq.util", + sources=['ShortSeq/util.pyx'], extra_compile_args=short_seq_common_compile_args, language='c++'), Extension("ShortSeq.fast_read", @@ -54,6 +54,7 @@ description=DESCRIPTION, include_package_data=True, packages=find_namespace_packages(), + python_requires=REQUIRES_PYTHON, zip_safe=False, ext_modules=cythonize( extensions, diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..8b8aa3c --- /dev/null +++ b/tox.ini @@ -0,0 +1,18 @@ +[tox] +envlist = ShortSeq-dev-py3{9,10,11} +minversion = 3.27.1 +requires = tox-conda + +[testenv] +deps = -rrequirements.txt +conda_channels = conda-forge +changedir = {envtmpdir} ; prevent unittest from importing module from the CWD +commands = + python --version + python -m unittest ShortSeq.tests.unit_tests_main + +[gh] +python = + 3.11: py311 + 3.10: py310 + 3.9: py39