Skip to content

Commit

Permalink
Migrate to hatch and Python 3. (#116)
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewwardrop authored Nov 1, 2023
1 parent a91ebed commit 821f2a7
Show file tree
Hide file tree
Showing 56 changed files with 2,806 additions and 1,933 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: Publish tagged releases to PyPI

on:
push:
tags:
- "v*"

jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: '3.9'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install hatch
- name: Build and publish
env:
HATCH_INDEX_USER: __token__
HATCH_INDEX_AUTH: ${{ secrets.PYPI_PUBLISH_TOKEN }}
run: |
hatch build
hatch publish
39 changes: 39 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: Run Tests

on:
push:
branches:
- "**"

jobs:
build:
runs-on: ubuntu-latest
strategy:
max-parallel: 4
fail-fast: false
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install hatch hatch-vcs
- name: Run tests
run: hatch run tests
- if: matrix.python-version == '3.9'
name: Lint
run: hatch run lint:check
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1.0.10
with:
file: ./coverage.xml
flags: unittests
env_vars: OS,PYTHON
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ var/
*.egg-info/
.installed.cfg
*.egg
_version_info.py

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
27 changes: 0 additions & 27 deletions .travis.yml

This file was deleted.

5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
[![PyPI - Version](https://img.shields.io/pypi/v/omniduct.svg)](https://pypi.org/project/omniduct/)
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/omniduct.svg)
![PyPI - Status](https://img.shields.io/pypi/status/omniduct.svg)
[![Build Status](https://travis-ci.org/airbnb/omniduct.svg?branch=master)](https://travis-ci.org/airbnb/omniduct)
[![Build Status](https://img.shields.io/github/actions/workflow/status/airbnb/omniduct/tests.yml?branch=main)](https://github.com/airbnb/omniduct/actions?query=workflow%3A%22Run+Tests%22)
[![Documentation Status](https://readthedocs.org/projects/omniduct/badge/?version=latest)](http://omniduct.readthedocs.io/en/latest/?badge=latest)


Expand All @@ -19,6 +19,3 @@ It provides:
- Automatic port forwarding of remote services over SSH where connections cannot be made directly.
- Convenient IPython magic functions for interfacing with data providers from within IPython and Jupyter Notebook sessions.
- Utility classes and methods to assist in maintaining registries of useful services.

**Note:** Omniduct 1.1.x is the last version series to support Python 2. Going
forward it will support Python 3.6+.
14 changes: 8 additions & 6 deletions omniduct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,19 @@ def about():
"Omniduct",
version=__version__,
logo=__logo__,
maintainers=OrderedDict(zip(
[a.strip() for a in __author__.split(',')],
[a.strip() for a in __author_email__.split(',')]
)),
maintainers=OrderedDict(
zip(
[a.strip() for a in __author__.split(",")],
[a.strip() for a in __author_email__.split(",")],
)
),
attributes={
'Documentation': __docs_url__,
"Documentation": __docs_url__,
},
description="""
Omniduct provides uniform interfaces for connecting to and extracting data
from a wide variety of (potentially remote) data stores (including HDFS,
Hive, Presto, MySQL, etc).
""",
endorse_omniduct=False
endorse_omniduct=False,
)
121 changes: 58 additions & 63 deletions omniduct/_version.py
Original file line number Diff line number Diff line change
@@ -1,107 +1,102 @@
import os
import sys

__all__ = ['__author__', '__author_email__', '__version__', '__logo__', '__docs_url__']
try:
from ._version_info import __version__, __version_tuple__
except ImportError:
__version__ = "unknown"
__version_tuple__ = (0, 0, 0, "+unknown")

__all__ = [
"__author__",
"__author_email__",
"__version__",
"__version_tuple__",
"__logo__",
"__docs_url__",
]

__author__ = "Matthew Wardrop, Dan Frank"
__author_email__ = "mpwardrop@gmail.com, danfrankj@gmail.com"
__version__ = "1.1.19"
__logo__ = os.path.join(os.path.dirname(__file__), 'logo.png') if '__file__' in globals() else None
__logo__ = (
os.path.join(os.path.dirname(__file__), "logo.png")
if "__file__" in globals()
else None
)
__docs_url__ = "https://omniduct.readthedocs.io/"


# These are the core dependencies, and should not include those which are used only in handling specific protocols.
# Order matters since installation happens from the end of the list
__dependencies__ = [
"future", # Python 2/3 support
"six", # Python 2/3 support

"interface_meta>=1.1.0,<2", # Metaclass for creating an extensible well-documented architecture
"pyyaml", # YAML configuration parsing
"decorator", # Decorators used by caching and documentation routines
"progressbar2>=3.30.0", # Support for progressbars in logging routines
"wrapt", # Object proxying for conveniently exposing ducts in registry

# Database querying libraries
"jinja2", # Templating support in databases
"pandas>=0.17.1", # Various results including database queries are returned as pandas dataframes
"pandas>=0.20.3", # Various results including database queries are returned as pandas dataframes
"sqlparse", # Neatening of SQL based queries (mainly to avoid missing the cache)
"sqlalchemy", # Various integration endpoints in the database stack

# Utility libraries
"python-dateutil", # Used for its `relativedelta` class for Cache instances
"lazy-object-proxy", # Schema traversal
]
if sys.version_info.major < 3 or sys.version_info.major == 3 and sys.version_info.minor < 4:
__dependencies__.append("enum34") # Python 3.4+ style enums in older versions of python

PY2 = sys.version_info[0] == 2
if os.name == 'posix' and PY2:
__dependencies__.append('subprocess32') # Python 3.2+ subprocess handling for Python 2

__optional_dependencies__ = {
# Databases
'druid': [
'pydruid>=0.4.0', # Primary client
"druid": [
"pydruid>=0.4.0", # Primary client
],

'hiveserver2': [
'pyhive[hive]>=0.4', # Primary client
'thrift>=0.10.0', # Thrift dependency which seems not to be installed with upstream deps
"hiveserver2": [
"pyhive[hive]>=0.4", # Primary client
"thrift>=0.10.0", # Thrift dependency which seems not to be installed with upstream deps
],

'presto': [
'pyhive[presto]>=0.4', # Primary client
"presto": [
"pyhive[presto]>=0.4", # Primary client
],

'pyspark': [
'pyspark', # Primary client
"pyspark": [
"pyspark", # Primary client
],

'snowflake': [
'snowflake-sqlalchemy',
"snowflake": [
"snowflake-sqlalchemy",
],

'exasol': ['pyexasol'] if sys.version_info.major > 2 else [],

"exasol": ["pyexasol"] if sys.version_info.major > 2 else [],
# Filesystems
'webhdfs': [
'pywebhdfs', # Primary client
'requests', # For rerouting redirect queries to our port-forwarded services
"webhdfs": [
"pywebhdfs", # Primary client
"requests", # For rerouting redirect queries to our port-forwarded services
],

's3': [
'boto3', # AWS client library
"s3": [
"boto3", # AWS client library
],

# Remotes
'ssh': [
'pexpect', # Command line handling (including smartcard activation)
"ssh": [
"pexpect", # Command line handling (including smartcard activation)
],

'ssh_paramiko': [
'paramiko', # Primary client
'pexpect', # Command line handling (including smartcard activation)
"ssh_paramiko": [
"paramiko", # Primary client
"pexpect", # Command line handling (including smartcard activation)
],

# Rest clients
'rest': [
'requests', # Library to handle underlying REST queries
"rest": [
"requests", # Library to handle underlying REST queries
],

# Documentation requirements
'docs': [
'sphinx', # The documentation engine
'sphinx_autobuild', # A Sphinx plugin used during development of docs
'sphinx_rtd_theme', # The Sphinx theme used by the docs
"docs": [
"sphinx", # The documentation engine
"sphinx_autobuild", # A Sphinx plugin used during development of docs
"sphinx_rtd_theme",  # The Sphinx theme used by the docs
],
"test": [
"nose", # test runner
"mock", # mocking
"pyfakefs", # mock filesystem
"coverage", # test coverage monitoring
"flake8", # Code linting
],

'test': [
'nose', # test runner
'mock', # mocking
'pyfakefs', # mock filesystem
'coverage', # test coverage monitoring
'flake8', # Code linting
]
}
__optional_dependencies__['all'] = [dep for deps in __optional_dependencies__.values() for dep in deps]
__optional_dependencies__["all"] = [
dep for deps in __optional_dependencies__.values() for dep in deps
]
23 changes: 7 additions & 16 deletions omniduct/caches/_serializers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import pickle
from distutils.version import LooseVersion

import pandas


class Serializer(object):

class Serializer:
@property
def file_extension(self):
return ""
Expand All @@ -18,21 +16,21 @@ def deserialize(self, fh):


class BytesSerializer(Serializer):

@property
def file_extension(self):
return ".bytes"

def serialize(self, obj, fh):
assert isinstance(obj, bytes), "BytesSerializer requires incoming data be already encoded into a bytestring."
assert isinstance(
obj, bytes
), "BytesSerializer requires incoming data be already encoded into a bytestring."
fh.write(obj)

def deserialize(self, fh):
return fh.read()


class PickleSerializer(Serializer):

@property
def file_extension(self):
return ".pickle"
Expand All @@ -45,19 +43,12 @@ def deserialize(self, fh):


class PandasSerializer(Serializer):

@property
def file_extension(self):
return ".pandas"

@classmethod
def serialize(cls, formatted_data, fh):
# compat: if pandas is old, to_pickle does not accept file handles
if LooseVersion(pandas.__version__) <= LooseVersion('0.20.3'):
fh.close()
fh = fh.name
return pandas.to_pickle(formatted_data, fh, compression=None)
def serialize(self, obj, fh):
return pandas.to_pickle(obj, fh, compression=None)

@classmethod
def deserialize(cls, fh):
def deserialize(self, fh):
return pandas.read_pickle(fh, compression=None)
Loading

1 comment on commit 821f2a7

@marcelo313
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How long did this take you @matthewwardrop? Also, do you suggest that I pull the repo again before working on the azure_data_lake functionality?

Please sign in to comment.