Upgrade profile semantics to specs-v1 #141

Merged 9 commits on Apr 18, 2017
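
In short, this change moves validation from the deprecated "schema" constructor argument to the descriptor's "profile" property, per specs-v1: the constructor now reads descriptor['profile'] (expected to be filled in by descriptor expansion) and wraps it in the new Profile class. A minimal sketch of the intended usage after the merge; the descriptor contents below are illustrative, not taken from this PR:

from datapackage import DataPackage

# Illustrative descriptor; without a "profile" key, descriptor expansion is
# expected to fill in the default "data-package" profile.
descriptor = {
    'name': 'example-package',
    'resources': [{'name': 'data', 'data': [['id', 'value'], [1, 'one']]}],
}

package = DataPackage(descriptor)
print(package.descriptor['profile'])  # 'data-package' (added by expansion)
package.validate()                    # validates against the selected profile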
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
@@ -3,14 +3,14 @@
Helping out with the development of **datapackage** is much appreciated
and we try to be a newcomer friendly project! There's a lot to do if we
are supposed to be the goto python package for working with [data
packages](http://www.dataprotocols.org/en/latest/data-packages.html).
packages](http://specs.frictionlessdata.io/data-package/).

We don't track an awful lot except for perhaps bugs and feature requests
from non-developers (or very busy developers) in our issue tracker so
the development is mostly fueled by the *scratch your own itch* mantra.

So start off by looking at what [data
packages](http://www.dataprotocols.org/en/latest/data-packages.html) can
packages](http://specs.frictionlessdata.io/data-package/) can
do and what feature you would like to see and use. Then just implement
it!

8 changes: 4 additions & 4 deletions Makefile
@@ -2,7 +2,7 @@


specs:
wget -O datapackage/specs/registry.json https://specs.frictionlessdata.io/schemas/registry.json
wget -O datapackage/specs/data-package.json https://specs.frictionlessdata.io/schemas/data-package.json
wget -O datapackage/specs/tabular-data-package.json https://specs.frictionlessdata.io/schemas/tabular-data-package.json
wget -O datapackage/specs/fiscal-data-package.json https://specs.frictionlessdata.io/schemas/fiscal-data-package.json
wget -O datapackage/profiles/registry.json https://specs.frictionlessdata.io/schemas/registry.json
wget -O datapackage/profiles/data-package.json https://specs.frictionlessdata.io/schemas/data-package.json
wget -O datapackage/profiles/tabular-data-package.json https://specs.frictionlessdata.io/schemas/tabular-data-package.json
wget -O datapackage/profiles/fiscal-data-package.json https://specs.frictionlessdata.io/schemas/fiscal-data-package.json
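
The vendored schemas move from datapackage/specs/ to datapackage/profiles/ to match the new Profile naming. For illustration only, a Python equivalent of the specs target above (not part of the package; it simply mirrors the wget destinations):

import os
import requests

BASE_URL = 'https://specs.frictionlessdata.io/schemas'
SCHEMAS = ['registry', 'data-package', 'tabular-data-package', 'fiscal-data-package']

# Mirror each schema into the new profiles directory, like the Makefile does.
if not os.path.isdir('datapackage/profiles'):
    os.makedirs('datapackage/profiles')
for name in SCHEMAS:
    response = requests.get('%s/%s.json' % (BASE_URL, name))
    response.raise_for_status()
    with open('datapackage/profiles/%s.json' % name, 'w') as handle:
        handle.write(response.text)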
2 changes: 1 addition & 1 deletion README.md
@@ -8,7 +8,7 @@

A model for working with [Data Packages].

[Data Packages]: http://dataprotocols.org/data-packages/
[Data Packages]: http://specs.frictionlessdata.io/data-package/

## Install

13 changes: 12 additions & 1 deletion datapackage/__init__.py
@@ -1,3 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

# Module API

from .datapackage import DataPackage
from .pushpull import push_datapackage, pull_datapackage
from .resource import Resource
from .profile import Profile

# Deprecated
from .pushpull import push_datapackage, pull_datapackage
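
With the explicit "Module API" section above, downstream code is expected to import from the package root; a quick sketch of both the new and the still-supported deprecated imports:

from datapackage import DataPackage, Resource, Profile

# Still importable, but listed under "Deprecated" in __init__.py
from datapackage import push_datapackage, pull_datapackage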
222 changes: 136 additions & 86 deletions datapackage/datapackage.py
@@ -14,17 +14,15 @@
import requests
import warnings
import jsonpointer
import datapackage.schema
from . import config
from . import helpers
from .resource import Resource
from .exceptions import (
DataPackageException,
)
from .profile import Profile
from . import exceptions
from . import helpers
from . import config


class DataPackage(object):
'''Class for loading, validating and working with a Data Package.
""""Class for loading, validating and working with a Data Package.

Args:
descriptor (dict, str or file-like object, optional): The contents of the
@@ -52,11 +50,11 @@ class DataPackage(object):
SchemaError: If the :data:`schema` couldn't be loaded or was invalid.
RegistryError: If there was some problem loading the :data:`schema`
from the registry.
'''
"""

# Public

def __init__(self, descriptor=None, schema='data-package', default_base_path=None):
def __init__(self, descriptor=None, schema=None, default_base_path=None):

# Extract from zip
descriptor = self._extract_zip_if_possible(descriptor)
@@ -69,6 +67,24 @@ def __init__(self, descriptor=None, schema='data-package', default_base_path=None):
helpers.dereference_data_package_descriptor(self._descriptor, self._base_path)
helpers.expand_data_package_descriptor(self._descriptor)

# Get profile
profile = self._descriptor['profile']

# Handle deprecated schema argument
if schema is not None:
warnings.warn(
'Argument "schema" is deprecated. '
'Please use "descriptor.profile" property.',
UserWarning)
if isinstance(schema, six.string_types):
if schema in ['base', 'default']:
schema = 'data-package'
elif schema == 'tabular':
schema = 'tabular-data-package'
elif schema == 'fiscal':
schema = 'fiscal-data-package'
profile = schema

# Handle deprecated resource.path/url
for resource in self._descriptor.get('resources', []):
url = resource.pop('url', None)
@@ -87,80 +103,44 @@ def __init__(self, descriptor=None, schema='data-package', default_base_path=None):
resource['path'] = [path]

# Set attributes
self._schema = self._load_schema(schema)
self._resources = self._load_resources(self.descriptor,
self.base_path)
self._profile = Profile(profile)
self._resources = self._update_resources((), self.descriptor, self.base_path)

def __del__(self):
self._remove_tempdir_if_exists()
if hasattr(self, '_tempdir') and os.path.exists(self._tempdir):
shutil.rmtree(self._tempdir, ignore_errors=True)

@property
def descriptor(self):
'''dict: The descriptor of this data package. Its attributes can be
changed.'''
""""dict: The descriptor of this data package. Its attributes can be
changed.
"""
return self._descriptor

@property
def schema(self):
''':class:`.Schema`: This data package's schema.

Check https://github.com/okfn/datapackage-validate-py for documentation
on its attributes.
'''
return self._schema

@property
def base_path(self):
'''str: The base path of this Data Package (can be None).'''
return self._base_path
def profile(self):
""""str: The profile of this data package.
"""
return self._profile

@property
def resources(self):
'''The resources defined in this data package (can be empty).
""""The resources defined in this data package (can be empty).

To add or remove resources, alter the `resources` attribute of the
:data:`descriptor`.

:returns: The resources.
:rtype: tuple of :class:`.Resource`
'''

"""
self._resources = self._update_resources(self._resources,
self.descriptor,
self.base_path)
return self._resources

@property
def attributes(self):
'''tuple: The union of the attributes defined in the schema and the
data package (can be empty).'''
attributes = set(self.to_dict().keys())
try:
attributes.update(self.schema.properties.keys())
except AttributeError:
pass
return tuple(attributes)

@property
def required_attributes(self):
'''tuple: The schema's required attributed (can be empty).'''
required = ()
try:
if self.schema.required is not None:
required = tuple(self.schema.required)
except AttributeError:
pass
return required

def to_dict(self):
'''dict: Convert this Data Package to dict.'''
return copy.deepcopy(self.descriptor)

def to_json(self):
'''str: Convert this Data Package to a JSON string.'''
return json.dumps(self.descriptor)

def save(self, file_or_path):
'''Validates and saves this Data Package contents into a zip file.
""""Validates and saves this Data Package contents into a zip file.

It creates a zip file into ``file_or_path`` with the contents of this
Data Package and its resources. Every resource which content lives in
@@ -199,7 +179,8 @@ def save(self, file_or_path):
Raises:
ValidationError: If the Data Package is invalid.
DataPackageException: If there was some error writing the package.
'''

"""
self.validate()

def arcname(resource):
@@ -227,38 +208,51 @@ def arcname(resource):
except (IOError,
zipfile.BadZipfile,
zipfile.LargeZipFile) as e:
six.raise_from(DataPackageException(e), e)
six.raise_from(exceptions.DataPackageException(e), e)

def validate(self):
'''Validate this Data Package.
""""Validate this Data Package.

Raises:
ValidationError: If the Data Package is invalid.
'''

"""
descriptor = self.to_dict()
self.schema.validate(descriptor)
self.profile.validate(descriptor)

def iter_errors(self):
'''Lazily yields each ValidationError for the received data dict.
""""Lazily yields each ValidationError for the received data dict.

Returns:
iter: ValidationError for each error in the data.
'''
return self.schema.iter_errors(self.to_dict())

# Deprecated
"""
return self.profile.iter_errors(self.to_dict())

def safe(self):
warnings.warn(
'DataPackage.safe is deprecated. Now it\'s always safe.',
UserWarning)
return True
# Additional

@property
def base_path(self):
""""str: The base path of this Data Package (can be None).
"""
return self._base_path

def to_dict(self):
""""dict: Convert this Data Package to dict.
"""
return copy.deepcopy(self.descriptor)

def to_json(self):
""""str: Convert this Data Package to a JSON string.
"""
return json.dumps(self.descriptor)

# Private

def _extract_zip_if_possible(self, descriptor):
'''str: Path to the extracted datapackage.json if descriptor points to
ZIP, or the unaltered descriptor otherwise.'''
""""str: Path to the extracted datapackage.json if descriptor points to
ZIP, or the unaltered descriptor otherwise.
"""
result = descriptor
try:
if isinstance(descriptor, six.string_types):
@@ -307,13 +301,7 @@ def _validate_zip(self, the_zip):
if f.endswith('datapackage.json')]
if len(datapackage_jsons) != 1:
msg = 'DataPackage must have only one "datapackage.json" (had {n})'
raise DataPackageException(msg.format(n=len(datapackage_jsons)))

def _load_schema(self, schema):
return datapackage.schema.Schema(schema)

def _load_resources(self, descriptor, base_path):
return self._update_resources((), descriptor, base_path)
raise exceptions.DataPackageException(msg.format(n=len(datapackage_jsons)))

def _update_resources(self, current_resources, descriptor, base_path):
resources_dicts = descriptor.get('resources')
@@ -329,6 +317,68 @@ def _update_resources(self, current_resources, descriptor, base_path):

return tuple(new_resources)

def _remove_tempdir_if_exists(self):
if hasattr(self, '_tempdir') and os.path.exists(self._tempdir):
shutil.rmtree(self._tempdir, ignore_errors=True)
# Deprecated

def safe(self):
"""True: datapackage is always safe.
"""

# Deprecate
warnings.warn(
'DataPackage.safe is deprecated. '
'Now it\'s always safe.',
UserWarning)

return True

@property
def schema(self):
""":class:`.Schema`: This data package's schema.
"""

# Deprecate
warnings.warn(
'DataPackage.schema is deprecated.',
UserWarning)

return self._profile

@property
def attributes(self):
"""tuple: Attributes defined in the schema and the data package.
"""

# Deprecate
warnings.warn(
'DataPackage.attributes is deprecated.',
UserWarning)

# Get attributes
attributes = set(self.to_dict().keys())
try:
attributes.update(self.profile.properties.keys())
except AttributeError:
pass

return tuple(attributes)

@property
def required_attributes(self):
"""tuple: The schema's required attributed.
"""

# Deprecate
warnings.warn(
'DataPackage.required_attributes is deprecated.',
UserWarning)
required = ()

# Get required
try:
if self.profile.required is not None:
required = tuple(self.profile.required)
except AttributeError:
pass

return required
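
Putting the deprecations above together, a sketch of the transitional behaviour assuming the merged code; the descriptor and the warning texts in the comments are illustrative:

import warnings

from datapackage import DataPackage

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')

    # The deprecated schema= argument still works; string shortcuts map onto
    # profile names ('base'/'default' -> 'data-package', 'tabular' ->
    # 'tabular-data-package', 'fiscal' -> 'fiscal-data-package').
    package = DataPackage(
        {'name': 'example', 'resources': [{'name': 'data', 'data': ['x']}]},
        schema='tabular')

    # Deprecated accessors warn and delegate to the profile-based API.
    profile = package.schema  # warns that DataPackage.schema is deprecated
    package.safe()            # warns that DataPackage is now always safe

for warning in caught:
    print(warning.category.__name__, str(warning.message))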