Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/add creator support #302

Merged
merged 6 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 45 additions & 7 deletions ckanext/dcat/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,31 @@ def dcat_to_ckan(dcat_dict):
elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')})

if dcat_publisher.get('mbox'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})
if dcat_publisher.get('email'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('email')})

if dcat_publisher.get('identifier'):
package_dict['extras'].append({
'key': 'dcat_publisher_id',
'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734
})

dcat_creator = dcat_dict.get('creator')
if isinstance(dcat_creator, basestring):
package_dict['extras'].append({'key': 'dcat_creator_name', 'value': dcat_creator})
elif isinstance(dcat_creator, dict) and dcat_creator.get('name'):
if dcat_creator.get('name'):
package_dict['extras'].append({'key': 'dcat_creator_name', 'value': dcat_creator.get('name')})

if dcat_creator.get('email'):
package_dict['extras'].append({'key': 'dcat_creator_email', 'value': dcat_creator.get('email')})

if dcat_creator.get('identifier'):
package_dict['extras'].append({
'key': 'dcat_creator_id',
'value': dcat_creator.get('identifier')
})

package_dict['extras'].append({
'key': 'language',
'value': ','.join(dcat_dict.get('language', []))
Expand All @@ -63,20 +79,20 @@ def dcat_to_ckan(dcat_dict):


def ckan_to_dcat(package_dict):

dcat_dict = {}

dcat_dict['title'] = package_dict.get('title')
dcat_dict['description'] = package_dict.get('notes')
dcat_dict['landingPage'] = package_dict.get('url')


# Keywords
dcat_dict['keyword'] = []
for tag in package_dict.get('tags', []):
dcat_dict['keyword'].append(tag['name'])


# Publisher and creator
dcat_dict['publisher'] = {}
dcat_dict['creator'] = {}

for extra in package_dict.get('extras', []):
if extra['key'] in ['dcat_issued', 'dcat_modified']:
Expand All @@ -85,19 +101,41 @@ def ckan_to_dcat(package_dict):
elif extra['key'] == 'language':
dcat_dict['language'] = extra['value'].split(',')

# Publisher fields
elif extra['key'] == 'dcat_publisher_name':
dcat_dict['publisher']['name'] = extra['value']

elif extra['key'] == 'dcat_publisher_email':
dcat_dict['publisher']['mbox'] = extra['value']
dcat_dict['publisher']['email'] = extra['value']

elif extra['key'] == 'dcat_publisher_id':
dcat_dict['publisher']['identifier'] = extra['value']

# Creator fields
elif extra['key'] == 'dcat_creator_name':
dcat_dict['creator']['name'] = extra['value']

elif extra['key'] == 'dcat_creator_email':
dcat_dict['creator']['email'] = extra['value']

elif extra['key'] == 'dcat_creator_id':
dcat_dict['creator']['identifier'] = extra['value']

# Identifier
elif extra['key'] == 'guid':
dcat_dict['identifier'] = extra['value']

# Fallback for publisher (if no name in extras, use maintainer)
if not dcat_dict['publisher'].get('name') and package_dict.get('maintainer'):
dcat_dict['publisher']['name'] = package_dict.get('maintainer')
if package_dict.get('maintainer_email'):
dcat_dict['publisher']['mbox'] = package_dict.get('maintainer_email')
dcat_dict['publisher']['email'] = package_dict.get('maintainer_email')

# Fallback for creator (if no name in extras, optionally use author)
if not dcat_dict['creator'].get('name') and package_dict.get('author'):
dcat_dict['creator']['name'] = package_dict.get('author')
if package_dict.get('author_email'):
dcat_dict['creator']['email'] = package_dict.get('author_email')

dcat_dict['distribution'] = []
for resource in package_dict.get('resources', []):
Expand Down
49 changes: 14 additions & 35 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,58 +419,37 @@ def _insert_or_update_temporal(self, dataset_dict, key, value):
else:
dataset_dict["extras"].append({"key": key, "value": value})

def _publisher(self, subject, predicate):
def _agent_details(self, subject, predicate):
"""
Returns a dict with details about a dct:publisher entity, a foaf:Agent
Returns a dict with details about a dct:publisher or dct:creator entity, a foaf:Agent

Both subject and predicate must be rdflib URIRef or BNode objects

Examples:

<dct:publisher>
<dct:publisher> or <dct:creator>
<foaf:Organization rdf:about="http://orgs.vocab.org/some-org">
<foaf:name>Publishing Organization for dataset 1</foaf:name>
<foaf:mbox>contact@some.org</foaf:mbox>
<foaf:homepage>http://some.org</foaf:homepage>
<dct:type rdf:resource="http://purl.org/adms/publishertype/NonProfitOrganisation"/>
</foaf:Organization>
</dct:publisher>

{
'uri': 'http://orgs.vocab.org/some-org',
'name': 'Publishing Organization for dataset 1',
'email': 'contact@some.org',
'url': 'http://some.org',
'type': 'http://purl.org/adms/publishertype/NonProfitOrganisation',
}

<dct:publisher rdf:resource="http://publications.europa.eu/resource/authority/corporate-body/EURCOU" />

{
'uri': 'http://publications.europa.eu/resource/authority/corporate-body/EURCOU'
}

Returns keys for uri, name, email, url and type with the values set to
an empty string if they could not be found
Returns keys for uri, name, email, url, type, and identifier with the values set to
an empty string if they could not be found. If the subject has no object for the
given predicate, an empty dict is returned.
"""

publisher = {}
agent_details = {}

for agent in self.g.objects(subject, predicate):
agent_details["uri"] = str(agent) if isinstance(agent, term.URIRef) else ""
agent_details["name"] = self._object_value(agent, FOAF.name)
agent_details["email"] = self._object_value(agent, FOAF.mbox)
agent_details["url"] = self._object_value(agent, FOAF.homepage)
agent_details["type"] = self._object_value(agent, DCT.type)
agent_details['identifier'] = self._object_value(agent, DCT.identifier)

publisher["uri"] = str(agent) if isinstance(agent, term.URIRef) else ""

publisher["name"] = self._object_value(agent, FOAF.name)

publisher["email"] = self._object_value(agent, FOAF.mbox)

publisher["url"] = self._object_value(agent, FOAF.homepage)

publisher["type"] = self._object_value(agent, DCT.type)

publisher['identifier'] = self._object_value(agent, DCT.identifier)

return publisher
return agent_details

def _contact_details(self, subject, predicate):
"""
Expand Down Expand Up @@ -1136,7 +1115,7 @@ def _extract_catalog_dict(self, catalog_ref):
out.append(
{
"key": "source_catalog_publisher",
"value": json.dumps(self._publisher(catalog_ref, DCT.publisher)),
"value": json.dumps(self._agent_details(catalog_ref, DCT.publisher)),
}
)
return out
Expand Down
54 changes: 51 additions & 3 deletions ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

config = toolkit.config


DISTRIBUTION_LICENSE_FALLBACK_CONFIG = "ckanext.dcat.resource.inherit.license"


Expand Down Expand Up @@ -122,13 +121,21 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
)

# Publisher
publisher = self._publisher(dataset_ref, DCT.publisher)
publisher = self._agent_details(dataset_ref, DCT.publisher)
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}".format(key), "value": publisher.get(key)}
)

# Creator
creator = self._agent_details(dataset_ref, DCT.creator)
for key in ("uri", "name", "email", "url", "type", "identifier"):
if creator.get(key):
dataset_dict["extras"].append(
{"key": "creator_{0}".format(key), "value": creator.get(key)}
)

# Temporal
start, end = self._time_interval(dataset_ref, DCT.temporal)
if start:
Expand Down Expand Up @@ -250,7 +257,6 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
"publisher_name",
"publisher_email",
):

extra["key"] = "dcat_" + extra["key"]

if extra["key"] == "language":
Expand Down Expand Up @@ -412,6 +418,48 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
]
self._add_triples_from_dict(publisher_details, publisher_ref, items)

# Creator
creator_ref = None

if dataset_dict.get("creator"):
# Scheming creator field: will be handled in a separate profile
pass
elif any(
[
self._get_dataset_value(dataset_dict, "creator_uri"),
self._get_dataset_value(dataset_dict, "creator_name"),
]
):
# Legacy creator_* extras
creator_uri = self._get_dataset_value(dataset_dict, "creator_uri")
creator_name = self._get_dataset_value(dataset_dict, "creator_name")
if creator_uri:
creator_ref = CleanedURIRef(creator_uri)
else:
# No creator_uri
creator_ref = BNode()

creator_details = {
"name": creator_name,
"email": self._get_dataset_value(dataset_dict, "creator_email"),
"url": self._get_dataset_value(dataset_dict, "creator_url"),
"type": self._get_dataset_value(dataset_dict, "creator_type"),
"identifier": self._get_dataset_value(dataset_dict, "creator_identifier"),
}

# Add to graph
if creator_ref:
g.add((creator_ref, RDF.type, FOAF.Agent))
g.add((dataset_ref, DCT.creator, creator_ref)) # Use DCT.creator for creator
items = [
("name", FOAF.name, None, Literal),
("email", FOAF.mbox, None, Literal),
("url", FOAF.homepage, None, URIRef),
("type", DCT.type, None, URIRefOrLiteral),
("identifier", DCT.identifier, None, URIRefOrLiteral),
]
self._add_triples_from_dict(creator_details, creator_ref, items)

# Temporal
start = self._get_dataset_value(dataset_dict, "temporal_start")
end = self._get_dataset_value(dataset_dict, "temporal_end")
Expand Down
Loading