Skip to content

Commit

Permalink
- Add reCAPTCHA to logins
Browse files Browse the repository at this point in the history
- Show 'displayed name' for cosmetic name field
- Add more uploader functions to support plugins such as Archiver
- Add '/dataset' fallback to URL generation
- Convert dict values into strings before passing to Solr
- Avoid changing package modified timestamp when reordering resources
- Allow sysadmins to edit usernames
- Allow legacy duplicate emails
- Use POST for logouts to avoid caching
- Fix 'group' vs 'organization' text on dashboard
- Invalidate cached pages when cookies change
- Avoid eagerly loading large CLOB fields on activity dashboard
- Populate activity stream notification checkbox from the account being edited, not the logged-in user
- Shorten the lock timeout on dropping datastore tables to avoid deadlocks
- Patch dependencies
  • Loading branch information
ThrawnCA committed Sep 6, 2024
1 parent 66abe42 commit 71d804b
Show file tree
Hide file tree
Showing 31 changed files with 740 additions and 130 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ Changelog

.. towncrier release notes start
v.2.10.x 2023-xx-xx
==================

Major features
--------------

- Update the IUploader interface: uploaders returned by get_uploader and
get_resource_uploader now need to include a new metadata() method, which can
be used by Archiver and other plugins instead of reading the local disk directly
- Add get API `resource_file_metadata_show` which takes resource ID and returns
{ 'content_type': content_type, 'size': length, 'hash': hash } if found

v.2.10.5 2024-08-21
===================

Expand Down
4 changes: 2 additions & 2 deletions ckan/i18n/en_AU/LC_MESSAGES/ckan.po
Original file line number Diff line number Diff line change
Expand Up @@ -3202,7 +3202,7 @@ msgstr "Change details"

#: ckan/templates/user/edit_user_form.html:13
msgid "Full name"
msgstr "Full name"
msgstr "Displayed name"

#: ckan/templates/user/edit_user_form.html:13
msgid "eg. Joe Bloggs"
Expand Down Expand Up @@ -3374,7 +3374,7 @@ msgstr "username"

#: ckan/templates/user/new_user_form.html:11
msgid "Full Name"
msgstr "Full Name"
msgstr "Displayed Name"

#: ckan/templates/user/new_user_form.html:37
msgid "Create Account"
Expand Down
18 changes: 17 additions & 1 deletion ckan/lib/authenticator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import ckan.plugins as plugins
from typing import Any, Mapping, Optional

from ckan.common import g, request
from ckan.lib import captcha
from ckan.model import User
from . import signals

Expand All @@ -26,7 +28,21 @@ def default_authenticate(identity: 'Mapping[str, Any]') -> Optional["User"]:
elif not user_obj.validate_password(identity['password']):
log.debug('Login as %r failed - password not valid', login)
else:
return user_obj
check_captcha = identity.get('check_captcha', True)
if check_captcha and g.recaptcha_publickey:
# Check for a valid reCAPTCHA response
try:
client_ip_address = request.remote_addr or 'Unknown IP Address'
captcha.check_recaptcha_v2_base(
client_ip_address,
request.form.get(u'g-recaptcha-response', '')
)
return user_obj
except captcha.CaptchaError:
log.warning('Login as %r failed - failed reCAPTCHA', login)
request.environ[u'captchaFailed'] = True
else:
return user_obj
signals.failed_login.send(login)
return None

Expand Down
2 changes: 2 additions & 0 deletions ckan/lib/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ def _allow_caching(cache_force: Optional[bool] = None):
elif not config.get('ckan.cache_enabled'):
allow_cache = False

# Any rendered template will have a login-sensitive header
request.environ['__limit_cache_by_cookie__'] = True
if not allow_cache:
# Prevent any further rendering from being cached.
request.environ['__no_cache__'] = True
Expand Down
16 changes: 14 additions & 2 deletions ckan/lib/captcha.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def check_recaptcha(request: Request) -> None:
'''Check a user\'s recaptcha submission is valid, and raise CaptchaError
'''Check a user's recaptcha submission is valid, and raise CaptchaError
on failure.'''
recaptcha_private_key = config.get('ckan.recaptcha.privatekey')
if not recaptcha_private_key:
Expand All @@ -17,8 +17,20 @@ def check_recaptcha(request: Request) -> None:
client_ip_address = request.environ.get(
'REMOTE_ADDR', 'Unknown IP Address')

# reCAPTCHA v2
recaptcha_response_field = request.form.get('g-recaptcha-response', '')
check_recaptcha_v2_base(client_ip_address, recaptcha_response_field)


def check_recaptcha_v2_base(client_ip_address: str,
recaptcha_response_field: str) -> None:
'''Check a user's recaptcha submission is valid, and raise CaptchaError
on failure using discreet data'''
recaptcha_private_key = config.get('ckan.recaptcha.privatekey', '')
if not recaptcha_private_key:
# Recaptcha not enabled
return

# reCAPTCHA v2
recaptcha_server_name = 'https://www.google.com/recaptcha/api/siteverify'

# recaptcha_response_field will be unicode if there are foreign chars in
Expand Down
65 changes: 65 additions & 0 deletions ckan/lib/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,71 @@ def _get_auto_flask_context():

@core_helper
def url_for(*args: Any, **kw: Any) -> str:
    '''Return the URL for an endpoint, falling back to the default
    ``dataset`` type when the URL for a custom dataset type cannot be built.

    The call is first delegated unchanged to :py:func:`base_url_for`, which
    wraps :py:func:`flask.url_for` and :py:func:`routes.url_for` with the
    extra features CKAN needs.

    To build a URL for a Flask view, pass the name of the blueprint and the
    view function separated by a period ``.``, plus any URL parameters::

        url_for('api.action', ver=3, logic_function='status_show')
        # Returns /api/3/action/status_show

    For a fully qualified URL pass the ``_external=True`` parameter. This
    takes the ``ckan.site_url`` and ``ckan.root_path`` settings into
    account::

        url_for('api.action', ver=3, logic_function='status_show',
                _external=True)
        # Returns http://example.com/api/3/action/status_show

    URLs built by Pylons use the Routes syntax::

        url_for(controller='my_ctrl', action='my_action', id='my_dataset')
        # Returns '/dataset/my_dataset'

    Or, using a named route::

        url_for('dataset.read', id='changed')
        # Returns '/dataset/changed'

    Use ``qualified=True`` for a fully qualified URL when targeting a Pylons
    endpoint.

    For backwards compatibility, an effort is made to support the Pylons
    syntax when building a Flask URL, but this support might be dropped in
    the future, so calls should be updated.

    If the first attempt raises ``FlaskRouteBuildError``, the blueprint part
    of a dotted endpoint name and/or the ``controller`` keyword is replaced
    with ``dataset`` and the build is attempted once more. When neither
    substitution applies, the original error is re-raised.
    '''
    try:
        return base_url_for(*args, **kw)
    except FlaskRouteBuildError:
        fallback_applies = False

        # A dotted, non-path endpoint whose blueprint is not already
        # 'dataset' may be a custom dataset type: keep the view name and
        # swap the blueprint.
        if len(args):
            endpoint = args[0]
            if ('.' in endpoint
                    and not endpoint.startswith('/')
                    and not endpoint.startswith('dataset.')):
                view_name = endpoint.split('.', 1)[1]
                args = ('dataset.' + view_name,)
                fallback_applies = True

        # The same substitution for the Pylons-style 'controller' keyword.
        controller = kw.get('controller')
        if controller and controller != 'dataset':
            kw['controller'] = 'dataset'
            fallback_applies = True

        if not fallback_applies:
            raise
        return base_url_for(*args, **kw)


@core_helper
def base_url_for(*args: Any, **kw: Any) -> str:
'''Return the URL for an endpoint given some parameters.
This is a wrapper for :py:func:`flask.url_for`
Expand Down
3 changes: 3 additions & 0 deletions ckan/lib/search/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,9 @@ def index_package(self,
except DateParserError:
log.warning('%r: %r value of %r is not a valid date', pkg_dict['id'], key, value)
continue
if isinstance(value, dict):
# Solr 8+ chokes on passing dict values unless doing an atomic update
value = json.dumps(value)
new_dict[key] = value

pkg_dict = new_dict
Expand Down
123 changes: 117 additions & 6 deletions ckan/lib/uploader.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
# encoding: utf-8
from __future__ import annotations

import flask
import hashlib
import os
import cgi
import datetime
import logging
import magic
import mimetypes
import six
from typing import Any, IO, Optional, Union
from urllib.parse import urlparse

from werkzeug.datastructures import FileStorage as FlaskFileStorage
from werkzeug.wrappers.response import Response as WerkzeugResponse

import ckan.lib.munge as munge
import ckan.logic as logic
import ckan.plugins as plugins
from ckan import logic, plugins
from ckan.common import config
from ckan.types import ErrorDict, PUploader, PResourceUploader
from ckan.lib import base, munge
from ckan.types import ErrorDict, PUploader, PResourceUploader, Response

ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage)
MB = 1 << 20
Expand Down Expand Up @@ -96,6 +99,36 @@ def get_max_resource_size() -> int:
return config.get('ckan.max_resource_size')


def _file_hashnlength(local_path: str) -> tuple[str, int]:
BLOCKSIZE = 65536
hasher = hashlib.sha1()
length = 0

with open(local_path, 'rb') as afile:
buf = afile.read(BLOCKSIZE)
while len(buf) > 0:
hasher.update(buf)
length += len(buf)

buf = afile.read(BLOCKSIZE)

return (str(hasher.hexdigest()), length)


def _add_download_headers(file_path: str,
mime_type: Optional[str],
response: Union[Response, WerkzeugResponse]) -> None:
""" Add appropriate 'Content-Type' and 'Content-Disposition' headers
to a a file download.
"""
if mime_type:
response.headers['Content-Type'] = mime_type
if mime_type != 'application/pdf':
file_name = file_path.split('/')[-1]
response.headers['Content-Disposition'] = \
'attachment; filename=' + file_name


class Upload(object):
storage_path: Optional[str]
filename: Optional[str]
Expand Down Expand Up @@ -136,6 +169,13 @@ def __init__(self,
if old_filename:
self.old_filepath = os.path.join(self.storage_path, old_filename)

def _get_storage_path_for(self, filename: str) -> str:
'''Function to get the path to a stored file.
Storage path must be configured.
'''
assert self.storage_path
return os.path.join(self.storage_path, six.ensure_str(filename))

def update_data_dict(self, data_dict: dict[str, Any], url_field: str,
file_field: str, clear_field: str) -> None:
''' Manipulate data from the data_dict. url_field is the name of the
Expand All @@ -149,16 +189,19 @@ def update_data_dict(self, data_dict: dict[str, Any], url_field: str,
self.clear = data_dict.pop(clear_field, None)
self.file_field = file_field
self.upload_field_storage = data_dict.pop(file_field, None)
self.preserve_filename = data_dict.get('preserve_filename', None)

if not self.storage_path:
return

if isinstance(self.upload_field_storage, ALLOWED_UPLOAD_TYPES):
if self.upload_field_storage.filename:
self.filename = self.upload_field_storage.filename
self.filename = str(datetime.datetime.utcnow()) + self.filename
if not self.preserve_filename:
self.filename = str(datetime.datetime.utcnow()) \
+ self.filename
self.filename = munge.munge_filename_legacy(self.filename)
self.filepath = os.path.join(self.storage_path, self.filename)
self.filepath = self._get_storage_path_for(self.filename)
data_dict[url_field] = self.filename
self.upload_file = _get_underlying_file(
self.upload_field_storage)
Expand Down Expand Up @@ -225,9 +268,42 @@ def verify_type(self):
if types and type_ not in types:
raise logic.ValidationError(err)

def delete(self, filename: str) -> None:
    ''' Remove the named file from the storage directory.

    Nothing happens when no storage path is set or when the name starts
    with ``http``. A failure to remove the file is ignored.
    '''
    if not self.storage_path or filename.startswith('http'):
        return
    try:
        os.remove(self._get_storage_path_for(filename))
    except OSError:
        # Deliberately ignored: removal is best-effort.
        pass

def download(self, filename: str) -> Union[Response, WerkzeugResponse]:
    ''' Build the response that serves a stored file.

    Aborts with a 404 when no storage path is set. Otherwise the file is
    sent with a content type guessed from its path, plus the download
    headers added by ``_add_download_headers``.
    '''
    if self.storage_path:
        target = self._get_storage_path_for(filename)
        response = flask.send_file(target)
        guessed_type = mimetypes.guess_type(target)[0]
        _add_download_headers(target, guessed_type, response)
        return response
    return base.abort(404, "Uploaded resource not found")

def metadata(self, filename: str) -> Union[dict[str, Any], IOError]:
    ''' Describe a stored file.

    :returns: an empty dict when no storage path is set; otherwise a dict
        with ``content_type`` (guessed from the path), ``size`` and
        ``hash``. If reading the file raises ``IOError`` the error is
        logged and the exception object is returned, not raised.
    '''
    if not self.storage_path:
        return {}
    try:
        location = self._get_storage_path_for(filename)
        guessed_type = mimetypes.guess_type(location)[0]
        digest, size = _file_hashnlength(location)
    except IOError as err:
        log.error("Could not retrieve meta data, IOError thrown: %s", err)
        return err
    return {'content_type': guessed_type, 'size': size, 'hash': digest}


class ResourceUpload(object):
mimetype: Optional[str]
url: Optional[str]

def __init__(self, resource: dict[str, Any]) -> None:
path = get_storage_path()
Expand All @@ -253,6 +329,7 @@ def __init__(self, resource: dict[str, Any]) -> None:

if url and config_mimetype_guess == 'file_ext' and urlparse(url).path:
self.mimetype = mimetypes.guess_type(url)[0]
self.url = url

if bool(upload_field_storage) and \
isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
Expand Down Expand Up @@ -360,3 +437,37 @@ def upload(self, id: str, max_size: int = 10) -> None:
os.remove(filepath)
except OSError:
pass

def delete(self, id: str, filename: Optional[str] = None) -> None:
    ''' Remove the stored file of the resource with the given id.

    ``filename`` is accepted but not used here. A failure to remove the
    file is ignored.
    '''
    try:
        stored_file = self.get_path(id)
        os.remove(stored_file)
    except OSError:
        # Deliberately ignored: removal is best-effort.
        pass

def download(self, id: str, filename: Optional[str] = None
             ) -> Union[Response, WerkzeugResponse]:
    ''' Build the response that serves the stored file of a resource.

    The file located by ``get_path(id)`` is sent, with download headers
    derived from this uploader's ``mimetype``. ``filename`` is accepted
    but not used here.
    '''
    stored_file = self.get_path(id)
    response = flask.send_file(stored_file)
    _add_download_headers(stored_file, self.mimetype, response)
    return response

def metadata(self,
             id: str,
             filename: Optional[str] = None
             ) -> Union[dict[str, Any], IOError]:
    ''' Describe the stored file of a resource.

    The content type is this uploader's ``mimetype`` when set, otherwise
    it is guessed from ``url``. ``filename`` is accepted but not used here.

    :returns: a dict with ``content_type``, ``size`` and ``hash``. If
        neither a mimetype nor a URL is available, or reading the file
        raises ``IOError``, the error is logged and the exception object
        is returned, not raised.
    '''
    try:
        stored_file = self.get_path(id)
        content_type = self.mimetype
        if not content_type:
            if not self.url:
                raise IOError("No resource URL found")
            content_type = mimetypes.guess_type(self.url)[0]
        digest, size = _file_hashnlength(stored_file)
    except IOError as err:
        log.error("Could not retrieve metadata, IOError thrown: %s", err)
        return err
    return {'content_type': content_type, 'size': size, 'hash': digest}
Loading

0 comments on commit 71d804b

Please sign in to comment.