Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix AuthorizationException with AWSV4SignerAsyncAuth when the doc ID has special characters. #848

Merged
merged 5 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Fixed
- Fix `Transport.perform_request`'s arguments `timeout` and `ignore` variable usage ([810](https://github.com/opensearch-project/opensearch-py/pull/810))
- Fix `Index.save` not passing through aliases to the underlying index ([823](https://github.com/opensearch-project/opensearch-py/pull/823))
- Fix `AuthorizationException` with AWS OpenSearch when the doc ID contains `:` ([848](https://github.com/opensearch-project/opensearch-py/pull/848))
### Security

### Dependencies
Expand Down
6 changes: 5 additions & 1 deletion DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,11 @@ test_opensearchpy/test_connection.py::TestRequestsConnection::test_no_http_compr
test_opensearchpy/test_async/test_connection.py::TestAIOHttpConnection::test_no_http_compression PASSED [100%]
```

Note that integration tests require docker to be installed and running, and downloads quite a bit of data from over the internet and hence take few minutes to complete.
```
./.ci/run-tests false 2.16.0 test_indices_lifecycle
```

Note that integration tests require docker to be installed and running, and downloads quite a bit of data from the internet and hence take few minutes to complete.

## Linter

Expand Down
4 changes: 3 additions & 1 deletion opensearchpy/connection/http_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import warnings
from typing import Any, Collection, Mapping, Optional, Union

import yarl

from .._async._extra_imports import aiohttp, aiohttp_exceptions # type: ignore
from .._async.compat import get_running_loop
from .._async.http_aiohttp import AIOHttpConnection
Expand Down Expand Up @@ -210,7 +212,7 @@ async def perform_request(
try:
async with self.session.request(
method,
url,
yarl.URL(url, encoded=True),
data=body,
auth=auth,
headers=req_headers,
Expand Down
4 changes: 2 additions & 2 deletions samples/hello/hello.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

def main() -> None:
"""
an example showing how to create an synchronous connection to
An example showing how to create a synchronous connection to
OpenSearch, create an index, index a document and search to
return the document
return the document.
"""
host = "localhost"
port = 9200
Expand Down
4 changes: 2 additions & 2 deletions samples/hello/hello_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

async def main() -> None:
"""
an example showing how to create an asynchronous connection
An example showing how to create an asynchronous connection
to OpenSearch, create an index, index a document and
search to return the document
search to return the document.
"""
# connect to OpenSearch
host = "localhost"
Expand Down
63 changes: 63 additions & 0 deletions samples/hello/unicode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python

# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.


import os

from opensearchpy import OpenSearch

# connect to OpenSearch


def main() -> None:
"""
An example showing how to create a synchronous connection to
OpenSearch, create an index, index a document and search to
return the document.
"""
host = "localhost"
port = 9200
auth = (
"admin",
os.getenv("OPENSEARCH_PASSWORD", "admin"),
) # For testing only. Don't store credentials in code.

client = OpenSearch(
hosts=[{"host": host, "port": port}],
http_auth=auth,
use_ssl=True,
verify_certs=False,
ssl_show_warn=False,
)

info = client.info()
print(f"Welcome to {info['version']['distribution']} {info['version']['number']}!")

index_name = "кино"
index_create_result = client.indices.create(index=index_name)
print(index_create_result)

document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = client.index(
index=index_name, body=document, id=id, refresh=True
)
print(doc_insert_result)

doc_delete_result = client.delete(index=index_name, id=id)
print(doc_delete_result)

index_delete_result = client.indices.delete(index=index_name)
print(index_delete_result)


if __name__ == "__main__":
main()
72 changes: 72 additions & 0 deletions samples/hello/unicode_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python

# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.


import asyncio
import os

from opensearchpy import AsyncOpenSearch


async def main() -> None:
"""
An example showing how to create an asynchronous connection
to OpenSearch, create an index, index a document and
search to return the document.
"""
# connect to OpenSearch
host = "localhost"
port = 9200
auth = (
"admin",
os.getenv("OPENSEARCH_PASSWORD", "admin"),
) # For testing only. Don't store credentials in code.

client = AsyncOpenSearch(
hosts=[{"host": host, "port": port}],
http_auth=auth,
use_ssl=True,
verify_certs=False,
ssl_show_warn=False,
)

try:
info = await client.info()
print(
f"Welcome to {info['version']['distribution']} {info['version']['number']}!"
)

index_name = "кино"
index_create_result = await client.indices.create(index=index_name)
print(index_create_result)

document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = await client.index(
index=index_name, body=document, id=id, refresh=True
)
print(doc_insert_result)

doc_delete_result = await client.delete(index=index_name, id=id)
print(doc_delete_result)

index_delete_result = await client.indices.delete(index=index_name)
print(index_delete_result)

finally:
await client.close()


if __name__ == "__main__":
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(main())
loop.close()
5 changes: 3 additions & 2 deletions test_opensearchpy/test_async/test_http_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from unittest import mock

import pytest
import yarl
from multidict import CIMultiDict

from opensearchpy._async._extra_imports import aiohttp # type: ignore
Expand Down Expand Up @@ -91,7 +92,7 @@ async def test_basicauth_in_request_session(self, mock_request: Any) -> None:
await c.perform_request("post", "/test")
mock_request.assert_called_with(
"post",
"http://localhost:9200/test",
yarl.URL("http://localhost:9200/test", encoded=True),
data=None,
auth=c._http_auth,
headers={},
Expand Down Expand Up @@ -120,7 +121,7 @@ def auth_fn(*args: Any, **kwargs: Any) -> Any:

mock_request.assert_called_with(
"post",
"http://localhost:9200/test",
yarl.URL("http://localhost:9200/test", encoded=True),
data=None,
auth=None,
headers={
Expand Down
60 changes: 60 additions & 0 deletions test_opensearchpy/test_async/test_server/test_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,70 @@
import pytest
from _pytest.mark.structures import MarkDecorator

from opensearchpy.exceptions import RequestError

pytestmark: MarkDecorator = pytest.mark.asyncio


class TestSpecialCharacters:
async def test_index_with_slash(self, async_client: Any) -> None:
index_name = "movies/shmovies"
with pytest.raises(RequestError) as e:
await async_client.indices.create(index=index_name)
assert (
str(e.value)
== "RequestError(400, 'invalid_index_name_exception', 'Invalid index name [movies/shmovies], must not contain the following characters [ , \", *, \\\\, <, |, ,, >, /, ?]')"
)


class TestUnicode:
async def test_indices_lifecycle_english(self, async_client: Any) -> None:
index_name = "movies"

index_create_result = await async_client.indices.create(index=index_name)
assert index_create_result["acknowledged"] is True
assert index_name == index_create_result["index"]

document = {"name": "Solaris", "director": "Andrei Tartakovsky", "year": "2011"}
id = "solaris@2011"
doc_insert_result = await async_client.index(
index=index_name, body=document, id=id, refresh=True
)
assert "created" == doc_insert_result["result"]
assert index_name == doc_insert_result["_index"]
assert id == doc_insert_result["_id"]

doc_delete_result = await async_client.delete(index=index_name, id=id)
assert "deleted" == doc_delete_result["result"]
assert index_name == doc_delete_result["_index"]
assert id == doc_delete_result["_id"]

index_delete_result = await async_client.indices.delete(index=index_name)
assert index_delete_result["acknowledged"] is True

async def test_indices_lifecycle_russian(self, async_client: Any) -> None:
index_name = "кино"
index_create_result = await async_client.indices.create(index=index_name)
assert index_create_result["acknowledged"] is True
assert index_name == index_create_result["index"]

document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = await async_client.index(
index=index_name, body=document, id=id, refresh=True
)
assert "created" == doc_insert_result["result"]
assert index_name == doc_insert_result["_index"]
assert id == doc_insert_result["_id"]

doc_delete_result = await async_client.delete(index=index_name, id=id)
assert "deleted" == doc_delete_result["result"]
assert index_name == doc_delete_result["_index"]
assert id == doc_delete_result["_id"]

index_delete_result = await async_client.indices.delete(index=index_name)
assert index_delete_result["acknowledged"] is True

async def test_indices_analyze(self, async_client: Any) -> None:
await async_client.indices.analyze(body='{"text": "привет"}')

Expand Down
73 changes: 73 additions & 0 deletions test_opensearchpy/test_async/test_signer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# GitHub history for details.

import uuid
from typing import Any, Collection, Dict, Mapping, Optional, Tuple, Union
from unittest.mock import Mock

import pytest
Expand Down Expand Up @@ -103,3 +104,75 @@ async def test_aws_signer_async_frozen_credentials_as_http_auth(self) -> None:
assert "X-Amz-Date" in headers
assert "X-Amz-Security-Token" in headers
assert len(mock_session.get_frozen_credentials.mock_calls) == 1


class TestAsyncSignerWithSpecialCharacters:
def mock_session(self) -> Mock:
access_key = uuid.uuid4().hex
secret_key = uuid.uuid4().hex
token = uuid.uuid4().hex
dummy_session = Mock()
dummy_session.access_key = access_key
dummy_session.secret_key = secret_key
dummy_session.token = token

del dummy_session.get_frozen_credentials

return dummy_session

async def test_aws_signer_async_consitent_url(self) -> None:
region = "us-west-2"

from opensearchpy import AsyncOpenSearch
from opensearchpy.connection.http_async import AsyncHttpConnection
from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth

# Store URLs for comparison
signed_url = None
sent_url = None

doc_id = "doc_id:with!special*chars%3A"
quoted_doc_id = "doc_id%3Awith%21special*chars%253A"
url = f"https://search-domain.region.es.amazonaws.com:9200/index/_doc/{quoted_doc_id}"

# Create a mock signer class to capture the signed URL
class MockSigner(AWSV4SignerAsyncAuth):
def _sign_request(
self,
method: str,
url: str,
query_string: Optional[str] = None,
body: Optional[Union[str, bytes]] = None,
) -> Dict[str, str]:
nonlocal signed_url
signed_url = url
return {}

# Create a mock connection class to capture the sent URL
class MockConnection(AsyncHttpConnection):
async def perform_request(
self: "MockConnection",
method: str,
url: str,
params: Optional[Mapping[str, Any]] = None,
body: Optional[Any] = None,
timeout: Optional[Union[int, float]] = None,
ignore: Collection[int] = (),
headers: Optional[Mapping[str, str]] = None,
) -> Tuple[int, Mapping[str, str], str]:
nonlocal sent_url
sent_url = f"{self.host}{url}"
return 200, {}, "{}"

auth = MockSigner(self.mock_session(), region)
auth("GET", url)

client = AsyncOpenSearch(
hosts=[{"host": "search-domain.region.es.amazonaws.com"}],
http_auth=auth,
use_ssl=True,
verify_certs=True,
connection_class=MockConnection,
)
await client.index("index", {"test": "data"}, id=doc_id)
assert signed_url == sent_url, "URLs don't match"
Loading
Loading