Skip to content

Commit

Permalink
addFeedRangesAndUseFeedRangeInQueryChangeFeed (#37687)
Browse files Browse the repository at this point in the history
* Add getFeedRanges API 
* Add feedRange support in query changeFeed


Co-authored-by: annie-mac <xinlian@microsoft.com>
  • Loading branch information
xinlian12 and annie-mac authored Oct 4, 2024
1 parent ffc1dbd commit 4a136a4
Show file tree
Hide file tree
Showing 36 changed files with 3,230 additions and 793 deletions.
2 changes: 2 additions & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#### Features Added
* Added Retry Policy for Container Recreate in the Python SDK. See [PR 36043](https://github.com/Azure/azure-sdk-for-python/pull/36043)
* Added option to disable write payload on writes. See [PR 37365](https://github.com/Azure/azure-sdk-for-python/pull/37365)
* Added get feed ranges API. See [PR 37687](https://github.com/Azure/azure-sdk-for-python/pull/37687)
* Added feed range support in `query_items_change_feed`. See [PR 37687](https://github.com/Azure/azure-sdk-for-python/pull/37687)

#### Breaking Changes

Expand Down
2 changes: 2 additions & 0 deletions sdk/cosmos/azure-cosmos/azure/cosmos/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
)
from .partition_key import PartitionKey
from .permission import Permission
from ._feed_range import FeedRange

__all__ = (
"CosmosClient",
Expand All @@ -64,5 +65,6 @@
"TriggerType",
"ConnectionRetryPolicy",
"ThroughputProperties",
"FeedRange"
)
__version__ = VERSION
19 changes: 2 additions & 17 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,23 +284,8 @@ def GetHeaders( # pylint: disable=too-many-statements,too-many-branches
if options.get("disableRUPerMinuteUsage"):
headers[http_constants.HttpHeaders.DisableRUPerMinuteUsage] = options["disableRUPerMinuteUsage"]

if options.get("changeFeed") is True:
# On REST level, change feed is using IfNoneMatch/ETag instead of continuation.
if_none_match_value = None
if options.get("continuation"):
if_none_match_value = options["continuation"]
elif options.get("isStartFromBeginning") and not options["isStartFromBeginning"]:
if_none_match_value = "*"
elif options.get("startTime"):
start_time = options.get("startTime")
headers[http_constants.HttpHeaders.IfModified_since] = start_time
if if_none_match_value:
headers[http_constants.HttpHeaders.IfNoneMatch] = if_none_match_value

headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue
else:
if options.get("continuation"):
headers[http_constants.HttpHeaders.Continuation] = options["continuation"]
if options.get("continuation"):
headers[http_constants.HttpHeaders.Continuation] = options["continuation"]

if options.get("populatePartitionKeyRangeStatistics"):
headers[http_constants.HttpHeaders.PopulatePartitionKeyRangeStatistics] = options[
Expand Down
20 changes: 20 additions & 0 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# The MIT License (MIT)
# Copyright (c) 2014 Microsoft Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
20 changes: 20 additions & 0 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# The MIT License (MIT)
# Copyright (c) 2014 Microsoft Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
# The MIT License (MIT)
# Copyright (c) 2014 Microsoft Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Internal class for processing change feed implementation in the Azure Cosmos
database service.
"""
import base64
import json
from abc import ABC, abstractmethod
from typing import Dict, Any, List, Callable, Tuple, Awaitable, cast

from azure.cosmos import http_constants, exceptions
from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromType
from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV2, ChangeFeedStateVersion
from azure.cosmos.aio import _retry_utility_async
from azure.cosmos.exceptions import CosmosHttpResponseError

# pylint: disable=protected-access

class ChangeFeedFetcher(ABC):

@abstractmethod
async def fetch_next_block(self) -> List[Dict[str, Any]]:
pass

class ChangeFeedFetcherV1(ChangeFeedFetcher):
"""Internal class for change feed fetch v1 implementation.
This is used when partition key range id is used or when the supplied continuation token is in just simple etag.
Please note v1 does not support split or merge.
"""
def __init__(
self,
client,
resource_link: str,
feed_options: Dict[str, Any],
fetch_function: Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]]
) -> None:

self._client = client
self._feed_options = feed_options

self._change_feed_state = self._feed_options.pop("changeFeedState")
if self._change_feed_state.version != ChangeFeedStateVersion.V1:
raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version"
f" {type(self._change_feed_state)}")

self._resource_link = resource_link
self._fetch_function = fetch_function

async def fetch_next_block(self) -> List[Dict[str, Any]]:
"""Returns a block of results.
:return: List of results.
:rtype: list
"""
async def callback():
return await self.fetch_change_feed_items()

return await _retry_utility_async.ExecuteAsync(self._client, self._client._global_endpoint_manager, callback)

async def fetch_change_feed_items(self) -> List[Dict[str, Any]]:
self._feed_options["changeFeedState"] = self._change_feed_state

self._change_feed_state.populate_feed_options(self._feed_options)
is_s_time_first_fetch = self._change_feed_state._continuation is None
while True:
(fetched_items, response_headers) = await self._fetch_function(self._feed_options)
continuation_key = http_constants.HttpHeaders.ETag
# In change feed queries, the continuation token is always populated. The hasNext() test is whether
# there is any items in the response or not.
self._change_feed_state.apply_server_response_continuation(
cast(str, response_headers.get(continuation_key)),
bool(fetched_items))

if fetched_items:
break

# When processing from point in time, there will be no initial results being returned,
# so we will retry with the new continuation token again
if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME
and is_s_time_first_fetch):
is_s_time_first_fetch = False
else:
break
return fetched_items


class ChangeFeedFetcherV2(object):
"""Internal class for change feed fetch v2 implementation.
"""

def __init__(
self,
client,
resource_link: str,
feed_options: Dict[str, Any],
fetch_function: Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]]
) -> None:

self._client = client
self._feed_options = feed_options

self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState")
if self._change_feed_state.version != ChangeFeedStateVersion.V2:
raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version "
f"{type(self._change_feed_state.version)}")

self._resource_link = resource_link
self._fetch_function = fetch_function

async def fetch_next_block(self) -> List[Dict[str, Any]]:
"""Returns a block of results.
:return: List of results.
:rtype: list
"""

async def callback():
return await self.fetch_change_feed_items()

try:
return await _retry_utility_async.ExecuteAsync(
self._client,
self._client._global_endpoint_manager,
callback)
except CosmosHttpResponseError as e:
if exceptions._partition_range_is_gone(e) or exceptions._is_partition_split_or_merge(e):
# refresh change feed state
await self._change_feed_state.handle_feed_range_gone_async(
self._client._routing_map_provider,
self._resource_link)
else:
raise e

return await self.fetch_next_block()

async def fetch_change_feed_items(self) -> List[Dict[str, Any]]:
self._feed_options["changeFeedState"] = self._change_feed_state

self._change_feed_state.populate_feed_options(self._feed_options)

is_s_time_first_fetch = True
while True:
(fetched_items, response_headers) = await self._fetch_function(self._feed_options)

continuation_key = http_constants.HttpHeaders.ETag
# In change feed queries, the continuation token is always populated. The hasNext() test is whether
# there is any items in the response or not.

self._change_feed_state.apply_server_response_continuation(
cast(str, response_headers.get(continuation_key)),
bool(fetched_items))

if fetched_items:
self._change_feed_state._continuation._move_to_next_token()
response_headers[continuation_key] = self._get_base64_encoded_continuation()
break

# when there is no items being returned, we will decide to retry based on:
# 1. When processing from point in time, there will be no initial results being returned,
# so we will retry with the new continuation token
# 2. if the feed range of the changeFeedState span multiple physical partitions
# then we will read from the next feed range until we have looped through all physical partitions
if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME
and is_s_time_first_fetch):
response_headers[continuation_key] = self._get_base64_encoded_continuation()
is_s_time_first_fetch = False
should_retry = True
else:
self._change_feed_state._continuation._move_to_next_token()
response_headers[continuation_key] = self._get_base64_encoded_continuation()
should_retry = self._change_feed_state.should_retry_on_not_modified_response()
is_s_time_first_fetch = False

if not should_retry:
break

return fetched_items

def _get_base64_encoded_continuation(self) -> str:
continuation_json = json.dumps(self._change_feed_state.to_dict())
json_bytes = continuation_json.encode('utf-8')
# Encode the bytes to a Base64 string
base64_bytes = base64.b64encode(json_bytes)
# Convert the Base64 bytes to a string
return base64_bytes.decode('utf-8')
Loading

0 comments on commit 4a136a4

Please sign in to comment.