[Storage] Remove batch delete_files() #24770

Merged
1 change: 0 additions & 1 deletion sdk/storage/azure-storage-file-datalake/CHANGELOG.md
@@ -6,7 +6,6 @@
- Added support for service version 2021-08-06
- Added support for `owner`, `group`, `acl`, `lease_id`, `lease_duration` to both file and directory `create` APIs
- Added support for `expiry_options`, `expires_on` to file `create` APIs
- Added `delete_files()` API

## 12.7.0 (2022-05-09)

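As context for the surviving changelog entries, a hedged sketch of the new `create` keywords follows. The keyword names come from the entries above; the client setup and every value (connection string, file name, owner/group/ACL strings) are illustrative assumptions, not part of this diff.

```python
# Sketch only: keyword names are from the changelog above; all values here
# (connection string, file name, owner/group/acl strings) are assumptions.
from azure.storage.filedatalake import FileSystemClient

fs_client = FileSystemClient.from_connection_string("<connection-string>", "filesystem")
file_client = fs_client.get_file_client("file1")
file_client.create_file(
    owner="$superuser",                      # assumed POSIX-style owning user
    group="$superuser",                      # assumed POSIX-style owning group
    acl="user::rwx,group::r-x,other::---",   # assumed POSIX ACL string
)
```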
sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_deserialize.py
@@ -115,13 +115,20 @@ def deserialize_metadata(response, obj, headers): # pylint: disable=unused-argument
return {k[10:]: v for k, v in raw_metadata.items()}


def _decode_error(response, error_message=None, serialized_error=None):
def process_storage_error(storage_error): # pylint:disable=too-many-statements
raise_error = HttpResponseError
error_code = response.headers.get('x-ms-error-code')
serialized = False
if not storage_error.response:
raise storage_error
# If it is one of those three then it has been serialized prior by the generated layer.
if isinstance(storage_error, (ResourceNotFoundError, ClientAuthenticationError, ResourceExistsError)):
serialized = True
error_code = storage_error.response.headers.get('x-ms-error-code')
error_message = storage_error.message
additional_data = {}
error_dict = {}
try:
error_body = ContentDecodePolicy.deserialize_from_http_generics(response)
error_body = ContentDecodePolicy.deserialize_from_http_generics(storage_error.response)
# If it is an XML response
if isinstance(error_body, Element):
error_dict = {
@@ -145,13 +152,9 @@ def _decode_error(response, error_message=None, serialized_error=None):
except DecodeError:
pass

# Convert blob errors to datalake errors
if error_code in [StorageErrorCode.blob_not_found]:
error_code = StorageErrorCode.path_not_found

try:
# This check would be unnecessary if we have already serialized the error.
if error_code and not serialized_error:
if error_code and not serialized:
error_code = StorageErrorCode(error_code)
if error_code in [StorageErrorCode.condition_not_met]:
raise_error = ResourceModifiedError
@@ -195,25 +198,17 @@
for name, info in additional_data.items():
error_message += "\n{}:{}".format(name, info)

if serialized_error:
serialized_error.message = error_message
error = serialized_error
# No need to create an instance if it has already been serialized by the generated layer
if serialized:
storage_error.message = error_message
error = storage_error
else:
error = raise_error(message=error_message, response=response)
error = raise_error(message=error_message, response=storage_error.response)
# Ensure these properties are stored in the error instance as well (not just the error message)
error.error_code = error_code
error.additional_info = additional_data
# error.args is what's surfaced on the traceback - show error message in all cases
error.args = (error.message,)
return error


def process_storage_error(storage_error):
if not storage_error.response:
raise storage_error
# If it is one of those three then it has been serialized prior by the generated layer.
serialized = isinstance(storage_error, (ResourceNotFoundError, ClientAuthenticationError, ResourceExistsError))
error = _decode_error(storage_error.response, storage_error.message, storage_error if serialized else None)

try:
# `from None` prevents us from double printing the exception (suppresses generated layer error context)
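For orientation, the consolidated `process_storage_error` above is invoked by the client layer whenever a generated-layer call fails. A minimal usage sketch follows, assuming `process_storage_error` is imported from the module above; `generated_op` and the wrapper name are hypothetical stand-ins, not from this diff.

```python
# Minimal usage sketch (not from this diff): `generated_op` is a hypothetical
# stand-in for any generated-layer call that can raise HttpResponseError.
from azure.core.exceptions import HttpResponseError

def call_with_error_decoding(generated_op, *args, **kwargs):
    try:
        return generated_op(*args, **kwargs)
    except HttpResponseError as error:
        # Decodes x-ms-error-code and the response body, attaches error_code
        # and additional_info, then re-raises the most specific error type.
        process_storage_error(error)
```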
sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_file_system_client.py
@@ -5,7 +5,7 @@
# --------------------------------------------------------------------------
# pylint: disable=too-many-lines
import functools
from typing import Any, Dict, List, Optional, Type, TypeVar, Union, TYPE_CHECKING
from typing import Any, Dict, Optional, Type, TypeVar, Union, TYPE_CHECKING

try:
from urllib.parse import urlparse, quote, unquote
@@ -28,7 +28,7 @@
from ._data_lake_lease import DataLakeLeaseClient
from ._generated import AzureDataLakeStorageRESTAPI
from ._generated.models import ListBlobsIncludeItem
from ._deserialize import _decode_error, process_storage_error, is_file_path
from ._deserialize import process_storage_error, is_file_path

if TYPE_CHECKING:
from datetime import datetime
@@ -865,61 +865,6 @@ def _get_root_directory_client(self):
"""
return self.get_directory_client('/')

def delete_files(
self,
*files: str,
**kwargs) -> List[Optional[HttpResponseError]]:
"""Marks the specified files or empty directories for deletion.

The files/empty directories are later deleted during garbage collection.

If a delete retention policy is enabled for the service, then this operation soft deletes the
files/empty directories and retains the files or snapshots for specified number of days.
After specified number of days, files' data is removed from the service during garbage collection.
Soft deleted files/empty directories are accessible through :func:`list_deleted_paths()`.

:param str files:
The files/empty directories to delete. This can be a single file/empty directory, or multiple values can
be supplied, where each value is the name of the file/directory (str).

:keyword ~datetime.datetime if_modified_since:
A DateTime value. Azure expects the date value passed in to be UTC.
If timezone is included, any non-UTC datetimes will be converted to UTC.
If a date is passed in without timezone info, it is assumed to be UTC.
Specify this header to perform the operation only
if the resource has been modified since the specified time.
:keyword ~datetime.datetime if_unmodified_since:
A DateTime value. Azure expects the date value passed in to be UTC.
If timezone is included, any non-UTC datetimes will be converted to UTC.
If a date is passed in without timezone info, it is assumed to be UTC.
Specify this header to perform the operation only if
the resource has not been modified since the specified date/time.
:keyword int timeout:
The timeout parameter is expressed in seconds.
:return: A list containing None for successful operations and
HttpResponseError objects for unsuccessful operations.
:rtype: List[Optional[HttpResponseError]]

.. admonition:: Example:

.. literalinclude:: ../samples/datalake_samples_file_system_async.py
:start-after: [START batch_delete_files_or_empty_directories]
:end-before: [END batch_delete_files_or_empty_directories]
:language: python
:dedent: 4
:caption: Deleting multiple files or empty directories.
"""
results = self._container_client.delete_blobs(raise_on_any_failure=False, *files, **kwargs)

errors = []
for result in results:
if not 200 <= result.status_code < 300:
errors.append(_decode_error(result, result.reason))
else:
errors.append(None)

return errors

def get_directory_client(self, directory # type: Union[DirectoryProperties, str]
):
# type: (...) -> DataLakeDirectoryClient
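With the batch API removed, existing callers can approximate its contract with a per-path loop over the surviving `delete_file()`. A minimal migration sketch, not part of this PR; the helper name and typing are illustrative, only `FileSystemClient.delete_file()` is from the SDK:

```python
# A minimal migration sketch: one entry per input path, None on success,
# the HttpResponseError on failure, matching the removed delete_files() shape.
from typing import List, Optional

from azure.core.exceptions import HttpResponseError
from azure.storage.filedatalake import FileSystemClient

def delete_paths_one_by_one(fs_client: FileSystemClient,
                            *files: str) -> List[Optional[HttpResponseError]]:
    results: List[Optional[HttpResponseError]] = []
    for path in files:
        try:
            fs_client.delete_file(path)
            results.append(None)
        except HttpResponseError as error:
            results.append(error)
    return results
```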
sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/aio/_file_system_client_async.py
@@ -20,7 +20,7 @@
from azure.core.tracing.decorator_async import distributed_trace_async
from azure.storage.blob.aio import ContainerClient
from .._serialize import get_api_version
from .._deserialize import _decode_error, process_storage_error, is_file_path
from .._deserialize import process_storage_error, is_file_path
from .._generated.models import ListBlobsIncludeItem

from ._data_lake_file_client_async import DataLakeFileClient
@@ -764,62 +764,6 @@ async def delete_file(self, file, # type: Union[FileProperties, str]
await file_client.delete_file(**kwargs)
return file_client

@distributed_trace_async
async def delete_files(
self,
*files: str,
**kwargs) -> List[Optional[HttpResponseError]]:
"""Marks the specified files or empty directories for deletion.

The files/empty directories are later deleted during garbage collection.

If a delete retention policy is enabled for the service, then this operation soft deletes the
files/empty directories and retains the files or snapshots for specified number of days.
After specified number of days, files' data is removed from the service during garbage collection.
Soft deleted files/empty directories are accessible through :func:`list_deleted_paths()`.

:param str files:
The files/empty directories to delete. This can be a single file/empty directory, or multiple values can
be supplied, where each value is the name of the file/directory (str).

:keyword ~datetime.datetime if_modified_since:
A DateTime value. Azure expects the date value passed in to be UTC.
If timezone is included, any non-UTC datetimes will be converted to UTC.
If a date is passed in without timezone info, it is assumed to be UTC.
Specify this header to perform the operation only
if the resource has been modified since the specified time.
:keyword ~datetime.datetime if_unmodified_since:
A DateTime value. Azure expects the date value passed in to be UTC.
If timezone is included, any non-UTC datetimes will be converted to UTC.
If a date is passed in without timezone info, it is assumed to be UTC.
Specify this header to perform the operation only if
the resource has not been modified since the specified date/time.
:keyword int timeout:
The timeout parameter is expressed in seconds.
:return: A list containing None for successful operations and
HttpResponseError objects for unsuccessful operations.
:rtype: List[Optional[HttpResponseError]]

.. admonition:: Example:

.. literalinclude:: ../samples/datalake_samples_file_system_async.py
:start-after: [START batch_delete_files_or_empty_directories]
:end-before: [END batch_delete_files_or_empty_directories]
:language: python
:dedent: 4
:caption: Deleting multiple files or empty directories.
"""
response = await self._container_client.delete_blobs(raise_on_any_failure=False, *files, **kwargs)

errors = []
async for result in response:
if not 200 <= result.status_code < 300:
errors.append(_decode_error(result, result.reason))
else:
errors.append(None)

return errors

@distributed_trace_async
async def _undelete_path(self, deleted_path_name, deletion_id, **kwargs):
# type: (str, str, **Any) -> Union[DataLakeDirectoryClient, DataLakeFileClient]
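The async client loses the batch API as well. A hedged async sketch, not part of this PR: it issues the deletes concurrently via `asyncio.gather` and maps the results back to the None-or-error shape of the removed API; the helper name is illustrative.

```python
# A hedged async migration sketch: gathers deletes concurrently, then
# normalizes results to the None-or-error shape of the removed batch API.
import asyncio
from typing import List, Optional

from azure.core.exceptions import HttpResponseError
from azure.storage.filedatalake.aio import FileSystemClient

async def delete_paths_concurrently(fs_client: FileSystemClient,
                                    *files: str) -> List[Optional[HttpResponseError]]:
    results = await asyncio.gather(
        *(fs_client.delete_file(path) for path in files),
        return_exceptions=True,
    )
    # Successful deletes return a client object; keep only HTTP errors and
    # re-raise anything unexpected rather than silently swallowing it.
    normalized: List[Optional[HttpResponseError]] = []
    for result in results:
        if isinstance(result, HttpResponseError):
            normalized.append(result)
        elif isinstance(result, BaseException):
            raise result
        else:
            normalized.append(None)
    return normalized
```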
sdk/storage/azure-storage-file-datalake/samples/datalake_samples_file_system.py
@@ -209,49 +209,6 @@ def create_file_from_file_system(self):

file_system_client.delete_file_system()

# [START batch_delete_files_or_empty_directories]
def batch_delete_files_or_empty_directories(self):
from azure.storage.filedatalake import FileSystemClient
file_system_client = FileSystemClient.from_connection_string(self.connection_string, "filesystem")

file_system_client.create_file_system()

data = b'hello world'

try:
# create file1
file_system_client.get_file_client('file1').upload_data(data, overwrite=True)

# create file2, then pass file properties in batch delete later
file2 = file_system_client.get_file_client('file2')
file2.upload_data(data, overwrite=True)

# create file3 and batch delete it later only if the etag matches file3's etag
file3 = file_system_client.get_file_client('file3')
file3.upload_data(data, overwrite=True)

# create dir1. Empty directory can be deleted using delete_files
file_system_client.get_directory_client('dir1').create_directory()
file_system_client.get_directory_client('dir1').create_file('file4')

except:
pass

response = file_system_client.delete_files(
'file1',
'file2',
'file3',
'dir1', # dir1 is not empty
'dir8', # dir8 doesn't exist
)
print("Total number of sub-responses: " + len(response) + "\n")
print("First failure error code: " + response[3].error_code + "\n")
print("First failure status code: " + response[3].status_code + "\n")
print("Second failure error code: " + response[4].error_code + "\n")
print("Second failure status code: " + response[4].status_code + "\n")
# [END batch_delete_files_or_empty_directories]


if __name__ == '__main__':
sample = FileSystemSamples()
sample.file_system_sample()
@@ -260,4 +217,3 @@ def batch_delete_files_or_empty_directories(self):
sample.list_paths_in_file_system()
sample.get_file_client_from_file_system()
sample.create_file_from_file_system()
sample.batch_delete_files_or_empty_directories()
sdk/storage/azure-storage-file-datalake/samples/datalake_samples_file_system_async.py
@@ -216,50 +216,6 @@ async def create_file_from_file_system(self):
# [END delete_directory_from_file_system]

await file_system_client.delete_file_system()

# [START batch_delete_files_or_empty_directories]
async def batch_delete_files_or_empty_directories(self):
from azure.storage.filedatalake import FileSystemClient
file_system_client = FileSystemClient.from_connection_string(self.connection_string, "filesystem")

async with file_system_client:
await file_system_client.create_file_system()

data = b'hello world'

try:
# create file1
await file_system_client.get_file_client('file1').upload_data(data, overwrite=True)

# create file2, then pass file properties in batch delete later
file2 = file_system_client.get_file_client('file2')
await file2.upload_data(data, overwrite=True)

# create file3 and batch delete it later only if the etag matches file3's etag
file3 = file_system_client.get_file_client('file3')
await file3.upload_data(data, overwrite=True)

# create dir1. Empty directory can be deleted using delete_files
await file_system_client.get_directory_client('dir1').create_directory()
await file_system_client.get_directory_client('dir1').create_file('file4')

except:
pass

response = await file_system_client.delete_files(
'file1',
'file2',
'file3',
'dir1', # dir1 is not empty
'dir8', # dir8 doesn't exist
)
print("Total number of sub-responses: " + len(response) + "\n")
print("First failure error code: " + response[3].error_code + "\n")
print("First failure status code: " + response[3].status_code + "\n")
print("Second failure error code: " + response[4].error_code + "\n")
print("Second failure status code: " + response[4].status_code + "\n")
# [END batch_delete_files_or_empty_directories]


async def run():
sample = FileSystemSamplesAsync()
@@ -269,7 +225,6 @@ async def run():
await sample.list_paths_in_file_system()
await sample.get_file_client_from_file_system()
await sample.create_file_from_file_system()
await sample.batch_delete_files_or_empty_directories()

if __name__ == '__main__':
loop = asyncio.get_event_loop()