Skip to content

Commit

Permalink
Merge pull request #21 from chrisiou/support-azure-prefixes
Browse files Browse the repository at this point in the history
Support az:// scheme
  • Loading branch information
samansmink authored Nov 7, 2023
2 parents 815e6e3 + cfc5bf9 commit e705776
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/Linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ jobs:
aarch64_cross_compile: 1

- name: Setup vcpkg
uses: lukka/run-vcpkg@v11
uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: 9edb1b8e590cc086563301d735cae4b6e732d2d2

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ concurrency:
jobs:
duckdb-stable-build:
name: Build extension binaries
uses: duckdb/duckdb/.github/workflows/_extension_distribution.yml@v0.9.1
uses: duckdb/duckdb/.github/workflows/_extension_distribution.yml@60ddc316ca0c1585f14d55aa73f9db59d8fc05d1
with:
duckdb_version: v0.9.1
extension_name: azure
Expand Down
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES})
set(PARAMETERS "-warnings")
build_loadable_extension(${TARGET_NAME} ${PARAMETERS} ${EXTENSION_SOURCES})

find_package(azure-identity-cpp CONFIG REQUIRED)
find_package(azure-storage-blobs-cpp CONFIG REQUIRED)
find_package(azure-identity-cpp CONFIG)
find_package(azure-storage-blobs-cpp CONFIG)

# Fail early with an actionable message when either Azure SDK package could not
# be located. The variable names are passed to if() unexpanded so that an unset
# or empty *_FOUND variable is treated as false rather than producing a
# malformed condition.
if(NOT azure-identity-cpp_FOUND OR NOT azure-storage-blobs-cpp_FOUND)
  message(FATAL_ERROR "Azure SDK not found, did you set up vcpkg correctly?")
endif()

# Static lib
target_link_libraries(${EXTENSION_NAME} Azure::azure-identity
Expand Down
19 changes: 10 additions & 9 deletions src/azure_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,8 @@ time_t AzureStorageFileSystem::GetLastModifiedTime(FileHandle &handle) {
return afh.last_modified;
}

// Returns true when `fpath` starts with one of the URL schemes this file system
// serves: "azure://" or its short form "az://".
bool AzureStorageFileSystem::CanHandleFile(const string &fpath) {
	// rfind(prefix, 0) can only match at index 0, so comparing the result with 0
	// is a starts-with test.
	return fpath.rfind("azure://", 0) == 0 || fpath.rfind("az://", 0) == 0;
}

void AzureStorageFileSystem::Seek(FileHandle &handle, idx_t location) {
Expand Down Expand Up @@ -273,7 +272,7 @@ vector<string> AzureStorageFileSystem::Glob(const string &path, FileOpener *open
bool is_match = Match(key_splits.begin(), key_splits.end(), pattern_splits.begin(), pattern_splits.end());

if (is_match) {
auto result_full_url = "azure://" + azure_url.container + "/" + key.Name;
auto result_full_url = azure_url.prefix + azure_url.container + "/" + key.Name;
result.push_back(result_full_url);
}
}
Expand Down Expand Up @@ -376,22 +375,24 @@ void AzureStorageFileSystem::ReadRange(FileHandle &handle, idx_t file_offset, ch
}

// Splits a URL of the form "<scheme>://<container>/<path>" into its parts.
//
// Accepted schemes are "azure://" and "az://". In the result, `prefix` is the
// matched scheme including the trailing "//", `container` is the text between
// the prefix and the next '/', and `path` is everything after that '/'.
//
// Throws IOException when the URL does not start with an accepted scheme, when
// no '/' follows the container, or when the container is empty.
AzureParsedUrl AzureStorageFileSystem::ParseUrl(const string &url) {
	string container, prefix, path;

	// rfind(s, 0) can only match at index 0, i.e. it is a starts-with test.
	if (url.rfind("azure://", 0) == 0) {
		prefix = "azure://";
	} else if (url.rfind("az://", 0) == 0) {
		prefix = "az://";
	} else {
		throw IOException("URL needs to start with azure:// or az://");
	}

	// The container starts right after the scheme prefix.
	const auto prefix_end_pos = prefix.size();
	const auto slash_pos = url.find('/', prefix_end_pos);
	if (slash_pos == string::npos) {
		throw IOException("URL needs to contain a '/' after the host");
	}
	container = url.substr(prefix_end_pos, slash_pos - prefix_end_pos);
	if (container.empty()) {
		throw IOException("URL needs to contain a bucket name");
	}

	path = url.substr(slash_pos + 1);
	return {container, prefix, path};
}

void AzureExtension::Load(DuckDB &db) {
Expand Down
1 change: 1 addition & 0 deletions src/include/azure_extension.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct AzureAuthentication {

// Components of an Azure blob URL as produced by AzureStorageFileSystem::ParseUrl.
// Member order matters: ParseUrl returns a brace-initialised {container, prefix, path}.
struct AzureParsedUrl {
// Container name: the text between the scheme prefix and the next '/'.
string container;
// Scheme prefix up to and including "//" ("azure://" or "az://").
string prefix;
// Everything after the '/' that follows the container name.
string path;
};

Expand Down
18 changes: 13 additions & 5 deletions test/sql/azure.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ require parquet
require-env AZURE_STORAGE_CONNECTION_STRING

# We need a connection string to do requests
foreach prefix azure:// az://

statement error
SELECT sum(l_orderkey) FROM 'azure://testing-private/l.parquet';
SELECT sum(l_orderkey) FROM '${prefix}testing-private/l.parquet';
----
Invalid Input Error: No valid Azure credentials found

Expand All @@ -21,17 +23,23 @@ SET azure_storage_connection_string = '${AZURE_STORAGE_CONNECTION_STRING}';

# Read a column from a parquet file
query I
SELECT sum(l_orderkey) FROM 'azure://testing-private/l.parquet';
SELECT sum(l_orderkey) FROM '${prefix}testing-private/l.parquet';
----
1802759573

# Read from a csv file with no header
query I
SELECT count(*) FROM 'azure://testing-private/lineitem.csv';
SELECT count(*) FROM '${prefix}testing-private/lineitem.csv';
----
60175

query I
SELECT count(*) FROM 'azure://testing-private/l.csv';
SELECT count(*) FROM '${prefix}testing-private/l.csv';
----
60175
60175

# Unset the connection string var
statement ok
SET azure_storage_connection_string = '';

endloop
16 changes: 16 additions & 0 deletions test/sql/azure_glob.test
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,19 @@ query I
SELECT * from GLOB("azure://testing-public/lineitem.*") order by file;
----
azure://testing-public/lineitem.csv

# Testing private blobs with az:// prefix
query I
SELECT * from GLOB("az://testing-private/*.*") order by file;
----
az://testing-private/l.csv
az://testing-private/l.parquet
az://testing-private/lineitem.csv

# Testing public blobs with az:// prefix
query I
SELECT * from GLOB("az://testing-public/*.*") order by file;
----
az://testing-public/l.csv
az://testing-public/l.parquet
az://testing-public/lineitem.csv

0 comments on commit e705776

Please sign in to comment.