Commit

version 0.7.1-1
hannes authored and cran-robot committed Mar 1, 2023
1 parent c30a7e5 commit fb18a2d
Showing 181 changed files with 20,357 additions and 19,627 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: duckdb
Title: DBI Package for the DuckDB Database Management System
Version: 0.7.0
Version: 0.7.1-1
Authors@R:
c(person(given = "Hannes",
family = "Mühleisen",
@@ -49,7 +49,7 @@ Suggests: arrow, bit64, callr, DBItest, dplyr, dbplyr, rlang, testthat,
Encoding: UTF-8
RoxygenNote: 7.2.3
NeedsCompilation: yes
Packaged: 2023-02-13 19:23:23 UTC; hannes
Packaged: 2023-03-01 09:31:23 UTC; hannes
Author: Hannes Mühleisen [aut, cre] (<https://orcid.org/0000-0001-8552-0029>),
Mark Raasveldt [aut] (<https://orcid.org/0000-0001-5005-6844>),
Stichting DuckDB Foundation [cph],
@@ -78,4 +78,4 @@ Author: Hannes Mühleisen [aut, cre] (<https://orcid.org/0000-0001-8552-0029>),
PCG Project contributors [cph]
Maintainer: Hannes Mühleisen <hannes@cwi.nl>
Repository: CRAN
Date/Publication: 2023-02-14 10:00:10 UTC
Date/Publication: 2023-03-01 12:50:02 UTC
350 changes: 179 additions & 171 deletions MD5

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions R/Driver.R
@@ -40,6 +40,11 @@ duckdb <- function(dbdir = DBDIR_MEMORY, read_only = FALSE, bigint = "numeric",
stop(paste("Unsupported bigint configuration", bigint))
)

# R packages are not allowed to write extensions into home directory, so use R_user_dir instead
if (!("extension_directory" %in% names(config))) {
config["extension_directory"] <- tools::R_user_dir("duckdb", "data")
}

new(
"duckdb_driver",
database_ref = rapi_startup(dbdir, read_only, config),
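Note: the hunk above gives the driver a default extension_directory under tools::R_user_dir("duckdb", "data") when the caller does not set one. A minimal R sketch of overriding that default, assuming the config list argument of duckdb() accepts an extension_directory entry (object names are illustrative):

    library(duckdb)

    # Extensions now default to tools::R_user_dir("duckdb", "data"); an explicit
    # extension_directory entry in config still takes precedence.
    drv <- duckdb(config = list(extension_directory = tempfile("duckdb-ext-")))
    con <- DBI::dbConnect(drv)
    DBI::dbDisconnect(con, shutdown = TRUE)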
8 changes: 4 additions & 4 deletions R/cpp11.R
@@ -144,12 +144,12 @@ rapi_release <- function(stmt) {
invisible(.Call(`_duckdb_rapi_release`, stmt))
}

rapi_get_substrait <- function(conn, query) {
.Call(`_duckdb_rapi_get_substrait`, conn, query)
rapi_get_substrait <- function(conn, query, enable_optimizer) {
.Call(`_duckdb_rapi_get_substrait`, conn, query, enable_optimizer)
}

rapi_get_substrait_json <- function(conn, query) {
.Call(`_duckdb_rapi_get_substrait_json`, conn, query)
rapi_get_substrait_json <- function(conn, query, enable_optimizer) {
.Call(`_duckdb_rapi_get_substrait_json`, conn, query, enable_optimizer)
}

rapi_prepare_substrait <- function(conn, query) {
19 changes: 19 additions & 0 deletions R/sql.R
@@ -0,0 +1,19 @@
sql <- function(sql, conn = default_connection()) {
stopifnot(dbIsValid(conn))
dbGetQuery(conn, sql)
}

default_duckdb_connection <- new.env(parent=emptyenv())

default_connection <- function() {
if(!exists("con", default_duckdb_connection)) {
con <- DBI::dbConnect(duckdb::duckdb())

default_duckdb_connection$con <- con

reg.finalizer(default_duckdb_connection, onexit = TRUE, function(e) {
DBI::dbDisconnect(e$con, shutdown = TRUE)
})
}
default_duckdb_connection$con
}
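Note: this new helper is not exported; it lazily opens a package-wide DuckDB connection, caches it in an environment, and relies on reg.finalizer() to shut it down. A short usage sketch, assuming the function stays internal and is reached via ::: (not a documented API):

    # First call creates and caches the default connection.
    duckdb:::sql("SELECT 42 AS answer")

    # Later calls reuse the cached connection from default_duckdb_connection.
    duckdb:::sql("SELECT current_date")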
11 changes: 6 additions & 5 deletions R/substrait.R
@@ -46,24 +46,25 @@ duckdb_prepare_substrait_json <- function(conn, json, arrow = FALSE) {
#'
#' @param conn A DuckDB connection, created by `dbConnect()`.
#' @param query The query string in SQL
#' @param enable_optimizer Optional parameter to enable/disable query-optimizer. By default optimizer is enabled.
#' @return A raw vector containing the substrait protobuf blob
#' @export
duckdb_get_substrait <- function(conn, query) {
duckdb_get_substrait <- function(conn, query, enable_optimizer = TRUE) {
stopifnot(dbIsValid(conn))
stopifnot(is.character(query))
rapi_get_substrait(conn@conn_ref, query)
rapi_get_substrait(conn@conn_ref, query, enable_optimizer)
}


#' Get the Substrait plan for a SQL query in the JSON format
#' Transforms a SQL query into a vector containing the serialized Substrait query JSON
#'
#' @param conn A DuckDB connection, created by `dbConnect()`.
#' @param query The query string in SQL
#' @param enable_optimizer Optional parameter to enable/disable query-optimizer. By default optimizer is enabled.
#' @return A vector containing the substrait protobuf JSON
#' @export
duckdb_get_substrait_json <- function(conn, query) {
duckdb_get_substrait_json <- function(conn, query, enable_optimizer = TRUE) {
stopifnot(dbIsValid(conn))
stopifnot(is.character(query))
rapi_get_substrait_json(conn@conn_ref, query)
rapi_get_substrait_json(conn@conn_ref, query, enable_optimizer)
}
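Note: both functions gain an enable_optimizer argument defaulting to TRUE, which preserves the old behaviour. A hedged R sketch, assuming the substrait extension can be installed and loaded in the running build (table and query are illustrative):

    library(duckdb)
    con <- DBI::dbConnect(duckdb())

    # duckdb_get_substrait() requires the substrait extension.
    DBI::dbExecute(con, "INSTALL substrait")
    DBI::dbExecute(con, "LOAD substrait")
    DBI::dbExecute(con, "CREATE TABLE t AS SELECT range AS i FROM range(10)")

    # Default: the optimizer runs before the plan is serialized.
    blob <- duckdb_get_substrait(con, "SELECT i FROM t WHERE i > 5")

    # New: serialize the unoptimized plan instead.
    json <- duckdb_get_substrait_json(con, "SELECT i FROM t WHERE i > 5",
                                      enable_optimizer = FALSE)

    DBI::dbDisconnect(con, shutdown = TRUE)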
4 changes: 2 additions & 2 deletions inst/include/cpp11/external_pointer.hpp
@@ -57,8 +57,8 @@ class external_pointer {

external_pointer(SEXP data) : data_(valid_type(data)) {}

external_pointer(pointer p, bool use_deleter = true, bool finalize_on_exit = true)
: data_(safe[R_MakeExternalPtr]((void*)p, R_NilValue, R_NilValue)) {
external_pointer(pointer p, bool use_deleter = true, bool finalize_on_exit = true, SEXP prot = R_NilValue)
: data_(safe[R_MakeExternalPtr]((void*)p, R_NilValue, prot)) {
if (use_deleter) {
R_RegisterCFinalizerEx(data_, r_deleter, static_cast<r_bool>(finalize_on_exit));
}
4 changes: 3 additions & 1 deletion man/duckdb_get_substrait.Rd

Some generated files are not rendered by default.

4 changes: 3 additions & 1 deletion man/duckdb_get_substrait_json.Rd

Some generated files are not rendered by default.

16 changes: 8 additions & 8 deletions src/cpp11.cpp
@@ -266,17 +266,17 @@ extern "C" SEXP _duckdb_rapi_release(SEXP stmt) {
END_CPP11
}
// statement.cpp
SEXP rapi_get_substrait(duckdb::conn_eptr_t conn, std::string query);
extern "C" SEXP _duckdb_rapi_get_substrait(SEXP conn, SEXP query) {
SEXP rapi_get_substrait(duckdb::conn_eptr_t conn, std::string query, bool enable_optimizer);
extern "C" SEXP _duckdb_rapi_get_substrait(SEXP conn, SEXP query, SEXP enable_optimizer) {
BEGIN_CPP11
return cpp11::as_sexp(rapi_get_substrait(cpp11::as_cpp<cpp11::decay_t<duckdb::conn_eptr_t>>(conn), cpp11::as_cpp<cpp11::decay_t<std::string>>(query)));
return cpp11::as_sexp(rapi_get_substrait(cpp11::as_cpp<cpp11::decay_t<duckdb::conn_eptr_t>>(conn), cpp11::as_cpp<cpp11::decay_t<std::string>>(query), cpp11::as_cpp<cpp11::decay_t<bool>>(enable_optimizer)));
END_CPP11
}
// statement.cpp
SEXP rapi_get_substrait_json(duckdb::conn_eptr_t conn, std::string query);
extern "C" SEXP _duckdb_rapi_get_substrait_json(SEXP conn, SEXP query) {
SEXP rapi_get_substrait_json(duckdb::conn_eptr_t conn, std::string query, bool enable_optimizer);
extern "C" SEXP _duckdb_rapi_get_substrait_json(SEXP conn, SEXP query, SEXP enable_optimizer) {
BEGIN_CPP11
return cpp11::as_sexp(rapi_get_substrait_json(cpp11::as_cpp<cpp11::decay_t<duckdb::conn_eptr_t>>(conn), cpp11::as_cpp<cpp11::decay_t<std::string>>(query)));
return cpp11::as_sexp(rapi_get_substrait_json(cpp11::as_cpp<cpp11::decay_t<duckdb::conn_eptr_t>>(conn), cpp11::as_cpp<cpp11::decay_t<std::string>>(query), cpp11::as_cpp<cpp11::decay_t<bool>>(enable_optimizer)));
END_CPP11
}
// statement.cpp
@@ -349,8 +349,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_duckdb_rapi_expr_reference", (DL_FUNC) &_duckdb_rapi_expr_reference, 2},
{"_duckdb_rapi_expr_set_alias", (DL_FUNC) &_duckdb_rapi_expr_set_alias, 2},
{"_duckdb_rapi_expr_tostring", (DL_FUNC) &_duckdb_rapi_expr_tostring, 1},
{"_duckdb_rapi_get_substrait", (DL_FUNC) &_duckdb_rapi_get_substrait, 2},
{"_duckdb_rapi_get_substrait_json", (DL_FUNC) &_duckdb_rapi_get_substrait_json, 2},
{"_duckdb_rapi_get_substrait", (DL_FUNC) &_duckdb_rapi_get_substrait, 3},
{"_duckdb_rapi_get_substrait_json", (DL_FUNC) &_duckdb_rapi_get_substrait_json, 3},
{"_duckdb_rapi_prepare", (DL_FUNC) &_duckdb_rapi_prepare, 2},
{"_duckdb_rapi_prepare_substrait", (DL_FUNC) &_duckdb_rapi_prepare_substrait, 2},
{"_duckdb_rapi_prepare_substrait_json", (DL_FUNC) &_duckdb_rapi_prepare_substrait_json, 2},
7 changes: 7 additions & 0 deletions src/duckdb/extension/parquet/column_reader.cpp
@@ -589,6 +589,7 @@ void StringColumnReader::PrepareDeltaLengthByteArray(ResizeableBuffer &buffer) {
}
auto length_data = (uint32_t *)length_buffer->ptr;
byte_array_data = make_unique<Vector>(LogicalType::VARCHAR, value_count);
byte_array_count = value_count;
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
for (idx_t i = 0; i < value_count; i++) {
auto str_len = length_data[i];
@@ -615,6 +616,7 @@ void StringColumnReader::PrepareDeltaByteArray(ResizeableBuffer &buffer) {
auto prefix_data = (uint32_t *)prefix_buffer->ptr;
auto suffix_data = (uint32_t *)suffix_buffer->ptr;
byte_array_data = make_unique<Vector>(LogicalType::VARCHAR, prefix_count);
byte_array_count = prefix_count;
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
for (idx_t i = 0; i < prefix_count; i++) {
auto str_len = prefix_data[i] + suffix_data[i];
@@ -646,6 +648,11 @@ void StringColumnReader::DeltaByteArray(uint8_t *defines, idx_t num_values, parq
continue;
}
if (filter[row_idx + result_offset]) {
if (delta_offset >= byte_array_count) {
throw IOException("DELTA_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted "
"read of %d from %d entries) - corrupt file?",
delta_offset + 1, byte_array_count);
}
result_ptr[row_idx + result_offset] = string_data[delta_offset++];
} else {
delta_offset++;
1 change: 1 addition & 0 deletions src/duckdb/extension/parquet/include/column_reader.hpp
@@ -131,6 +131,7 @@ class ColumnReader {
ParquetReader &reader;
LogicalType type;
unique_ptr<Vector> byte_array_data;
idx_t byte_array_count = 0;

idx_t pending_skips = 0;

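Note: together with the new byte_array_count member above, the reader now reports a length mismatch in DELTA_BYTE_ARRAY pages instead of reading past the prepared byte_array_data vector. From R, such a file surfaces as a catchable error; a sketch with an illustrative file name:

    library(duckdb)
    con <- DBI::dbConnect(duckdb())

    # A corrupt DELTA_BYTE_ARRAY page now raises an IOException carrying the
    # "length mismatch ... corrupt file?" message rather than misbehaving.
    msg <- tryCatch(
      DBI::dbGetQuery(con, "SELECT * FROM parquet_scan('corrupt_delta.parquet')"),
      error = function(e) conditionMessage(e)
    )

    DBI::dbDisconnect(con, shutdown = TRUE)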
11 changes: 2 additions & 9 deletions src/duckdb/extension/parquet/parquet-extension.cpp
@@ -221,10 +221,7 @@ class ParquetScanFunction {
}

FileSystem &fs = FileSystem::GetFileSystem(context);
auto files = fs.Glob(info.file_path, context);
if (files.empty()) {
throw IOException("No files found that match the pattern \"%s\"", info.file_path);
}
auto files = fs.GlobFiles(info.file_path, context);

// The most likely path (Parquet read without union by name option)
if (!parquet_options.union_by_name) {
@@ -362,11 +359,7 @@ class ParquetScanFunction {
}

static vector<string> ParquetGlob(FileSystem &fs, const string &glob, ClientContext &context) {
auto files = fs.Glob(glob, FileSystem::GetFileOpener(context));
if (files.empty()) {
throw IOException("No files found that match the pattern \"%s\"", glob);
}
return files;
return fs.GlobFiles(glob, context);
}

static unique_ptr<FunctionData> ParquetScanBind(ClientContext &context, TableFunctionBindInput &input,
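Note: both call sites now delegate to FileSystem::GlobFiles, which performs the empty-result check itself, so the user-visible error stays the same. A quick R-level check with a deliberately non-matching pattern (file name illustrative):

    library(duckdb)
    con <- DBI::dbConnect(duckdb())

    # A glob with no matches still raises an IOException, now produced
    # centrally by GlobFiles rather than by each call site.
    tryCatch(
      DBI::dbGetQuery(con, "SELECT * FROM parquet_scan('no_such_files_*.parquet')"),
      error = function(e) message(conditionMessage(e))
    )

    DBI::dbDisconnect(con, shutdown = TRUE)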
75 changes: 62 additions & 13 deletions src/duckdb/src/catalog/catalog.cpp
@@ -2,6 +2,7 @@

#include "duckdb/catalog/catalog_search_path.hpp"
#include "duckdb/catalog/catalog_entry/list.hpp"
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
#include "duckdb/catalog/catalog_set.hpp"
#include "duckdb/catalog/default/default_schemas.hpp"
#include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
@@ -26,7 +27,7 @@
#include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
#include "duckdb/planner/binder.hpp"
#include "duckdb/catalog/default/default_types.hpp"
#include "duckdb/main/extension_functions.hpp"
#include "duckdb/main/extension_entries.hpp"
#include "duckdb/main/connection.hpp"
#include "duckdb/main/attached_database.hpp"
#include "duckdb/main/database_manager.hpp"
@@ -251,6 +252,20 @@ CatalogEntry *Catalog::CreateCollation(CatalogTransaction transaction, SchemaCat
return schema->CreateCollation(transaction, info);
}

//===--------------------------------------------------------------------===//
// Index
//===--------------------------------------------------------------------===//
CatalogEntry *Catalog::CreateIndex(CatalogTransaction transaction, CreateIndexInfo *info) {
auto &context = transaction.GetContext();
return CreateIndex(context, info);
}

CatalogEntry *Catalog::CreateIndex(ClientContext &context, CreateIndexInfo *info) {
auto schema = GetSchema(context, info->schema);
auto table = GetEntry<TableCatalogEntry>(context, schema->name, info->table->table_name);
return schema->CreateIndex(context, info, table);
}

//===--------------------------------------------------------------------===//
// Lookup Structures
//===--------------------------------------------------------------------===//
@@ -317,17 +332,26 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
return result;
}

string FindExtension(const string &function_name) {
auto size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionFunction);
auto it = std::lower_bound(
EXTENSION_FUNCTIONS, EXTENSION_FUNCTIONS + size, function_name,
[](const ExtensionFunction &element, const string &value) { return element.function < value; });
if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
string FindExtensionGeneric(const string &name, const ExtensionEntry entries[], idx_t size) {
auto lcase = StringUtil::Lower(name);
auto it = std::lower_bound(entries, entries + size, lcase,
[](const ExtensionEntry &element, const string &value) { return element.name < value; });
if (it != entries + size && it->name == lcase) {
return it->extension;
}
return "";
}

string FindExtensionForFunction(const string &name) {
idx_t size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionEntry);
return FindExtensionGeneric(name, EXTENSION_FUNCTIONS, size);
}

string FindExtensionForSetting(const string &name) {
idx_t size = sizeof(EXTENSION_SETTINGS) / sizeof(ExtensionEntry);
return FindExtensionGeneric(name, EXTENSION_SETTINGS, size);
}

vector<CatalogSearchEntry> GetCatalogEntries(ClientContext &context, const string &catalog, const string &schema) {
vector<CatalogSearchEntry> entries;
auto &search_path = *context.client_data->catalog_search_path;
@@ -392,6 +416,26 @@ void FindMinimalQualification(ClientContext &context, const string &catalog_name
qualify_schema = true;
}

CatalogException Catalog::UnrecognizedConfigurationError(ClientContext &context, const string &name) {
// check if the setting exists in any extensions
auto extension_name = FindExtensionForSetting(name);
if (!extension_name.empty()) {
return CatalogException(
"Setting with name \"%s\" is not in the catalog, but it exists in the %s extension.\n\nTo "
"install and load the extension, run:\nINSTALL %s;\nLOAD %s;",
name, extension_name, extension_name, extension_name);
}
// the setting is not in an extension
// get a list of all options
vector<string> potential_names = DBConfig::GetOptionNames();
for (auto &entry : DBConfig::GetConfig(context).extension_parameters) {
potential_names.push_back(entry.first);
}

throw CatalogException("unrecognized configuration parameter \"%s\"\n%s", name,
StringUtil::CandidatesErrorMessage(potential_names, name, "Did you mean"));
}

CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
CatalogType type,
const unordered_set<SchemaCatalogEntry *> &schemas,
@@ -408,13 +452,18 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
unseen_schemas.insert(current_schema);
}
}
auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
auto extension_name = FindExtension(entry_name);
if (!extension_name.empty()) {
return CatalogException("Function with name %s is not on the catalog, but it exists in the %s extension. To "
"Install and Load the extension, run: INSTALL %s; LOAD %s;",
entry_name, extension_name, extension_name, extension_name);
// check if the entry exists in any extension
if (type == CatalogType::TABLE_FUNCTION_ENTRY || type == CatalogType::SCALAR_FUNCTION_ENTRY ||
type == CatalogType::AGGREGATE_FUNCTION_ENTRY) {
auto extension_name = FindExtensionForFunction(entry_name);
if (!extension_name.empty()) {
return CatalogException(
"Function with name \"%s\" is not in the catalog, but it exists in the %s extension.\n\nTo "
"install and load the extension, run:\nINSTALL %s;\nLOAD %s;",
entry_name, extension_name, extension_name, extension_name);
}
}
auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
string did_you_mean;
if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
// the closest matching entry requires qualification as it is not in the default search path
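Note: the catalog now also maps unknown settings to extensions (FindExtensionForSetting / UnrecognizedConfigurationError) and only consults the function table for function-like catalog types. A hedged R sketch of the resulting hint; the specific setting-to-extension mapping (s3_region to httpfs) is an assumption about the generated EXTENSION_SETTINGS table:

    library(duckdb)
    con <- DBI::dbConnect(duckdb())

    # If s3_region is listed in EXTENSION_SETTINGS, the error now reads roughly:
    #   Setting with name "s3_region" is not in the catalog, but it exists in
    #   the httpfs extension. ... INSTALL httpfs; LOAD httpfs;
    tryCatch(
      DBI::dbExecute(con, "SET s3_region = 'eu-west-1'"),
      error = function(e) message(conditionMessage(e))
    )

    DBI::dbDisconnect(con, shutdown = TRUE)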
15 changes: 8 additions & 7 deletions src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp
@@ -19,10 +19,11 @@ string IndexCatalogEntry::ToSQL() {
return sql;
}

void IndexCatalogEntry::Serialize(duckdb::MetaBlockWriter &serializer) {
// Here we serialize the index metadata in the following order:
// schema name, table name, index name, sql, index type, index constraint type, expression list.
// column_ids, unbound_expression
void IndexCatalogEntry::Serialize(Serializer &serializer) {
// here we serialize the index metadata in the following order:
// schema name, table name, index name, sql, index type, index constraint type, expression list, parsed expressions,
// column IDs

FieldWriter writer(serializer);
writer.WriteString(GetSchemaName());
writer.WriteString(GetTableName());
@@ -37,9 +38,9 @@ void IndexCatalogEntry::Serialize(duckdb::MetaBlockWriter &serializer) {
}

unique_ptr<CreateIndexInfo> IndexCatalogEntry::Deserialize(Deserializer &source, ClientContext &context) {
// Here we deserialize the index metadata in the following order:
// root block, root offset, schema name, table name, index name, sql, index type, index constraint type, expression
// list.
// here we deserialize the index metadata in the following order:
// schema name, table schema name, table name, index name, sql, index type, index constraint type, expression list,
// parsed expression list, column IDs

auto create_index_info = make_unique<CreateIndexInfo>();
