From 0cfd0df3a5b790dd13d10561b23a9cc35b8954ee Mon Sep 17 00:00:00 2001 From: Jason Kim Date: Wed, 20 Nov 2019 11:05:33 -0800 Subject: [PATCH] [YSQL][#1199][#2496][#2842] FOR [NO KEY] UPDATE Summary: Enable `FOR UPDATE` and `FOR NO KEY UPDATE` row locking statements with the caveat that `FOR NO KEY UPDATE` behaves like `FOR UPDATE` (see follow-up issue #2922). Also, fix conflict detection such that `FOR SHARE` locks do not conflict with each other. Implement the fix as follows: 1. Pass down `FOR UPDATE` and `FOR NO KEY UPDATE` row locks through Protobufs. 1. Pass them through `PgsqlReadRequestPB.row_mark_type`. (Also, change the field from `repeated` to `optional`.) 1. Pass them through `KeyValueWriteBatchPB.row_mark_type`. (Also, change the field from `repeated` to `optional`.) 1. Add a row lock parameter to `docdb::GetStrongIntentTypeSet`, and permeate row lock information throughout the affected areas (fix issue #2842). Remove the isolation level hack in `tablet::Tablet::PrepareTransactionWriteBatch` in favor of using row lock information (fix issue #2496). 1. Create a new value type `kRowLock` to be placed in the value of intents for row locking. Handle this value type in `docdb::(anonymous namespace)::DecodeStrongWriteIntent` (for in-transaction reads) and `docdb::IntentToWriteRequest` (for transaction commits). 1. Create tests, specified in the test plan. Also, do the following: * Create new files defining helper functions related to row locks (row marks). * `src/yb/common/row_mark.cc` * `src/yb/common/row_mark.h` * Prevent `ROW_MARK_REFERENCE` and `ROW_MARK_COPY` from getting passed down as the `rowmark` execution parameter. * Update regress test `yb_pg_privileges` (java test `TestPgRegressAuthorization`) to uncomment `FOR UPDATE` row locking statements. Test Plan: * Jenkins * `./yb_build.sh` * `--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.RowLockConflictMatrix` * `--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.SerializableDeleteForNoKeyUpdate` * `--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.SerializableDeleteForUpdate` * `--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.SerializableInsertForNoKeyUpdate` * `--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.SerializableInsertForUpdate` * `--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.SnapshotDeleteForNoKeyUpdate` * `--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.SnapshotDeleteForUpdate` * `--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.SnapshotInsertForNoKeyUpdate` * `--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.SnapshotInsertForUpdate` Reviewers: hector, sergei, mikhail Reviewed By: mikhail Subscribers: yql, bogdan Differential Revision: https://phabricator.dev.yugabyte.com/D7453 --- src/postgres/src/backend/executor/execMain.c | 7 - .../src/backend/executor/nodeIndexscan.c | 5 +- src/postgres/src/backend/executor/ybc_fdw.c | 5 +- .../regress/expected/yb_pg_privileges.out | 19 ++- .../src/test/regress/sql/yb_pg_privileges.sql | 10 +- src/yb/common/CMakeLists.txt | 1 + src/yb/common/common.proto | 9 +- src/yb/common/pgsql_protocol.proto | 9 +- src/yb/common/row_mark.cc | 86 +++++++++++ src/yb/common/row_mark.h | 50 +++++++ src/yb/docdb/conflict_resolution.cc | 11 +- src/yb/docdb/docdb.cc | 50 +++++-- src/yb/docdb/docdb.h | 5 +- src/yb/docdb/docdb.proto | 6 +- src/yb/docdb/intent.cc | 24 ++- src/yb/docdb/intent.h | 8 +- src/yb/docdb/intent_aware_iterator.cc | 7 +- src/yb/docdb/primitive_value.cc | 4 + src/yb/docdb/value_type.h | 2 + src/yb/tablet/tablet.cc | 18 +-- src/yb/tserver/tablet_service.cc | 28 ++-- src/yb/yql/pggate/pg_doc_op.cc | 17 +-- src/yb/yql/pggate/pg_doc_op.h | 4 +- src/yb/yql/pggate/pg_session.cc | 16 +- src/yb/yql/pggate/pg_session.h | 4 +- src/yb/yql/pgwrapper/pg_mini-test.cc | 137 +++++++++++++++++- 26 files changed, 428 insertions(+), 114 deletions(-) create mode 100644 src/yb/common/row_mark.cc create mode 100644 src/yb/common/row_mark.h diff --git a/src/postgres/src/backend/executor/execMain.c b/src/postgres/src/backend/executor/execMain.c index e6a05d9f78c9..3bb0f7d7910e 100644 --- a/src/postgres/src/backend/executor/execMain.c +++ b/src/postgres/src/backend/executor/execMain.c @@ -959,13 +959,6 @@ InitPlan(QueryDesc *queryDesc, int eflags) { case ROW_MARK_EXCLUSIVE: case ROW_MARK_NOKEYEXCLUSIVE: - relation = heap_open(relid, RowShareLock); - if (IsYBRelation(relation)) - { - YBRaiseNotSupported("SELECT locking option only supported for temporary tables", - 1199); - } - break; case ROW_MARK_SHARE: case ROW_MARK_KEYSHARE: relation = heap_open(relid, RowShareLock); diff --git a/src/postgres/src/backend/executor/nodeIndexscan.c b/src/postgres/src/backend/executor/nodeIndexscan.c index 343f90f9ba6b..6d15aaf1b764 100644 --- a/src/postgres/src/backend/executor/nodeIndexscan.c +++ b/src/postgres/src/backend/executor/nodeIndexscan.c @@ -138,7 +138,10 @@ IndexNext(IndexScanState *node) ListCell *l; foreach(l, estate->es_rowMarks) { ExecRowMark *erm = (ExecRowMark *) lfirst(l); - scandesc->yb_exec_params->rowmark = erm->markType; + // Do not propogate non-row-locking row marks. + if (erm->markType != ROW_MARK_REFERENCE && + erm->markType != ROW_MARK_COPY) + scandesc->yb_exec_params->rowmark = erm->markType; break; } } diff --git a/src/postgres/src/backend/executor/ybc_fdw.c b/src/postgres/src/backend/executor/ybc_fdw.c index 3dbe15f4e39d..468d2bbb3a9b 100644 --- a/src/postgres/src/backend/executor/ybc_fdw.c +++ b/src/postgres/src/backend/executor/ybc_fdw.c @@ -275,7 +275,10 @@ ybcBeginForeignScan(ForeignScanState *node, int eflags) ListCell *l; foreach(l, estate->es_rowMarks) { ExecRowMark *erm = (ExecRowMark *) lfirst(l); - ybc_state->exec_params->rowmark = erm->markType; + // Do not propogate non-row-locking row marks. + if (erm->markType != ROW_MARK_REFERENCE && + erm->markType != ROW_MARK_COPY) + ybc_state->exec_params->rowmark = erm->markType; break; } diff --git a/src/postgres/src/test/regress/expected/yb_pg_privileges.out b/src/postgres/src/test/regress/expected/yb_pg_privileges.out index 0f94a5af1ad2..dd6be946be39 100644 --- a/src/postgres/src/test/regress/expected/yb_pg_privileges.out +++ b/src/postgres/src/test/regress/expected/yb_pg_privileges.out @@ -94,9 +94,15 @@ INSERT INTO atest1 SELECT 1, b FROM atest1; -- ok UPDATE atest1 SET a = 1 WHERE a = 2; -- ok UPDATE atest2 SET col2 = NOT col2; -- fail ERROR: permission denied for table atest2 --- TODO(jason): try uncommenting after issue #1199 is resolved. --- SELECT * FROM atest1 FOR UPDATE; -- ok --- SELECT * FROM atest2 FOR UPDATE; -- fail +SELECT * FROM atest1 FOR UPDATE; -- ok + a | b +---+----- + 1 | two + 1 | two +(2 rows) + +SELECT * FROM atest2 FOR UPDATE; -- fail +ERROR: permission denied for table atest2 DELETE FROM atest2; -- fail ERROR: permission denied for table atest2 TRUNCATE atest2; -- fail @@ -146,9 +152,10 @@ ERROR: permission denied for table atest2 UPDATE atest2 SET col2 = true FROM atest1 WHERE atest1.a = 5; -- ok ERROR: FROM clause in UPDATE not supported yet HINT: See https://github.com/YugaByte/yugabyte-db/issues/738. Click '+' on the description to raise its priority --- TODO(jason): try uncommenting after issue #1199 is resolved. --- SELECT * FROM atest1 FOR UPDATE; -- fail --- SELECT * FROM atest2 FOR UPDATE; -- fail +SELECT * FROM atest1 FOR UPDATE; -- fail +ERROR: permission denied for table atest1 +SELECT * FROM atest2 FOR UPDATE; -- fail +ERROR: permission denied for table atest2 DELETE FROM atest2; -- fail ERROR: permission denied for table atest2 TRUNCATE atest2; -- fail diff --git a/src/postgres/src/test/regress/sql/yb_pg_privileges.sql b/src/postgres/src/test/regress/sql/yb_pg_privileges.sql index adb752cd0b6b..2d7ec2bb5c71 100644 --- a/src/postgres/src/test/regress/sql/yb_pg_privileges.sql +++ b/src/postgres/src/test/regress/sql/yb_pg_privileges.sql @@ -76,9 +76,8 @@ INSERT INTO atest2 VALUES ('foo', true); -- fail INSERT INTO atest1 SELECT 1, b FROM atest1; -- ok UPDATE atest1 SET a = 1 WHERE a = 2; -- ok UPDATE atest2 SET col2 = NOT col2; -- fail --- TODO(jason): try uncommenting after issue #1199 is resolved. --- SELECT * FROM atest1 FOR UPDATE; -- ok --- SELECT * FROM atest2 FOR UPDATE; -- fail +SELECT * FROM atest1 FOR UPDATE; -- ok +SELECT * FROM atest2 FOR UPDATE; -- fail DELETE FROM atest2; -- fail TRUNCATE atest2; -- fail COPY atest2 FROM stdin; -- fail @@ -101,9 +100,8 @@ UPDATE atest1 SET a = 1 WHERE a = 2; -- fail UPDATE atest2 SET col2 = NULL; -- ok UPDATE atest2 SET col2 = NOT col2; -- fails; requires SELECT on atest2 UPDATE atest2 SET col2 = true FROM atest1 WHERE atest1.a = 5; -- ok --- TODO(jason): try uncommenting after issue #1199 is resolved. --- SELECT * FROM atest1 FOR UPDATE; -- fail --- SELECT * FROM atest2 FOR UPDATE; -- fail +SELECT * FROM atest1 FOR UPDATE; -- fail +SELECT * FROM atest2 FOR UPDATE; -- fail DELETE FROM atest2; -- fail TRUNCATE atest2; -- fail COPY atest2 FROM stdin; -- fail diff --git a/src/yb/common/CMakeLists.txt b/src/yb/common/CMakeLists.txt index 0ab5c017a84c..a8c8f1f84c7f 100644 --- a/src/yb/common/CMakeLists.txt +++ b/src/yb/common/CMakeLists.txt @@ -99,6 +99,7 @@ set(COMMON_SRCS partial_row.cc partition.cc row_key-util.cc + row_mark.cc schema.cc index.cc jsonb.cc diff --git a/src/yb/common/common.proto b/src/yb/common/common.proto index 5e72a4530e0a..5d81af2dbd49 100644 --- a/src/yb/common/common.proto +++ b/src/yb/common/common.proto @@ -368,12 +368,13 @@ enum IsolationLevel { } // This enum matches enum RowMarkType defined in src/include/nodes/plannodes.h. +// The exception is ROW_MARK_ABSENT, which signifies the absence of a row mark. enum RowMarkType { - // Obtain exclusive tuple lock. Not supported yet. + // Obtain exclusive tuple lock. ROW_MARK_EXCLUSIVE = 0; - // Obtain no-key exclusive tuple lock. Not supported yet. + // Obtain no-key exclusive tuple lock. ROW_MARK_NOKEYEXCLUSIVE = 1; // Obtain shared tuple lock. @@ -387,6 +388,10 @@ enum RowMarkType // Not supported. Used for postgres compatibility. ROW_MARK_COPY = 5; + + // Obtain no tuple lock (this should never sent be on the wire). The value + // should be high for convenient comparisons with the other row lock types. + ROW_MARK_ABSENT = 15; } enum TransactionStatus { diff --git a/src/yb/common/pgsql_protocol.proto b/src/yb/common/pgsql_protocol.proto index 73255e86f1da..d3b590c6e43d 100644 --- a/src/yb/common/pgsql_protocol.proto +++ b/src/yb/common/pgsql_protocol.proto @@ -290,13 +290,8 @@ message PgsqlReadRequestPB { // The version of the ysql system catalog this query was prepared against. optional uint64 ysql_catalog_version = 16; - // Row mark as used by postgres for row locking. As of 10/12/19 only ROW_MARK_SHARE and - // ROW_MARK_KEYSHARE are supported. - // TODO https://github.com/yugabyte/yugabyte-db/issues/2495: - // As of 10/12/19 whenever a read request has an element in this list, the whole read batch is - // assumed to need the same row locks. This is not a problem because we are not batching - // read requests, but we will need to change this behavior if we start batching requests. - repeated RowMarkType row_mark_type = 23; + // Row mark as used by postgres for row locking. + optional RowMarkType row_mark_type = 23; } //-------------------------------------------------------------------------------------------------- diff --git a/src/yb/common/row_mark.cc b/src/yb/common/row_mark.cc new file mode 100644 index 000000000000..c43c0eaab91f --- /dev/null +++ b/src/yb/common/row_mark.cc @@ -0,0 +1,86 @@ +// +// Copyright (c) YugaByte, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations +// under the License. +// + +#include "yb/common/row_mark.h" + +#include + +#include "yb/common/common.pb.h" +#include "yb/gutil/macros.h" + +namespace yb { + +bool AreConflictingRowMarkTypes( + const RowMarkType row_mark_type_a, + const RowMarkType row_mark_type_b) { + constexpr int kConflictThreshold = 4; + const unsigned int value_a = static_cast(row_mark_type_a); + const unsigned int value_b = static_cast(row_mark_type_b); + + // TODO: remove this when issue #2922 is fixed. + if ((row_mark_type_a == RowMarkType::ROW_MARK_NOKEYEXCLUSIVE && + row_mark_type_b == RowMarkType::ROW_MARK_KEYSHARE) || + (row_mark_type_a == RowMarkType::ROW_MARK_KEYSHARE && + row_mark_type_b == RowMarkType::ROW_MARK_NOKEYEXCLUSIVE)) { + return true; + } + + return (value_a + value_b < kConflictThreshold); +} + +RowMarkType GetStrongestRowMarkType(std::initializer_list row_mark_types) { + RowMarkType strongest_row_mark_type = RowMarkType::ROW_MARK_ABSENT; + for (RowMarkType row_mark_type : row_mark_types) { + strongest_row_mark_type = std::min(row_mark_type, strongest_row_mark_type); + } + return strongest_row_mark_type; +} + +bool IsValidRowMarkType(RowMarkType row_mark_type) { + switch (row_mark_type) { + case RowMarkType::ROW_MARK_EXCLUSIVE: FALLTHROUGH_INTENDED; + case RowMarkType::ROW_MARK_NOKEYEXCLUSIVE: FALLTHROUGH_INTENDED; + case RowMarkType::ROW_MARK_SHARE: FALLTHROUGH_INTENDED; + case RowMarkType::ROW_MARK_KEYSHARE: + return true; + break; + default: + return false; + break; + } +} + +std::string RowMarkTypeToPgsqlString(const RowMarkType row_mark_type) { + switch (row_mark_type) { + case RowMarkType::ROW_MARK_EXCLUSIVE: + return "UPDATE"; + break; + case RowMarkType::ROW_MARK_NOKEYEXCLUSIVE: + return "NO KEY UPDATE"; + break; + case RowMarkType::ROW_MARK_SHARE: + return "SHARE"; + break; + case RowMarkType::ROW_MARK_KEYSHARE: + return "KEY SHARE"; + break; + default: + // We shouldn't get here because other row lock types are disabled at the postgres level. + LOG(DFATAL) << "Unsupported row lock of type " << RowMarkType_Name(row_mark_type); + return ""; + break; + } +} + +} // namespace yb diff --git a/src/yb/common/row_mark.h b/src/yb/common/row_mark.h new file mode 100644 index 000000000000..5eb38cf39659 --- /dev/null +++ b/src/yb/common/row_mark.h @@ -0,0 +1,50 @@ +// +// Copyright (c) YugaByte, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations +// under the License. +// +#ifndef YB_COMMON_ROW_MARK_H +#define YB_COMMON_ROW_MARK_H + +#include + +#include "yb/common/common.pb.h" + +namespace yb { + +// Determine whether two row mark types conflict. +bool AreConflictingRowMarkTypes(RowMarkType row_mark_type_a, RowMarkType row_mark_type_b); + +template +RowMarkType GetRowMarkTypeFromPB(const PB& pb) { + if (pb.has_row_mark_type()) { + if (IsValidRowMarkType(pb.row_mark_type())) { + return pb.row_mark_type(); + } else { + // We shouldn't get here because other row lock types are disabled at the postgres level. + LOG(DFATAL) << "Unsupported row lock of type " << RowMarkType_Name(pb.row_mark_type()); + } + } + return RowMarkType::ROW_MARK_ABSENT; +} + +// Get the most restrictive row mark type from a list of row mark types. +RowMarkType GetStrongestRowMarkType(std::initializer_list row_mark_types); + +// Determine whether a row mark type is valid. +bool IsValidRowMarkType(RowMarkType row_mark_type); + +// Convert a row mark type to a string to use in a PostgreSQL query. +std::string RowMarkTypeToPgsqlString(const RowMarkType row_mark_type); + +} // namespace yb + +#endif // YB_COMMON_ROW_MARK_H diff --git a/src/yb/docdb/conflict_resolution.cc b/src/yb/docdb/conflict_resolution.cc index 65a4b3884253..679ad552c1d4 100644 --- a/src/yb/docdb/conflict_resolution.cc +++ b/src/yb/docdb/conflict_resolution.cc @@ -15,6 +15,7 @@ #include "yb/common/hybrid_time.h" #include "yb/common/pgsql_error.h" +#include "yb/common/row_mark.h" #include "yb/common/transaction.h" #include "yb/common/transaction_error.h" @@ -399,9 +400,11 @@ class TransactionConflictResolverContext : public ConflictResolverContext { boost::container::small_vector paths; KeyBytes encoded_key_buffer; + RowMarkType row_mark = GetRowMarkTypeFromPB(write_batch_); EnumerateIntentsCallback callback = std::bind( &TransactionConflictResolverContext::ProcessIntent, this, resolver, - GetStrongIntentTypeSet(metadata_.isolation, docdb::OperationKind::kWrite), _1, _3); + GetStrongIntentTypeSet(metadata_.isolation, docdb::OperationKind::kWrite, row_mark), _1, + _3); for (const auto& doc_op : doc_ops_) { paths.clear(); IsolationLevel ignored_isolation_level; @@ -429,10 +432,11 @@ class TransactionConflictResolverContext : public ConflictResolverContext { return Status::OK(); } + RowMarkType row_mark = GetRowMarkTypeFromPB(write_batch_); return EnumerateIntents( pairs, std::bind(&TransactionConflictResolverContext::ProcessIntent, this, resolver, - GetStrongIntentTypeSet(metadata_.isolation, kind), _1, _3), + GetStrongIntentTypeSet(metadata_.isolation, kind, row_mark), _1, _3), resolver->partial_range_key_intents()); } @@ -610,7 +614,8 @@ class OperationConflictResolverContext : public ConflictResolverContext { IsolationLevel isolation; RETURN_NOT_OK(doc_op->GetDocPaths(GetDocPathsMode::kIntents, &doc_paths, &isolation)); - strong_intent_types = GetStrongIntentTypeSet(isolation, OperationKind::kWrite); + strong_intent_types = GetStrongIntentTypeSet(isolation, OperationKind::kWrite, + RowMarkType::ROW_MARK_ABSENT); for (const auto& doc_path : doc_paths) { VLOG(4) << "Doc path: " << SubDocKey::DebugSliceToString(doc_path.as_slice()); diff --git a/src/yb/docdb/docdb.cc b/src/yb/docdb/docdb.cc index aa66347988b4..6864aa990baf 100644 --- a/src/yb/docdb/docdb.cc +++ b/src/yb/docdb/docdb.cc @@ -20,6 +20,7 @@ #include "yb/common/hybrid_time.h" #include "yb/common/redis_protocol.pb.h" +#include "yb/common/row_mark.h" #include "yb/common/transaction.h" #include "yb/docdb/conflict_resolution.h" @@ -140,8 +141,9 @@ struct DetermineKeysToLockResult { Result DetermineKeysToLock( const std::vector>& doc_write_ops, const google::protobuf::RepeatedPtrField& read_pairs, - IsolationLevel isolation_level, - OperationKind operation_kind, + const IsolationLevel isolation_level, + const OperationKind operation_kind, + const RowMarkType row_mark_type, bool transactional_table, PartialRangeKeyIntents partial_range_key_intents) { DetermineKeysToLockResult result; @@ -155,7 +157,8 @@ Result DetermineKeysToLock( if (isolation_level != IsolationLevel::NON_TRANSACTIONAL) { level = isolation_level; } - IntentTypeSet strong_intent_types = GetStrongIntentTypeSet(level, operation_kind); + IntentTypeSet strong_intent_types = GetStrongIntentTypeSet(level, operation_kind, + row_mark_type); if (isolation_level == IsolationLevel::SERIALIZABLE_ISOLATION && operation_kind == OperationKind::kWrite && doc_op->RequireReadSnapshot()) { @@ -249,8 +252,9 @@ Result PrepareDocWriteOperation( const std::vector>& doc_write_ops, const google::protobuf::RepeatedPtrField& read_pairs, const scoped_refptr& write_lock_latency, - IsolationLevel isolation_level, - OperationKind operation_kind, + const IsolationLevel isolation_level, + const OperationKind operation_kind, + const RowMarkType row_mark_type, bool transactional_table, CoarseTimePoint deadline, PartialRangeKeyIntents partial_range_key_intents, @@ -258,8 +262,8 @@ Result PrepareDocWriteOperation( PrepareDocWriteOperationResult result; auto determine_keys_to_lock_result = VERIFY_RESULT(DetermineKeysToLock( - doc_write_ops, read_pairs, isolation_level, operation_kind, transactional_table, - partial_range_key_intents)); + doc_write_ops, read_pairs, isolation_level, operation_kind, row_mark_type, + transactional_table, partial_range_key_intents)); if (determine_keys_to_lock_result.lock_batch.empty()) { LOG(ERROR) << "Empty lock batch, doc_write_ops: " << yb::ToString(doc_write_ops) << ", read pairs: " << yb::ToString(read_pairs); @@ -541,8 +545,12 @@ class PrepareTransactionWriteBatchHelper { intra_txn_write_id_(intra_txn_write_id) { } - void Setup(IsolationLevel isolation_level, OperationKind kind) { - strong_intent_types_ = GetStrongIntentTypeSet(isolation_level, kind); + void Setup( + IsolationLevel isolation_level, + OperationKind kind, + RowMarkType row_mark) { + row_mark_ = row_mark; + strong_intent_types_ = GetStrongIntentTypeSet(isolation_level, kind, row_mark); } // Using operator() to pass this object conveniently to EnumerateIntents. @@ -554,14 +562,19 @@ class PrepareTransactionWriteBatchHelper { const auto transaction_value_type = ValueTypeAsChar::kTransactionId; const auto write_id_value_type = ValueTypeAsChar::kWriteId; + const auto row_lock_value_type = ValueTypeAsChar::kRowLock; IntraTxnWriteId big_endian_write_id = BigEndian::FromHost32(*intra_txn_write_id_); std::array value = {{ Slice(&transaction_value_type, 1), Slice(transaction_id_.data, transaction_id_.size()), Slice(&write_id_value_type, 1), Slice(pointer_cast(&big_endian_write_id), sizeof(big_endian_write_id)), - value_slice + value_slice, }}; + // Store a row lock indicator rather than data (in value_slice) for row lock intents. + if (IsValidRowMarkType(row_mark_)) { + value.back() = Slice(&row_lock_value_type, 1); + } ++*intra_txn_write_id_; @@ -623,6 +636,7 @@ class PrepareTransactionWriteBatchHelper { // TODO(dtxn) weak & strong intent in one batch. // TODO(dtxn) extract part of code knowning about intents structure to lower level. + RowMarkType row_mark_; HybridTime hybrid_time_; rocksdb::WriteBatch* rocksdb_write_batch_; const TransactionId& transaction_id_; @@ -652,11 +666,18 @@ void PrepareTransactionWriteBatch( IntraTxnWriteId* write_id) { VLOG(4) << "PrepareTransactionWriteBatch(), write_id = " << *write_id; + RowMarkType row_mark = GetRowMarkTypeFromPB(put_batch); + PrepareTransactionWriteBatchHelper helper( hybrid_time, rocksdb_write_batch, transaction_id, write_id); if (!put_batch.write_pairs().empty()) { - helper.Setup(isolation_level, OperationKind::kWrite); + if (IsValidRowMarkType(row_mark)) { + LOG(WARNING) << "Performing a write with row lock " + << RowMarkType_Name(row_mark) + << " when only reads are expected"; + } + helper.Setup(isolation_level, OperationKind::kWrite, row_mark); // We cannot recover from failures here, because it means that we cannot apply replicated // operation. @@ -665,7 +686,7 @@ void PrepareTransactionWriteBatch( } if (!put_batch.read_pairs().empty()) { - helper.Setup(isolation_level, OperationKind::kRead); + helper.Setup(isolation_level, OperationKind::kRead, row_mark); CHECK_OK(EnumerateIntents(put_batch.read_pairs(), std::ref(helper), partial_range_key_intents)); } @@ -1308,6 +1329,11 @@ CHECKED_STATUS IntentToWriteRequest( << "Value: " << intent_iter->value().ToDebugHexString(); *write_id = stored_write_id; + // Intents for row locks should be ignored (i.e. should not be written as regular records). + if (intent_value.starts_with(ValueTypeAsChar::kRowLock)) { + return Status::OK(); + } + // After strip of prefix and suffix intent_key contains just SubDocKey w/o a hybrid time. // Time will be added when writing batch to RocksDB. std::array key_parts = {{ diff --git a/src/yb/docdb/docdb.h b/src/yb/docdb/docdb.h index 4c30cabbf170..cbd57c2330ce 100644 --- a/src/yb/docdb/docdb.h +++ b/src/yb/docdb/docdb.h @@ -115,8 +115,9 @@ Result PrepareDocWriteOperation( const std::vector>& doc_write_ops, const google::protobuf::RepeatedPtrField& read_pairs, const scoped_refptr& write_lock_latency, - IsolationLevel isolation_level, - OperationKind operation_kind, + const IsolationLevel isolation_level, + const OperationKind operation_kind, + const RowMarkType row_mark_type, bool transactional_table, CoarseTimePoint deadline, PartialRangeKeyIntents partial_range_key_intents, diff --git a/src/yb/docdb/docdb.proto b/src/yb/docdb/docdb.proto index 2491542f09da..7eda744695c9 100644 --- a/src/yb/docdb/docdb.proto +++ b/src/yb/docdb/docdb.proto @@ -29,12 +29,10 @@ message KeyValueWriteBatchPB { repeated KeyValuePairPB write_pairs = 1; optional TransactionMetadataPB transaction = 2; optional bool DEPRECATED_may_have_metadata = 3; - // Used by serializable isolation, to store read intents. + // Used by serializable isolation transactions and row locking statements to store read intents. // In case of read-modify-write operation both read_pairs and write_pairs could present. repeated KeyValuePairPB read_pairs = 5; - // Currently only used when we create read intents for read requests with ROW_MARK_SHARE or - // ROW_MARK_KEYSHARE row locks. When this list is non-empty, we acquire a strong read lock. - repeated RowMarkType row_mark_type = 6; + optional RowMarkType row_mark_type = 6; } message ConsensusFrontierPB { diff --git a/src/yb/docdb/intent.cc b/src/yb/docdb/intent.cc index fb1918f309e0..89a2e7e2bff0 100644 --- a/src/yb/docdb/intent.cc +++ b/src/yb/docdb/intent.cc @@ -17,6 +17,7 @@ #include +#include "yb/common/row_mark.h" #include "yb/common/transaction.h" #include "yb/docdb/value_type.h" @@ -80,7 +81,28 @@ IntentTypeSet AllStrongIntents() { return IntentTypeSet({IntentType::kStrongRead, IntentType::kStrongWrite}); } -IntentTypeSet GetStrongIntentTypeSet(IsolationLevel level, OperationKind operation_kind) { +IntentTypeSet GetStrongIntentTypeSet( + IsolationLevel level, + OperationKind operation_kind, + RowMarkType row_mark) { + if (IsValidRowMarkType(row_mark)) { + // TODO: possibly adjust this when issue #2922 is fixed. + switch (row_mark) { + case RowMarkType::ROW_MARK_EXCLUSIVE: FALLTHROUGH_INTENDED; + case RowMarkType::ROW_MARK_NOKEYEXCLUSIVE: + return IntentTypeSet({IntentType::kStrongRead, IntentType::kStrongWrite}); + break; + case RowMarkType::ROW_MARK_SHARE: FALLTHROUGH_INTENDED; + case RowMarkType::ROW_MARK_KEYSHARE: + return IntentTypeSet({IntentType::kStrongRead}); + break; + default: + // We shouldn't get here because other row lock types are disabled at the postgres level. + LOG(DFATAL) << "Unsupported row lock of type " << RowMarkType_Name(row_mark); + break; + } + } + switch (level) { case IsolationLevel::SNAPSHOT_ISOLATION: return IntentTypeSet({IntentType::kStrongRead, IntentType::kStrongWrite}); diff --git a/src/yb/docdb/intent.h b/src/yb/docdb/intent.h index 767637f07d85..2b59360d116e 100644 --- a/src/yb/docdb/intent.h +++ b/src/yb/docdb/intent.h @@ -31,6 +31,11 @@ struct DecodedIntentKey { // Decodes intent RocksDB key. Result DecodeIntentKey(const Slice &encoded_intent_key); +// Decode intent RocksDB value. +// encoded_intent_value - input intent value to decode. +// transaction_id_slice - input transaction id (to double-check with transaction id in value). +// write_id - output write id. +// body - output the rest of the data after write id. CHECKED_STATUS DecodeIntentValue( const Slice& encoded_intent_value, const Slice& transaction_id_slice, IntraTxnWriteId* write_id, Slice* body); @@ -48,7 +53,8 @@ YB_DEFINE_ENUM(IntentStrength, (kWeak)(kStrong)); YB_DEFINE_ENUM(OperationKind, (kRead)(kWrite)); -IntentTypeSet GetStrongIntentTypeSet(IsolationLevel level, OperationKind operation_kind); +IntentTypeSet GetStrongIntentTypeSet( + IsolationLevel level, OperationKind operation_kind, RowMarkType row_mark); inline IntentTypeSet StrongToWeak(IntentTypeSet inp) { IntentTypeSet result(inp.ToUIntPtr() >> kStrongIntentFlag); diff --git a/src/yb/docdb/intent_aware_iterator.cc b/src/yb/docdb/intent_aware_iterator.cc index ef05c18e23eb..a15fdb1c3029 100644 --- a/src/yb/docdb/intent_aware_iterator.cc +++ b/src/yb/docdb/intent_aware_iterator.cc @@ -166,7 +166,8 @@ std::ostream& operator<<(std::ostream& out, const DecodeStrongWriteIntentResult& } // Decodes intent based on intent_iterator and its transaction commit time if intent is a strong -// write intent and transaction is already committed at specified time or it is current transaction. +// write intent, intent is not for row locking, and transaction is already committed at specified +// time or is current transaction. // Returns HybridTime::kMin as value_time otherwise. // For current transaction returns intent record hybrid time as value_time. // Consumes intent from value_slice leaving only value itself. @@ -189,7 +190,9 @@ Result DecodeStrongWriteIntent( result.intent_value.consume_byte(); IntraTxnWriteId in_txn_write_id = BigEndian::Load32(result.intent_value.data()); result.intent_value.remove_prefix(sizeof(IntraTxnWriteId)); - if (result.same_transaction) { + if (result.intent_value.starts_with(ValueTypeAsChar::kRowLock)) { + result.value_time = DocHybridTime::kMin; + } else if (result.same_transaction) { result.value_time = decoded_intent_key.doc_ht; } else { auto commit_ht = VERIFY_RESULT(transaction_status_cache->GetCommitTime(txn_id)); diff --git a/src/yb/docdb/primitive_value.cc b/src/yb/docdb/primitive_value.cc index 75d726b229a1..0d7b9846bcba 100644 --- a/src/yb/docdb/primitive_value.cc +++ b/src/yb/docdb/primitive_value.cc @@ -55,6 +55,7 @@ using yb::util::DecodeDoubleFromKey; #define IGNORE_NON_PRIMITIVE_VALUE_TYPES_IN_SWITCH \ case ValueType::kArray: FALLTHROUGH_INTENDED; \ case ValueType::kMergeFlags: FALLTHROUGH_INTENDED; \ + case ValueType::kRowLock: FALLTHROUGH_INTENDED; \ case ValueType::kGroupEnd: FALLTHROUGH_INTENDED; \ case ValueType::kGroupEndDescending: FALLTHROUGH_INTENDED; \ case ValueType::kInvalid: FALLTHROUGH_INTENDED; \ @@ -219,6 +220,7 @@ string PrimitiveValue::ToString() const { case ValueType::kObsoleteIntentType: return Format("Intent($0)", uint16_val_); case ValueType::kMergeFlags: FALLTHROUGH_INTENDED; + case ValueType::kRowLock: FALLTHROUGH_INTENDED; case ValueType::kGroupEnd: FALLTHROUGH_INTENDED; case ValueType::kGroupEndDescending: FALLTHROUGH_INTENDED; case ValueType::kTtl: FALLTHROUGH_INTENDED; @@ -544,6 +546,7 @@ string PrimitiveValue::ToValue() const { case ValueType::kObsoleteIntentTypeSet: FALLTHROUGH_INTENDED; case ValueType::kObsoleteIntentType: FALLTHROUGH_INTENDED; case ValueType::kMergeFlags: FALLTHROUGH_INTENDED; + case ValueType::kRowLock: FALLTHROUGH_INTENDED; case ValueType::kGroupEnd: FALLTHROUGH_INTENDED; case ValueType::kGroupEndDescending: FALLTHROUGH_INTENDED; case ValueType::kObsoleteIntentPrefix: FALLTHROUGH_INTENDED; @@ -1127,6 +1130,7 @@ Status PrimitiveValue::DecodeFromValue(const rocksdb::Slice& rocksdb_slice) { case ValueType::kUInt16Hash: FALLTHROUGH_INTENDED; case ValueType::kInvalid: FALLTHROUGH_INTENDED; case ValueType::kMergeFlags: FALLTHROUGH_INTENDED; + case ValueType::kRowLock: FALLTHROUGH_INTENDED; case ValueType::kTtl: FALLTHROUGH_INTENDED; case ValueType::kUserTimestamp: FALLTHROUGH_INTENDED; case ValueType::kColumnId: FALLTHROUGH_INTENDED; diff --git a/src/yb/docdb/value_type.h b/src/yb/docdb/value_type.h index 874ca3354ab5..472489035d6c 100644 --- a/src/yb/docdb/value_type.h +++ b/src/yb/docdb/value_type.h @@ -116,6 +116,8 @@ namespace docdb { \ /* Flag type for merge record flags */ \ ((kMergeFlags, 'k')) /* ASCII code 107 */ \ + /* Indicator for whether an intent is for a row lock. */ \ + ((kRowLock, 'l')) /* ASCII code 108 */ \ /* Timestamp value in microseconds */ \ ((kTimestamp, 's')) /* ASCII code 115 */ \ /* TTL value in milliseconds, optionally present at the start of a value. */ \ diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index c6c83d6e0c99..a10fa3c90c91 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -61,10 +61,11 @@ #include "yb/common/common.pb.h" #include "yb/common/hybrid_time.h" -#include "yb/common/schema.h" #include "yb/common/ql_protocol.pb.h" #include "yb/common/ql_rowblock.h" #include "yb/common/pgsql_error.h" +#include "yb/common/row_mark.h" +#include "yb/common/schema.h" #include "yb/consensus/consensus.h" #include "yb/consensus/consensus.pb.h" @@ -833,14 +834,6 @@ Status Tablet::PrepareTransactionWriteBatch( } auto isolation_level = metadata_with_write_id->first.isolation; - if (put_batch.row_mark_type_size() > 0) { - // We used this as a shorthand to acquire the right locks for this operation. This doesn't - // change the isolation level of the current transaction. - // TODO https://github.com/yugabyte/yugabyte-db/issues/2496: - // Use a new method to acquire the right locks. This would avoid any confusion. - isolation_level = IsolationLevel::SERIALIZABLE_ISOLATION; - } - auto write_id = metadata_with_write_id->second; yb::docdb::PrepareTransactionWriteBatch( put_batch, hybrid_time, rocksdb_write_batch, transaction_id, isolation_level, @@ -1889,15 +1882,16 @@ Status Tablet::TEST_SwitchMemtable() { Status Tablet::StartDocWriteOperation(WriteOperation* operation) { auto write_batch = operation->request()->mutable_write_batch(); - auto isolation_level = VERIFY_RESULT(GetIsolationLevelFromPB(*write_batch)); + const IsolationLevel isolation_level = VERIFY_RESULT(GetIsolationLevelFromPB(*write_batch)); + const RowMarkType row_mark_type = GetRowMarkTypeFromPB(*write_batch); const bool transactional_table = metadata_->schema().table_properties().is_transactional(); const auto partial_range_key_intents = UsePartialRangeKeyIntents(metadata_.get()); auto prepare_result = VERIFY_RESULT(docdb::PrepareDocWriteOperation( operation->doc_ops(), write_batch->read_pairs(), metrics_->write_lock_latency, - isolation_level, operation->state()->kind(), transactional_table, operation->deadline(), - partial_range_key_intents, &shared_lock_manager_)); + isolation_level, operation->state()->kind(), row_mark_type, transactional_table, + operation->deadline(), partial_range_key_intents, &shared_lock_manager_)); RequestScope request_scope; if (transaction_participant_) { diff --git a/src/yb/tserver/tablet_service.cc b/src/yb/tserver/tablet_service.cc index f17246d2ea6d..40274db0fe37 100644 --- a/src/yb/tserver/tablet_service.cc +++ b/src/yb/tserver/tablet_service.cc @@ -37,8 +37,9 @@ #include #include -#include "yb/common/schema.h" #include "yb/common/ql_value.h" +#include "yb/common/row_mark.h" +#include "yb/common/schema.h" #include "yb/common/wire_protocol.h" #include "yb/consensus/leader_lease.h" #include "yb/consensus/raft_consensus.h" @@ -1221,10 +1222,12 @@ void TabletServiceImpl::Read(const ReadRequestPB* req, #endif } - bool has_row_lock = false; - // For postgres requests check that the syscatalog version matches. + // Get the most restrictive row mark present in the batch of PostgreSQL requests. + // TODO: rather handle individual row marks once we start batching read requests (issue #2495) + RowMarkType batch_row_mark = RowMarkType::ROW_MARK_ABSENT; if (!req->pgsql_batch().empty()) { for (const auto& pg_req : req->pgsql_batch()) { + // For postgres requests check that the syscatalog version matches. if (pg_req.has_ysql_catalog_version() && pg_req.ysql_catalog_version() < server_->ysql_catalog_version()) { SetupErrorAndRespond(resp->mutable_error(), @@ -1233,24 +1236,24 @@ void TabletServiceImpl::Read(const ReadRequestPB* req, TabletServerErrorPB::MISMATCHED_SCHEMA, &context); return; } - if (pg_req.row_mark_type_size() > 0 && - (pg_req.row_mark_type(0) == RowMarkType::ROW_MARK_SHARE || - pg_req.row_mark_type(0) == RowMarkType::ROW_MARK_KEYSHARE)) { + RowMarkType current_row_mark = GetRowMarkTypeFromPB(pg_req); + if (IsValidRowMarkType(current_row_mark)) { if (!req->has_transaction()) { SetupErrorAndRespond(resp->mutable_error(), STATUS(NotSupported, "Read request with row mark types must be part of a transaction"), TabletServerErrorPB::OPERATION_NOT_SUPPORTED, &context); } - has_row_lock = true; + batch_row_mark = GetStrongestRowMarkType({current_row_mark, batch_row_mark}); } } } + const bool has_row_mark = IsValidRowMarkType(batch_row_mark); LeaderTabletPeer leader_peer; ReadContext read_context = {req, resp, &context}; - if (serializable_isolation || has_row_lock) { + if (serializable_isolation || has_row_mark) { // At this point we expect that we don't have pure read serializable transactions, and // always write read intents to detect conflicts with other writes. leader_peer = LookupLeaderTabletOrRespond( @@ -1319,14 +1322,11 @@ void TabletServiceImpl::Read(const ReadRequestPB* req, host_port_pb.set_port(remote_address.port()); read_context.host_port_pb = &host_port_pb; - if (serializable_isolation || has_row_lock) { + if (serializable_isolation || has_row_mark) { WriteRequestPB write_req; *write_req.mutable_write_batch()->mutable_transaction() = req->transaction(); - if (has_row_lock) { - // This will have to be modified once we support more row locks. - // See https://github.com/yugabyte/yugabyte-db/issues/1199 and - // https://github.com/yugabyte/yugabyte-db/issues/2496. - write_req.mutable_write_batch()->add_row_mark_type(RowMarkType::ROW_MARK_SHARE); + if (has_row_mark) { + write_req.mutable_write_batch()->set_row_mark_type(batch_row_mark); read_context.read_time.ToPB(write_req.mutable_read_time()); } write_req.set_tablet_id(req->tablet_id()); diff --git a/src/yb/yql/pggate/pg_doc_op.cc b/src/yb/yql/pggate/pg_doc_op.cc index 48f607bc6f7c..a0b0c4e93754 100644 --- a/src/yb/yql/pggate/pg_doc_op.cc +++ b/src/yb/yql/pggate/pg_doc_op.cc @@ -225,24 +225,21 @@ void PgDocReadOp::SetRequestPrefetchLimit() { req->set_limit(limit_count); } -void PgDocReadOp::SetRowMarks() { - if (exec_params_.rowmark < 0) { - return; - } - PgsqlReadRequestPB *req = read_op_->mutable_request(); +void PgDocReadOp::SetRowMark() { + PgsqlReadRequestPB *const req = read_op_->mutable_request(); - // We only support one type of row lock at a time. - if (req->row_mark_type_size() > 0) { - return; + if (exec_params_.rowmark < 0) { + req->clear_row_mark_type(); + } else { + req->set_row_mark_type(static_cast(exec_params_.rowmark)); } - req->add_row_mark_type(static_cast(exec_params_.rowmark)); } Status PgDocReadOp::SendRequestUnlocked() { CHECK(!waiting_for_response_); SetRequestPrefetchLimit(); - SetRowMarks(); + SetRowMark(); SCHECK_EQ(VERIFY_RESULT(pg_session_->PgApplyAsync(read_op_, &read_time_)), OpBuffered::kFalse, IllegalState, "YSQL read operation should not be buffered"); diff --git a/src/yb/yql/pggate/pg_doc_op.h b/src/yb/yql/pggate/pg_doc_op.h index 46dcbe0a42bd..accec535ff3e 100644 --- a/src/yb/yql/pggate/pg_doc_op.h +++ b/src/yb/yql/pggate/pg_doc_op.h @@ -147,8 +147,8 @@ class PgDocReadOp : public PgDocOp { // Analyze options and pick the appropriate prefetch limit. void SetRequestPrefetchLimit(); - // Add a row_mark_type element. For now we only support one. - void SetRowMarks(); + // Set the row_mark_type field of our read request based on our exec control parameter. + void SetRowMark(); // Operator. std::shared_ptr read_op_; diff --git a/src/yb/yql/pggate/pg_session.cc b/src/yb/yql/pggate/pg_session.cc index 53d89d77d1dc..1d343a998dd1 100644 --- a/src/yb/yql/pggate/pg_session.cc +++ b/src/yb/yql/pggate/pg_session.cc @@ -31,6 +31,7 @@ #include "yb/common/pgsql_error.h" #include "yb/common/ql_protocol_util.h" +#include "yb/common/row_mark.h" #include "yb/tserver/tserver_shared_mem.h" @@ -533,16 +534,7 @@ Result PgSession::PgApplyAsync(const std::shared_ptrtype() == YBOperation::Type::PGSQL_READ) { const PgsqlReadRequestPB& read_req = down_cast(op.get())->request(); - if (read_req.row_mark_type_size() > 0) { - if (read_req.row_mark_type(0) == RowMarkType::ROW_MARK_SHARE || - read_req.row_mark_type(0) == RowMarkType::ROW_MARK_KEYSHARE) { - has_for_share_lock_ = true; - } else { - // We shouldn't get here because other row lock types are disabled at the postgres level. - LOG(WARNING) << "Unsupported row lock of type " - << RowMarkType_Name(read_req.row_mark_type(0)); - } - } + has_row_mark_ = IsValidRowMarkType(GetRowMarkTypeFromPB(read_req)); } // If the operation is a write op and we are in buffered write mode, save the op and return false @@ -589,7 +581,7 @@ Status PgSession::PgFlushAsync(StatusFunctor callback) { << ": has_txn_ops_=" << has_txn_ops_ << ", has_non_txn_ops_=" << has_non_txn_ops_; has_txn_ops_ = false; has_non_txn_ops_ = false; - has_for_share_lock_ = false; + has_row_mark_ = false; // We specify read_only_op true here because we never start a new write transaction at this point. client::YBSessionPtr session = @@ -606,7 +598,7 @@ Status PgSession::RestartTransaction() { Result PgSession::GetSessionForOp( const std::shared_ptr& op) { - return GetSession(op->IsTransactional(), op->read_only() && !has_for_share_lock_); + return GetSession(op->IsTransactional(), op->read_only() && !has_row_mark_); } namespace { diff --git a/src/yb/yql/pggate/pg_session.h b/src/yb/yql/pggate/pg_session.h index 3bc7eb301d45..f015b58ea387 100644 --- a/src/yb/yql/pggate/pg_session.h +++ b/src/yb/yql/pggate/pg_session.h @@ -256,8 +256,8 @@ class PgSession : public RefCountedThreadSafe { bool has_txn_ops_ = false; bool has_non_txn_ops_ = false; - // True if the read request has a FOR SHARE or FOR KEY SHARE lock. - bool has_for_share_lock_ = false; + // True if the read request has a row mark. + bool has_row_mark_ = false; // Local tablet-server shared memory segment handle. This has a value of nullptr // if the shared memory has not been initialized (e.g. during initdb). diff --git a/src/yb/yql/pgwrapper/pg_mini-test.cc b/src/yb/yql/pgwrapper/pg_mini-test.cc index 4eeae46b3474..aa0c0b6ce62a 100644 --- a/src/yb/yql/pgwrapper/pg_mini-test.cc +++ b/src/yb/yql/pgwrapper/pg_mini-test.cc @@ -23,6 +23,7 @@ #include "yb/yql/pgwrapper/pg_wrapper.h" #include "yb/common/pgsql_error.h" +#include "yb/common/row_mark.h" #include "yb/util/random_util.h" using namespace std::literals; @@ -102,14 +103,21 @@ class PgMiniTest : public YBMiniClusterTestBase { // Run interleaved INSERT, SELECT with specified isolation level and row mark. Possible isolation // levels are SNAPSHOT_ISOLATION and SERIALIZABLE_ISOLATION. Possible row marks are - // ROW_MARK_KEYSHARE and ROW_MARK_SHARE. + // ROW_MARK_KEYSHARE, ROW_MARK_SHARE, ROW_MARK_NOKEYEXCLUSIVE, and ROW_MARK_EXCLUSIVE. void TestInsertSelectRowLock(IsolationLevel isolation, RowMarkType row_mark); // Run interleaved DELETE, SELECT with specified isolation level and row mark. Possible isolation // levels are SNAPSHOT_ISOLATION and SERIALIZABLE_ISOLATION. Possible row marks are - // ROW_MARK_KEYSHARE and ROW_MARK_SHARE. + // ROW_MARK_KEYSHARE, ROW_MARK_SHARE, ROW_MARK_NOKEYEXCLUSIVE, and ROW_MARK_EXCLUSIVE. void TestDeleteSelectRowLock(IsolationLevel isolation, RowMarkType row_mark); + // Test the row lock conflict matrix across a grid of the following parameters + // * 4 row marks for session A + // * 4 row marks for session B + // * 2 isolation levels + // This totals 4 x 4 x 2 = 32 situations. + void TestRowLockConflictMatrix(); + private: std::unique_ptr pg_supervisor_; HostPort pg_host_port_; @@ -260,8 +268,7 @@ TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(ReadRestart)) { void PgMiniTest::TestInsertSelectRowLock(IsolationLevel isolation, RowMarkType row_mark) { const std::string isolation_str = ( isolation == IsolationLevel::SNAPSHOT_ISOLATION ? "REPEATABLE READ" : "SERIALIZABLE"); - const std::string row_mark_str = ( - row_mark == RowMarkType::ROW_MARK_KEYSHARE ? "KEY SHARE" : "SHARE"); + const std::string row_mark_str = RowMarkTypeToPgsqlString(row_mark); constexpr auto kSleepTime = 1s; constexpr int kKeys = 3; PGConn read_conn = ASSERT_RESULT(Connect()); @@ -290,6 +297,7 @@ void PgMiniTest::TestInsertSelectRowLock(IsolationLevel isolation, RowMarkType r ASSERT_OK(read_conn.Execute("ABORT")); } else { ASSERT_OK(result); + // NOTE: vanilla PostgreSQL expects kKeys rows, but kKeys + 1 rows are expected for Yugabyte. ASSERT_EQ(PQntuples(result.get().get()), kKeys + 1); ASSERT_OK(read_conn.Execute("COMMIT")); } @@ -298,8 +306,7 @@ void PgMiniTest::TestInsertSelectRowLock(IsolationLevel isolation, RowMarkType r void PgMiniTest::TestDeleteSelectRowLock(IsolationLevel isolation, RowMarkType row_mark) { const std::string isolation_str = ( isolation == IsolationLevel::SNAPSHOT_ISOLATION ? "REPEATABLE READ" : "SERIALIZABLE"); - const std::string row_mark_str = ( - row_mark == RowMarkType::ROW_MARK_KEYSHARE ? "KEY SHARE" : "SHARE"); + const std::string row_mark_str = RowMarkTypeToPgsqlString(row_mark); constexpr auto kSleepTime = 1s; constexpr int kKeys = 3; PGConn read_conn = ASSERT_RESULT(Connect()); @@ -326,13 +333,31 @@ void PgMiniTest::TestDeleteSelectRowLock(IsolationLevel isolation, RowMarkType r ASSERT_STR_CONTAINS(result.status().ToString(), "Value write after transaction start"); ASSERT_OK(read_conn.Execute("ABORT")); } else { - // TODO: either this or the DELETE above should be ASSERT_NOK (related to issue #2831). ASSERT_OK(result); + // NOTE: vanilla PostgreSQL expects kKeys rows, but kKeys - 1 rows are expected for Yugabyte. ASSERT_EQ(PQntuples(result.get().get()), kKeys - 1); ASSERT_OK(read_conn.Execute("COMMIT")); } } +TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SnapshotInsertForUpdate)) { + TestInsertSelectRowLock(IsolationLevel::SNAPSHOT_ISOLATION, RowMarkType::ROW_MARK_EXCLUSIVE); +} + +TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SerializableInsertForUpdate)) { + TestInsertSelectRowLock(IsolationLevel::SERIALIZABLE_ISOLATION, RowMarkType::ROW_MARK_EXCLUSIVE); +} + +TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SnapshotInsertForNoKeyUpdate)) { + TestInsertSelectRowLock(IsolationLevel::SNAPSHOT_ISOLATION, + RowMarkType::ROW_MARK_NOKEYEXCLUSIVE); +} + +TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SerializableInsertForNoKeyUpdate)) { + TestInsertSelectRowLock(IsolationLevel::SERIALIZABLE_ISOLATION, + RowMarkType::ROW_MARK_NOKEYEXCLUSIVE); +} + TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SnapshotInsertForShare)) { TestInsertSelectRowLock(IsolationLevel::SNAPSHOT_ISOLATION, RowMarkType::ROW_MARK_SHARE); } @@ -349,6 +374,24 @@ TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SerializableInsertForKeyShare)) TestInsertSelectRowLock(IsolationLevel::SERIALIZABLE_ISOLATION, RowMarkType::ROW_MARK_KEYSHARE); } +TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SnapshotDeleteForUpdate)) { + TestDeleteSelectRowLock(IsolationLevel::SNAPSHOT_ISOLATION, RowMarkType::ROW_MARK_EXCLUSIVE); +} + +TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SerializableDeleteForUpdate)) { + TestDeleteSelectRowLock(IsolationLevel::SERIALIZABLE_ISOLATION, RowMarkType::ROW_MARK_EXCLUSIVE); +} + +TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SnapshotDeleteForNoKeyUpdate)) { + TestDeleteSelectRowLock(IsolationLevel::SNAPSHOT_ISOLATION, + RowMarkType::ROW_MARK_NOKEYEXCLUSIVE); +} + +TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SerializableDeleteForNoKeyUpdate)) { + TestDeleteSelectRowLock(IsolationLevel::SERIALIZABLE_ISOLATION, + RowMarkType::ROW_MARK_NOKEYEXCLUSIVE); +} + TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SnapshotDeleteForShare)) { TestDeleteSelectRowLock(IsolationLevel::SNAPSHOT_ISOLATION, RowMarkType::ROW_MARK_SHARE); } @@ -365,6 +408,86 @@ TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SerializableDeleteForKeyShare)) TestDeleteSelectRowLock(IsolationLevel::SERIALIZABLE_ISOLATION, RowMarkType::ROW_MARK_KEYSHARE); } +void PgMiniTest::TestRowLockConflictMatrix() { + constexpr auto kSleepTime = 1s; + constexpr int kKeys = 3; + constexpr int kNumIsolationLevels = 2; + constexpr int kNumRowMarkTypes = 4; + PGConn conn_a = ASSERT_RESULT(Connect()); + PGConn conn_b = ASSERT_RESULT(Connect()); + PGConn conn_misc = ASSERT_RESULT(Connect()); + + const std::array row_mark_types = {{ + RowMarkType::ROW_MARK_EXCLUSIVE, + RowMarkType::ROW_MARK_NOKEYEXCLUSIVE, + RowMarkType::ROW_MARK_SHARE, + RowMarkType::ROW_MARK_KEYSHARE, + }}; + const std::array isolation_strs = {{ + "REPEATABLE READ", + "SERIALIZABLE", + }}; + + // Set up table + ASSERT_OK(conn_misc.Execute("CREATE TABLE t (i INT PRIMARY KEY, j INT)")); + // TODO: remove this sleep when issue #2857 is fixed. + std::this_thread::sleep_for(kSleepTime); + for (int i = 0; i < kKeys; ++i) { + ASSERT_OK(conn_misc.ExecuteFormat("INSERT INTO t (i, j) VALUES ($0, $0)", i)); + } + + for (const auto& row_mark_type_a : row_mark_types) { + for (const auto& row_mark_type_b : row_mark_types) { + for (const auto& isolation_str : isolation_strs) { + const std::string row_mark_str_a = RowMarkTypeToPgsqlString(row_mark_type_a); + const std::string row_mark_str_b = RowMarkTypeToPgsqlString(row_mark_type_b); + LOG(INFO) << "Testing " << row_mark_str_a << " vs " << row_mark_str_b << " with " + << isolation_str << " isolation transactions."; + + ASSERT_OK(conn_a.ExecuteFormat("BEGIN TRANSACTION ISOLATION LEVEL $0", isolation_str)); + ASSERT_RESULT(conn_a.Fetch("SELECT '(setting read point)'")); + ASSERT_OK(conn_b.ExecuteFormat("BEGIN TRANSACTION ISOLATION LEVEL $0", isolation_str)); + ASSERT_RESULT(conn_b.Fetch("SELECT '(setting read point)'")); + ASSERT_RESULT(conn_a.FetchFormat("SELECT * FROM t FOR $0", row_mark_str_a)); + Result result_select = conn_b.FetchFormat("SELECT * FROM t FOR $0", + row_mark_str_b); + // TODO: remove this sleep when issue #2910 is fixed. + std::this_thread::sleep_for(kSleepTime); + Status status_commit = conn_a.Execute("COMMIT"); + ASSERT_OK(conn_b.Execute("COMMIT")); + if (AreConflictingRowMarkTypes(row_mark_type_a, row_mark_type_b)) { + // There should be a conflict. + if (result_select.ok()) { + // Should conflict on COMMIT only. + ASSERT_NOK(status_commit); + ASSERT_TRUE(status_commit.IsNetworkError()) << status_commit; + ASSERT_EQ(PgsqlError(status_commit), YBPgErrorCode::YB_PG_T_R_SERIALIZATION_FAILURE) + << status_commit; + ASSERT_STR_CONTAINS(status_commit.ToString(), "Transaction expired: 25P02"); + } else { + // Should conflict on SELECT only. + ASSERT_OK(status_commit); + ASSERT_TRUE(result_select.status().IsNetworkError()) << result_select.status(); + ASSERT_EQ(PgsqlError(result_select.status()), + YBPgErrorCode::YB_PG_T_R_SERIALIZATION_FAILURE) + << result_select.status(); + ASSERT_STR_CONTAINS(result_select.status().ToString(), + "Conflicts with higher priority transaction"); + } + } else { + // There should not be a conflict. + ASSERT_OK(result_select); + ASSERT_OK(status_commit); + } + } + } + } +} + +TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(RowLockConflictMatrix)) { + TestRowLockConflictMatrix(); +} + TEST_F(PgMiniTest, YB_DISABLE_TEST_IN_SANITIZERS(SerializableReadOnly)) { PGConn read_conn = ASSERT_RESULT(Connect()); PGConn setup_conn = ASSERT_RESULT(Connect());