From 7baa4f28e2e649e73790c319fb7dfbfb05d87bf5 Mon Sep 17 00:00:00 2001
From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
Date: Tue, 19 Nov 2024 11:13:33 +0200
Subject: [PATCH] Handle MySQL handler error codes (#17252)

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
---
 go/mysql/sqlerror/constants.go                | 190 ++++++++++++++++++
 go/mysql/sqlerror/sql_error.go                |  12 ++
 go/mysql/sqlerror/sql_error_test.go           |  21 ++
 .../tabletmanager/vreplication/utils.go       |  18 ++
 .../tabletmanager/vreplication/utils_test.go  |  10 +
 5 files changed, 251 insertions(+)

diff --git a/go/mysql/sqlerror/constants.go b/go/mysql/sqlerror/constants.go
index da2351bf00d..5fedf567998 100644
--- a/go/mysql/sqlerror/constants.go
+++ b/go/mysql/sqlerror/constants.go
@@ -203,6 +203,7 @@ const (
 	ERBlobKeyWithoutLength         = ErrorCode(1170)
 	ERPrimaryCantHaveNull          = ErrorCode(1171)
 	ERTooManyRows                  = ErrorCode(1172)
+	ERErrorDuringCommit            = ErrorCode(1180)
 	ERLockOrActiveTransaction      = ErrorCode(1192)
 	ERUnknownSystemVariable        = ErrorCode(1193)
 	ERSetConstantsOnly             = ErrorCode(1204)
@@ -301,6 +302,195 @@ const (
 	ERServerIsntAvailable = ErrorCode(3168)
 )
 
+// HandlerErrorCode is a MySQL handler error code. Such codes are raised by the handler and then embedded in other errors.
+// See https://github.com/mysql/mysql-server/blob/trunk/include/my_base.h
+type HandlerErrorCode uint16
+
+func (e HandlerErrorCode) ToString() string {
+	return strconv.FormatUint(uint64(e), 10) // base-10 rendering of the numeric code
+}
+
+const (
+	// Didn't find key on read or update
+	HaErrKeyNotFound = HandlerErrorCode(120)
+	// Duplicate key on write
+	HaErrFoundDuppKey = HandlerErrorCode(121)
+	// Internal error
+	HaErrInternalError = HandlerErrorCode(122)
+	// Update which is recoverable
+	HaErrRecordChanged = HandlerErrorCode(123)
+	// Wrong index given to function
+	HaErrWrongIndex = HandlerErrorCode(124)
+	// Transaction has been rolled back
+	HaErrRolledBack = HandlerErrorCode(125)
+	// Index file is crashed
+	HaErrCrashed = HandlerErrorCode(126)
+	// Record-file is crashed
+	HaErrWrongInRecord = HandlerErrorCode(127)
+	// Out of memory
+	HaErrOutOfMem = HandlerErrorCode(128)
+	// not a MYI file - no signature
+	HaErrNotATable = HandlerErrorCode(130)
+	// Command not supported
+	HaErrWrongCommand = HandlerErrorCode(131)
+	// old database file
+	HaErrOldFile = HandlerErrorCode(132)
+	// No record read in update()
+	HaErrNoActiveRecord = HandlerErrorCode(133)
+	// A record is not there
+	HaErrRecordDeleted = HandlerErrorCode(134)
+	// No more room in file
+	HaErrRecordFileFull = HandlerErrorCode(135)
+	// No more room in index file
+	HaErrIndexFileFull = HandlerErrorCode(136)
+	// end in next/prev/first/last
+	HaErrEndOfFile = HandlerErrorCode(137)
+	// unsupported extension used
+	HaErrUnsupported = HandlerErrorCode(138)
+	// Too big row
+	HaErrTooBigRow = HandlerErrorCode(139)
+	// Wrong create option
+	HaWrongCreateOption = HandlerErrorCode(140)
+	// Duplicate unique on write
+	HaErrFoundDuppUnique = HandlerErrorCode(141)
+	// Can't open charset
+	HaErrUnknownCharset = HandlerErrorCode(142)
+	// conflicting tables in MERGE
+	HaErrWrongMrgTableDef = HandlerErrorCode(143)
+	// Last (automatic?) repair failed
+	HaErrCrashedOnRepair = HandlerErrorCode(144)
+	// Table must be repaired
+	HaErrCrashedOnUsage = HandlerErrorCode(145)
+	// Lock wait timeout
+	HaErrLockWaitTimeout = HandlerErrorCode(146)
+	// Lock table is full
+	HaErrLockTableFull = HandlerErrorCode(147)
+	// Updates not allowed
+	HaErrReadOnlyTransaction = HandlerErrorCode(148)
+	// Deadlock found when trying to get lock
+	HaErrLockDeadlock = HandlerErrorCode(149)
+	// Cannot add a foreign key constraint
+	HaErrCannotAddForeign = HandlerErrorCode(150)
+	// Cannot add a child row
+	HaErrNoReferencedRow = HandlerErrorCode(151)
+	// Cannot delete a parent row
+	HaErrRowIsReferenced = HandlerErrorCode(152)
+	// No savepoint with that name
+	HaErrNoSavepoint = HandlerErrorCode(153)
+	// Non unique key block size
+	HaErrNonUniqueBlockSize = HandlerErrorCode(154)
+	// The table does not exist in engine
+	HaErrNoSuchTable = HandlerErrorCode(155)
+	// The table existed in storage engine
+	HaErrTableExist = HandlerErrorCode(156)
+	// Could not connect to storage engine
+	HaErrNoConnection = HandlerErrorCode(157)
+	// NULLs are not supported in spatial index
+	HaErrNullInSpatial = HandlerErrorCode(158)
+	// The table changed in storage engine
+	HaErrTableDefChanged = HandlerErrorCode(159)
+	// There's no partition in table for given value
+	HaErrNoPartitionFound = HandlerErrorCode(160)
+	// Row-based binlogging of row failed
+	HaErrRbrLoggingFailed = HandlerErrorCode(161)
+	// Index needed in foreign key constraint
+	HaErrDropIndexFk = HandlerErrorCode(162)
+	// Upholding foreign key constraints would lead to a duplicate key error in some other table.
+	HaErrForeignDuplicateKey = HandlerErrorCode(163)
+	// The table needs to be upgraded
+	HaErrTableNeedsUpgrade = HandlerErrorCode(164)
+	// The table is not writable
+	HaErrTableReadonly = HandlerErrorCode(165)
+	// Failed to get next autoinc value
+	HaErrAutoincReadFailed = HandlerErrorCode(166)
+	// Failed to set row autoinc value
+	HaErrAutoincErange = HandlerErrorCode(167)
+	// Generic error
+	HaErrGeneric = HandlerErrorCode(168)
+	// row not actually updated: new values same as the old values
+	HaErrRecordIsTheSame = HandlerErrorCode(169)
+	// It is not possible to log this statement
+	HaErrLoggingImpossible = HandlerErrorCode(170)
+	// The event was corrupt, leading to illegal data being read
+	HaErrCorruptEvent = HandlerErrorCode(171)
+	// New file format
+	HaErrNewFile = HandlerErrorCode(172)
+	// The event could not be processed; no other handler error happened
+	HaErrRowsEventApply = HandlerErrorCode(173)
+	// Error during initialization
+	HaErrInitialization = HandlerErrorCode(174)
+	// File too short
+	HaErrFileTooShort = HandlerErrorCode(175)
+	// Wrong CRC on page
+	HaErrWrongCrc = HandlerErrorCode(176)
+	// Too many active concurrent transactions
+	HaErrTooManyConcurrentTrxs = HandlerErrorCode(177)
+	// There's no explicitly listed partition in table for the given value
+	HaErrNotInLockPartitions = HandlerErrorCode(178)
+	// Index column length exceeds limit
+	HaErrIndexColTooLong = HandlerErrorCode(179)
+	// InnoDB index corrupted
+	HaErrIndexCorrupt = HandlerErrorCode(180)
+	// Undo log record too big
+	HaErrUndoRecTooBig = HandlerErrorCode(181)
+	// Invalid InnoDB Doc ID
+	HaFtsInvalidDocid = HandlerErrorCode(182)
+	// Table being used in foreign key check
+	HaErrTableInFkCheck = HandlerErrorCode(183)
+	// The tablespace existed in storage engine
+	HaErrTablespaceExists = HandlerErrorCode(184)
+	// Table has too many columns
+	HaErrTooManyFields = HandlerErrorCode(185)
+	// Row in wrong partition
+	HaErrRowInWrongPartition = HandlerErrorCode(186)
+	// InnoDB is in read only mode.
+	HaErrInnodbReadOnly = HandlerErrorCode(187)
+	// FTS query exceeds result cache limit
+	HaErrFtsExceedResultCacheLimit = HandlerErrorCode(188)
+	// Temporary file write failure
+	HaErrTempFileWriteFailure = HandlerErrorCode(189)
+	// Innodb is in force recovery mode
+	HaErrInnodbForcedRecovery = HandlerErrorCode(190)
+	// Too many words in a phrase
+	HaErrFtsTooManyWordsInPhrase = HandlerErrorCode(191)
+	// FK cascade depth exceeded
+	HaErrFkDepthExceeded = HandlerErrorCode(192)
+	// Option Missing during Create
+	HaMissingCreateOption = HandlerErrorCode(193)
+	// Out of memory in storage engine
+	HaErrSeOutOfMemory = HandlerErrorCode(194)
+	// Table/Clustered index is corrupted.
+	HaErrTableCorrupt = HandlerErrorCode(195)
+	// The query was interrupted
+	HaErrQueryInterrupted = HandlerErrorCode(196)
+	// Missing Tablespace
+	HaErrTablespaceMissing = HandlerErrorCode(197)
+	// Tablespace is not empty
+	HaErrTablespaceIsNotEmpty = HandlerErrorCode(198)
+	// Invalid Filename
+	HaErrWrongFileName = HandlerErrorCode(199)
+	// Operation is not allowed
+	HaErrNotAllowedCommand = HandlerErrorCode(200)
+	// Compute generated column value failed
+	HaErrComputeFailed = HandlerErrorCode(201)
+	// Table's row format has changed in the storage engine. Information in the data-dictionary needs to be updated.
+	HaErrRowFormatChanged = HandlerErrorCode(202)
+	// Don't wait for record lock
+	HaErrNoWaitLock = HandlerErrorCode(203)
+	// No more room in disk
+	HaErrDiskFullNowait = HandlerErrorCode(204)
+	// No session temporary space available
+	HaErrNoSessionTemp = HandlerErrorCode(205)
+	// Wrong or Invalid table name
+	HaErrWrongTableName = HandlerErrorCode(206)
+	// Path is too long for the OS
+	HaErrTooLongPath = HandlerErrorCode(207)
+	// Histogram sampling initialization failed
+	HaErrSamplingInitFailed = HandlerErrorCode(208)
+	// Too many sub-expression in search string
+	HaErrFtsTooManyNestedExp = HandlerErrorCode(209)
+)
+
 // Sql states for errors.
 // Originally found in include/mysql/sql_state.h
 const (
diff --git a/go/mysql/sqlerror/sql_error.go b/go/mysql/sqlerror/sql_error.go
index f2a5fb46388..4600f0927cc 100644
--- a/go/mysql/sqlerror/sql_error.go
+++ b/go/mysql/sqlerror/sql_error.go
@@ -53,6 +53,18 @@ func NewSQLError(number ErrorCode, sqlState string, msg string) *SQLError {
 	}
 }
 
+var handlerErrExtract = regexp.MustCompile(`Got error ([0-9]*) [-] .* (from storage engine|during COMMIT|during ROLLBACK)`)
+
+// HaErrorCode returns the handler error code found in se.Message ("Got error N - ... during COMMIT"), or 0 if there is none.
+func (se *SQLError) HaErrorCode() HandlerErrorCode {
+	if match := handlerErrExtract.FindStringSubmatch(se.Message); len(match) > 1 { // match[1] holds the captured digits
+		if code, err := strconv.ParseUint(match[1], 10, 16); err == nil {
+			return HandlerErrorCode(code)
+		}
+	}
+	return 0 // no match, or the captured digits do not parse as a 16-bit unsigned value
+}
+
 // Error implements the error interface
 func (se *SQLError) Error() string {
 	var buf strings.Builder
diff --git a/go/mysql/sqlerror/sql_error_test.go b/go/mysql/sqlerror/sql_error_test.go
index b38cec26388..9e73138d60f 100644
--- a/go/mysql/sqlerror/sql_error_test.go
+++ b/go/mysql/sqlerror/sql_error_test.go
@@ -57,6 +57,7 @@ func TestNewSQLErrorFromError(t *testing.T) {
 	var tCases = []struct {
 		err error
 		num ErrorCode
+		ha  HandlerErrorCode
 		ss  string
 	}{
 		{
@@ -179,6 +180,24 @@ func TestNewSQLErrorFromError(t *testing.T) {
 			num: ERDupEntry,
 			ss:  SSConstraintViolation,
 		},
+		{
+			err: fmt.Errorf("ERROR HY000: Got error 204 - 'No more room in disk' during COMMIT"),
+			num: ERUnknownError,
+			ss:  SSUnknownSQLState,
+			ha:  HaErrDiskFullNowait,
+		},
+		{
+			err: fmt.Errorf("COMMIT failed w/ error: Got error 204 - 'No more room in disk' during COMMIT (errno 1180) (sqlstate HY000) during query: commit"),
+			num: ERErrorDuringCommit,
+			ss:  SSUnknownSQLState,
+			ha:  HaErrDiskFullNowait,
+		},
+		{
+			err: fmt.Errorf("COMMIT failed w/ error: Got error 149 - 'Lock deadlock; Retry transaction' during COMMIT (errno 1180) (sqlstate HY000) during query: commit"),
+			num: ERErrorDuringCommit,
+			ss:  SSUnknownSQLState,
+			ha:  HaErrLockDeadlock,
+		},
 	}
 
 	for _, tc := range tCases {
@@ -187,6 +206,8 @@ func TestNewSQLErrorFromError(t *testing.T) {
 			require.ErrorAs(t, NewSQLErrorFromError(tc.err), &err)
 			assert.Equal(t, tc.num, err.Number())
 			assert.Equal(t, tc.ss, err.SQLState())
+			ha := err.HaErrorCode()
+			assert.Equal(t, tc.ha, ha)
 		})
 	}
 }
diff --git a/go/vt/vttablet/tabletmanager/vreplication/utils.go b/go/vt/vttablet/tabletmanager/vreplication/utils.go
index bb1c469cc93..67b52c56261 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/utils.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/utils.go
@@ -232,6 +232,24 @@ func isUnrecoverableError(err error) bool {
 		sqlerror.ERWrongValueCountOnRow:
 		log.Errorf("Got unrecoverable error: %v", sqlErr)
 		return true
+	case sqlerror.ERErrorDuringCommit:
+		switch sqlErr.HaErrorCode() {
+		case
+			0, // Not really a HA error.
+			sqlerror.HaErrLockDeadlock,
+			sqlerror.HaErrLockTableFull,
+			sqlerror.HaErrLockWaitTimeout,
+			sqlerror.HaErrNotInLockPartitions,
+			sqlerror.HaErrQueryInterrupted,
+			sqlerror.HaErrRolledBack,
+			sqlerror.HaErrTooManyConcurrentTrxs,
+			sqlerror.HaErrUndoRecTooBig:
+			// These are recoverable errors.
+			return false
+		default:
+			log.Errorf("Got unrecoverable error: %v", sqlErr)
+			return true
+		}
 	}
 	return false
 }
diff --git a/go/vt/vttablet/tabletmanager/vreplication/utils_test.go b/go/vt/vttablet/tabletmanager/vreplication/utils_test.go
index 69a57c34341..15093e299fc 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/utils_test.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/utils_test.go
@@ -152,6 +152,16 @@ func TestIsUnrecoverableError(t *testing.T) {
 			err:      sqlerror.NewSQLError(sqlerror.ERDataOutOfRange, "data out of range", "test"),
 			expected: true,
 		},
+		{
+			name:     "SQL error with HaErrDiskFullNowait error",
+			err:      sqlerror.NewSQLError(sqlerror.ERErrorDuringCommit, "unknown", "ERROR HY000: Got error 204 - 'No more room in disk' during COMMIT"),
+			expected: true,
+		},
+		{
+			name:     "SQL error with HaErrLockDeadlock error",
+			err:      sqlerror.NewSQLError(sqlerror.ERErrorDuringCommit, "unknown", "ERROR HY000: Got error 149 - 'Lock deadlock; Retry transaction' during COMMIT"),
+			expected: false,
+		},
 	}
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {