Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Client now accepts :encoding option. #64

Merged
merged 2 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
### Fixed
- Fix msec values for time columns. #61

### Changed
- (BREAKING CHANGE) The C API functions `trilogy_build_auth_packet` and `trilogy_build_ssl_request_packet` now take a
client encoding argument. The Ruby binding for the Trilogy client can now accept an `:encoding` option, which will
tell the connection to use the specified encoding, and will ensure that outgoing query strings are transcoded
appropriately. If no encoding is supplied, utf8mb4 is used by default. #64

## 2.3.0

### Added
Expand Down
22 changes: 19 additions & 3 deletions contrib/ruby/ext/trilogy-ruby/cext.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,15 @@ struct trilogy_ctx {
trilogy_conn_t conn;
char server_version[TRILOGY_SERVER_VERSION_SIZE + 1];
unsigned int query_flags;
VALUE encoding;
};

static void mark_trilogy(void *ptr)
{
struct trilogy_ctx *ctx = ptr;
rb_gc_mark(ctx->encoding);
}

static void free_trilogy(void *ptr)
{
struct trilogy_ctx *ctx = ptr;
Expand All @@ -57,7 +64,7 @@ static size_t trilogy_memsize(const void *ptr) {
static const rb_data_type_t trilogy_data_type = {
.wrap_struct_name = "trilogy",
.function = {
.dmark = NULL,
.dmark = mark_trilogy,
.dfree = free_trilogy,
.dsize = trilogy_memsize,
},
Expand Down Expand Up @@ -359,13 +366,16 @@ static void authenticate(struct trilogy_ctx *ctx, trilogy_handshake_t *handshake
}
}

static VALUE rb_trilogy_initialize(VALUE self, VALUE opts)
static VALUE rb_trilogy_initialize(VALUE self, VALUE encoding, VALUE charset, VALUE opts)
{
struct trilogy_ctx *ctx = get_ctx(self);
trilogy_sockopt_t connopt = {0};
trilogy_handshake_t handshake;
VALUE val;

RB_OBJ_WRITE(self, &ctx->encoding, encoding);
connopt.encoding = NUM2INT(charset);

Check_Type(opts, T_HASH);
rb_ivar_set(self, id_connection_options, opts);

Expand Down Expand Up @@ -821,6 +831,7 @@ static VALUE rb_trilogy_query(VALUE self, VALUE query)
struct trilogy_ctx *ctx = get_open_ctx(self);

StringValue(query);
adrianna-chang-shopify marked this conversation as resolved.
Show resolved Hide resolved
query = rb_str_export_to_enc(query, rb_to_encoding(ctx->encoding));

int rc = trilogy_query_send(&ctx->conn, RSTRING_PTR(query), RSTRING_LEN(query));

Expand Down Expand Up @@ -1019,7 +1030,7 @@ RUBY_FUNC_EXPORTED void Init_cext()
VALUE Trilogy = rb_const_get(rb_cObject, rb_intern("Trilogy"));
rb_define_alloc_func(Trilogy, allocate_trilogy);

rb_define_method(Trilogy, "initialize", rb_trilogy_initialize, 1);
rb_define_private_method(Trilogy, "_initialize", rb_trilogy_initialize, 3);
rb_define_method(Trilogy, "change_db", rb_trilogy_change_db, 1);
rb_define_method(Trilogy, "query", rb_trilogy_query, 1);
rb_define_method(Trilogy, "ping", rb_trilogy_ping, 0);
Expand Down Expand Up @@ -1137,4 +1148,9 @@ RUBY_FUNC_EXPORTED void Init_cext()
#define XX(name, code) rb_const_set(Trilogy, rb_intern((char *)#name + strlen("TRILOGY_")), LONG2NUM(name));
TRILOGY_SET_SERVER_OPTION(XX)
#undef XX

// charsets
#define XX(name, code) rb_const_set(Trilogy, rb_intern((char *)#name + strlen("TRILOGY_")), LONG2NUM(name));
TRILOGY_CHARSETS(XX)
#undef XX
}
99 changes: 99 additions & 0 deletions contrib/ruby/lib/trilogy.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

require "trilogy/version"

class Trilogy
Expand Down Expand Up @@ -107,6 +109,60 @@ class ConnectionClosed < IOError
include ConnectionError
end

# Maps MySQL character set names to the name of the matching Ruby Encoding.
# A nil value marks a MySQL character set that has no Ruby counterpart; such
# names are rejected the same way as unknown ones.
MYSQL_TO_RUBY_ENCODINGS_MAP = {
  "big5" => "Big5",
  "dec8" => nil,
  "cp850" => "CP850",
  "hp8" => nil,
  "koi8r" => "KOI8-R",
  "latin1" => "ISO-8859-1",
  "latin2" => "ISO-8859-2",
  "swe7" => nil,
  "ascii" => "US-ASCII",
  "ujis" => "eucJP-ms",
  "sjis" => "Shift_JIS",
  "hebrew" => "ISO-8859-8",
  "tis620" => "TIS-620",
  "euckr" => "EUC-KR",
  # koi8u is the Ukrainian code page; it differs from KOI8-R in several
  # letters, so it must not be transcoded as KOI8-R.
  "koi8u" => "KOI8-U",
  "gb2312" => "GB2312",
  "greek" => "ISO-8859-7",
  "cp1250" => "Windows-1250",
  "gbk" => "GBK",
  "latin5" => "ISO-8859-9",
  "armscii8" => nil,
  "utf8" => "UTF-8",
  "ucs2" => "UTF-16BE",
  "cp866" => "IBM866",
  "keybcs2" => nil,
  "macce" => "macCentEuro",
  "macroman" => "macRoman",
  "cp852" => "CP852",
  "latin7" => "ISO-8859-13",
  "utf8mb4" => "UTF-8",
  "cp1251" => "Windows-1251",
  "utf16" => "UTF-16",
  "cp1256" => "Windows-1256",
  "cp1257" => "Windows-1257",
  "utf32" => "UTF-32",
  "binary" => "ASCII-8BIT",
  "geostd8" => nil,
  "cp932" => "Windows-31J",
  "eucjpms" => "eucJP-ms",
  "utf16le" => "UTF-16LE",
  "gb18030" => "GB18030",
}.freeze

# Resolves the requested connection encoding and hands off to the native
# initializer.
#
# options - Hash of connection options. `:encoding` is a MySQL character set
#           name (String) and defaults to "utf8mb4"; the hash is passed on
#           to the C extension as a whole.
#
# Raises ArgumentError when the encoding is unknown or has no Ruby equivalent.
def initialize(options = {})
  mysql_encoding = options[:encoding] || "utf8mb4"
  rb_encoding = MYSQL_TO_RUBY_ENCODINGS_MAP[mysql_encoding]
  unless rb_encoding
    raise ArgumentError, "Unknown or unsupported encoding: #{mysql_encoding}"
  end

  encoding = Encoding.find(rb_encoding)
  charset = charset_for_mysql_encoding(mysql_encoding)

  # The native `_initialize` is registered with a fixed arity of 3, so the
  # options must be passed as a positional Hash. Splatting with `**options`
  # drops the argument entirely when the hash is empty (Ruby 2.7+), which made
  # `Trilogy.new` with no arguments fail with an arity ArgumentError. A shallow
  # copy is passed, as the splat did, so later mutation of the caller's hash
  # cannot leak into the stored connection options.
  _initialize(encoding, charset, options.dup)
end

def connection_options
@connection_options.dup.freeze
end
Expand Down Expand Up @@ -169,6 +225,49 @@ def each(&bk)

include Enumerable
end

private

# Looks up the charset constant registered for a MySQL character set name.
#
# mysql_encoding - MySQL character set name as a String, e.g. "utf8mb4".
#
# Returns the value of the matching CHARSET_* constant, or nil when the name
# has no entry in the table below.
def charset_for_mysql_encoding(mysql_encoding)
# The table is built on first use and memoized per instance.
# NOTE(review): presumably lazy because the CHARSET_* constants are defined by
# the C extension, which is required at the bottom of this file — confirm.
@mysql_encodings_map ||= {
"big5" => CHARSET_BIG5_CHINESE_CI,
"cp850" => CHARSET_CP850_GENERAL_CI,
"koi8r" => CHARSET_KOI8R_GENERAL_CI,
"latin1" => CHARSET_LATIN1_GENERAL_CI,
"latin2" => CHARSET_LATIN2_GENERAL_CI,
"ascii" => CHARSET_ASCII_GENERAL_CI,
"ujis" => CHARSET_UJIS_JAPANESE_CI,
"sjis" => CHARSET_SJIS_JAPANESE_CI,
"hebrew" => CHARSET_HEBREW_GENERAL_CI,
"tis620" => CHARSET_TIS620_THAI_CI,
"euckr" => CHARSET_EUCKR_KOREAN_CI,
"koi8u" => CHARSET_KOI8U_GENERAL_CI,
"gb2312" => CHARSET_GB2312_CHINESE_CI,
"greek" => CHARSET_GREEK_GENERAL_CI,
"cp1250" => CHARSET_CP1250_GENERAL_CI,
"gbk" => CHARSET_GBK_CHINESE_CI,
"latin5" => CHARSET_LATIN5_TURKISH_CI,
"utf8" => CHARSET_UTF8_GENERAL_CI,
"ucs2" => CHARSET_UCS2_GENERAL_CI,
"cp866" => CHARSET_CP866_GENERAL_CI,
"cp932" => CHARSET_CP932_JAPANESE_CI,
"eucjpms" => CHARSET_EUCJPMS_JAPANESE_CI,
# NOTE(review): "utf16le" shares CHARSET_UTF16_GENERAL_CI with "utf16" below,
# while the Ruby-side map pairs it with UTF-16LE — confirm whether a dedicated
# utf16le charset constant exists and should be used here instead.
"utf16le" => CHARSET_UTF16_GENERAL_CI,
"gb18030" => CHARSET_GB18030_CHINESE_CI,
"macce" => CHARSET_MACCE_GENERAL_CI,
"macroman" => CHARSET_MACROMAN_GENERAL_CI,
"cp852" => CHARSET_CP852_GENERAL_CI,
"latin7" => CHARSET_LATIN7_GENERAL_CI,
"utf8mb4" => CHARSET_UTF8MB4_GENERAL_CI,
"cp1251" => CHARSET_CP1251_GENERAL_CI,
"utf16" => CHARSET_UTF16_GENERAL_CI,
"cp1256" => CHARSET_CP1256_GENERAL_CI,
"cp1257" => CHARSET_CP1257_GENERAL_CI,
"utf32" => CHARSET_UTF32_GENERAL_CI,
"binary" => CHARSET_BINARY,
}.freeze
@mysql_encodings_map[mysql_encoding]
end
end

require "trilogy/cext"
46 changes: 46 additions & 0 deletions contrib/ruby/test/client_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -919,4 +919,50 @@ def test_discard_doesnt_terminate_parent_connection
# The client is still usable after a child discarded it.
assert_equal [1], client.query("SELECT 1").to_a.first
end

# Without an :encoding option the session variables should report utf8mb4.
def test_no_character_encoding
  client = new_tcp_client
  session_value = ->(sql) { client.query(sql).first.first }

  assert_equal "utf8mb4", session_value.call("SELECT @@character_set_client")
  assert_equal "utf8mb4", session_value.call("SELECT @@character_set_results")
  assert_equal "utf8mb4", session_value.call("SELECT @@character_set_connection")
  assert_equal "utf8mb4_general_ci", session_value.call("SELECT @@collation_connection")
end

# An encoding name that is not in the supported map is rejected up front.
def test_bad_character_encoding
  error = assert_raises(ArgumentError) { new_tcp_client(encoding: "invalid") }

  assert_equal "Unknown or unsupported encoding: invalid", error.message
end

# An explicit :encoding is applied to the session and to returned strings.
def test_character_encoding
  client = new_tcp_client(encoding: "cp932")
  session_value = ->(sql) { client.query(sql).first.first }

  assert_equal "cp932", session_value.call("SELECT @@character_set_client")
  assert_equal "cp932", session_value.call("SELECT @@character_set_results")
  assert_equal "cp932", session_value.call("SELECT @@character_set_connection")
  assert_equal "cp932_japanese_ci", session_value.call("SELECT @@collation_connection")

  greeting = client.query("SELECT 'こんにちは'").to_a.first.first
  assert_equal "こんにちは".encode(Encoding::CP932), greeting
end

# Queries carrying a raw 0xFF byte round-trip, and results are tagged with
# either BINARY or the connection's encoding.
def test_character_encoding_handles_binary_queries
  raw_byte = "\xff".b
  first_value = ->(conn, sql) { conn.query(sql).to_a.first.first }

  utf8_client = new_tcp_client

  binary_result = first_value.call(utf8_client, "SELECT _binary'#{raw_byte}'")
  assert_equal raw_byte, binary_result
  assert_equal Encoding::BINARY, binary_result.encoding

  utf8_result = first_value.call(utf8_client, "SELECT '#{raw_byte}'")
  assert_equal raw_byte.dup.force_encoding(Encoding::UTF_8), utf8_result
  assert_equal Encoding::UTF_8, utf8_result.encoding

  cp932_client = new_tcp_client(encoding: "cp932")

  cp932_result = first_value.call(cp932_client, "SELECT '#{raw_byte}'")
  assert_equal raw_byte.dup.force_encoding(Encoding::Windows_31J), cp932_result
  assert_equal Encoding::Windows_31J, cp932_result.encoding
end
end
42 changes: 23 additions & 19 deletions inc/trilogy/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,25 +412,27 @@ typedef enum {
* This should be sent in response to the initial handshake packet the server
* sends upon connection.
*
* builder - A pointer to a pre-initialized trilogy_builder_t.
* user - The username to use for authentication. Must be a C-string.
* pass - The password to use for authentication. Optional, and can be NULL.
* pass_len - The length of password in bytes.
* auth_plugin - Plugin authentication mechanism that the server requested.
* scramble - The scramble value the server sent in the initial handshake.
* flags - Bitmask of TRILOGY_CAPABILITIES_t flags.
* The TRILOGY_CAPABILITIES_PROTOCOL_41 and
* TRILOGY_CAPABILITIES_SECURE_CONNECTION flags will always be set
* internally.
* builder - A pointer to a pre-initialized trilogy_builder_t.
* user - The username to use for authentication. Must be a C-string.
* pass - The password to use for authentication. Optional, and can be NULL.
* pass_len - The length of password in bytes.
* database - The initial database to connect to. Optional, and can be NULL.
* client_encoding - The charset to use for the connection.
* auth_plugin - Plugin authentication mechanism that the server requested.
* scramble - The scramble value the server sent in the initial handshake.
* flags - Bitmask of TRILOGY_CAPABILITIES_t flags.
* The TRILOGY_CAPABILITIES_PROTOCOL_41 and
* TRILOGY_CAPABILITIES_SECURE_CONNECTION flags will always be set
* internally.
*
* Return values:
* TRILOGY_OK - The packet was successfully built and written to the
* builder's internal buffer.
* TRILOGY_SYSERR - A system error occurred, check errno.
*/
int trilogy_build_auth_packet(trilogy_builder_t *builder, const char *user, const char *pass, size_t pass_len,
const char *database, const char *auth_plugin, const char *scramble,
TRILOGY_CAPABILITIES_t flags);
const char *database, TRILOGY_CHARSET_t client_encoding, const char *auth_plugin,
const char *scramble, TRILOGY_CAPABILITIES_t flags);

/* trilogy_build_auth_switch_response_packet - Build a response for when
* authentication switching is requested.
Expand Down Expand Up @@ -520,19 +522,21 @@ int trilogy_build_quit_packet(trilogy_builder_t *builder);
* sends upon connection, where an auth packet would normally be sent. A regular
* auth packet is to be sent after the SSL handshake completes.
*
* builder - A pointer to a pre-initialized trilogy_builder_t.
* flags - Bitmask of TRILOGY_CAPABILITIES_t flags.
* The TRILOGY_CAPABILITIES_PROTOCOL_41 and
* TRILOGY_CAPABILITIES_SECURE_CONNECTION flags will always be set
* internally.
* The TRILOGY_CAPABILITIES_SSL flag will also be set.
* builder - A pointer to a pre-initialized trilogy_builder_t.
* flags - Bitmask of TRILOGY_CAPABILITIES_t flags.
* The TRILOGY_CAPABILITIES_PROTOCOL_41 and
* TRILOGY_CAPABILITIES_SECURE_CONNECTION flags will always be set
* internally.
* The TRILOGY_CAPABILITIES_SSL flag will also be set.
* client_encoding - The charset to use for the connection.
*
* Return values:
* TRILOGY_OK - The packet was successfully built and written to the
* builder's internal buffer.
* TRILOGY_SYSERR - A system error occurred, check errno.
*/
int trilogy_build_ssl_request_packet(trilogy_builder_t *builder, TRILOGY_CAPABILITIES_t flags);
int trilogy_build_ssl_request_packet(trilogy_builder_t *builder, TRILOGY_CAPABILITIES_t flags,
TRILOGY_CHARSET_t client_encoding);

#define TRILOGY_SERVER_VERSION_SIZE 32

Expand Down
1 change: 1 addition & 0 deletions inc/trilogy/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ typedef struct {
char *username;
char *password;
size_t password_len;
uint8_t encoding;

trilogy_ssl_mode_t ssl_mode;
trilogy_tls_version_t tls_min_version;
Expand Down
7 changes: 4 additions & 3 deletions src/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,9 @@ int trilogy_auth_send(trilogy_conn_t *conn, const trilogy_handshake_t *handshake
}

rc = trilogy_build_auth_packet(&builder, conn->socket->opts.username, conn->socket->opts.password,
conn->socket->opts.password_len, conn->socket->opts.database, handshake->auth_plugin,
handshake->scramble, conn->socket->opts.flags);
conn->socket->opts.password_len, conn->socket->opts.database,
conn->socket->opts.encoding, handshake->auth_plugin, handshake->scramble,
conn->socket->opts.flags);

if (rc < 0) {
return rc;
Expand All @@ -378,7 +379,7 @@ int trilogy_ssl_request_send(trilogy_conn_t *conn)
}

conn->socket->opts.flags |= TRILOGY_CAPABILITIES_SSL;
rc = trilogy_build_ssl_request_packet(&builder, conn->socket->opts.flags);
rc = trilogy_build_ssl_request_packet(&builder, conn->socket->opts.flags, conn->socket->opts.encoding);

if (rc < 0) {
return rc;
Expand Down
10 changes: 4 additions & 6 deletions src/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -494,8 +494,8 @@ static void trilogy_pack_scramble_sha2_hash(const char *scramble, const char *pa
}

int trilogy_build_auth_packet(trilogy_builder_t *builder, const char *user, const char *pass, size_t pass_len,
const char *database, const char *auth_plugin, const char *scramble,
TRILOGY_CAPABILITIES_t flags)
const char *database, TRILOGY_CHARSET_t client_encoding, const char *auth_plugin,
const char *scramble, TRILOGY_CAPABILITIES_t flags)
{
int rc = TRILOGY_OK;

Expand All @@ -507,8 +507,6 @@ int trilogy_build_auth_packet(trilogy_builder_t *builder, const char *user, cons

uint32_t max_packet_len = TRILOGY_MAX_PACKET_LEN;

uint8_t client_encoding = TRILOGY_CHARSET_UTF8_GENERAL_CI;

unsigned int auth_response_len = 0;
uint8_t auth_response[EVP_MAX_MD_SIZE];

Expand Down Expand Up @@ -663,12 +661,12 @@ int trilogy_build_set_option_packet(trilogy_builder_t *builder, const uint16_t o
}


int trilogy_build_ssl_request_packet(trilogy_builder_t *builder, TRILOGY_CAPABILITIES_t flags)
int trilogy_build_ssl_request_packet(trilogy_builder_t *builder, TRILOGY_CAPABILITIES_t flags,
TRILOGY_CHARSET_t client_encoding)
{
static const char zeroes[23] = {0};

const uint32_t max_packet_len = TRILOGY_MAX_PACKET_LEN;
const uint8_t client_encoding = TRILOGY_CHARSET_UTF8_GENERAL_CI;
const uint32_t capabilities = flags | TRILOGY_CAPABILITIES_CLIENT | TRILOGY_CAPABILITIES_SSL;

int rc = TRILOGY_OK;
Expand Down
Loading