Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add collation option #564

Merged
merged 8 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions MySQLdb/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ class object, used to create cursors (keyword only)
If supplied, the connection character set will be changed
to this character set.

:param str collation:
If ``charset`` and ``collation`` are both supplied, the
character set and collation for the current connection
will be set.

If omitted, empty string, or None, the default collation
for the ``charset`` is implied.

:param str auth_plugin:
If supplied, the connection default authentication plugin will be
changed to this value. Example values:
Expand Down Expand Up @@ -168,6 +176,7 @@ class object, used to create cursors (keyword only)

cursorclass = kwargs2.pop("cursorclass", self.default_cursor)
charset = kwargs2.get("charset", "")
collation = kwargs2.pop("collation", "")
use_unicode = kwargs2.pop("use_unicode", True)
sql_mode = kwargs2.pop("sql_mode", "")
self._binary_prefix = kwargs2.pop("binary_prefix", False)
Expand All @@ -194,7 +203,7 @@ class object, used to create cursors (keyword only)

if not charset:
charset = self.character_set_name()
self.set_character_set(charset)
self.set_character_set(charset, collation)

if sql_mode:
self.set_sql_mode(sql_mode)
Expand Down Expand Up @@ -293,10 +302,13 @@ def begin(self):
"""
self.query(b"BEGIN")

def set_character_set(self, charset):
def set_character_set(self, charset, collation=None):
    """Switch the connection to ``charset`` and, optionally, ``collation``.

    The character set is first applied through the parent class, then
    ``self.encoding`` is refreshed from the ``_charset_to_encoding``
    lookup, falling back to the character set name itself when the
    lookup has no entry for it.

    :param str charset:
        Name of the character set to use for the connection.

    :param str collation:
        Optional collation name. When truthy, an additional
        ``SET NAMES ... COLLATE ...`` statement is sent and its result
        is stored. When omitted, ``None`` or empty, no extra statement
        is issued.
    """
    super().set_character_set(charset)
    self.encoding = _charset_to_encoding.get(charset, charset)

    if not collation:
        return

    # NOTE: both names are interpolated into the statement verbatim, so
    # only trusted values should be passed in.
    statement = "SET NAMES %s COLLATE %s" % (charset, collation)
    self.query(statement)
    self.store_result()

def set_sql_mode(self, sql_mode):
"""Set the connection sql_mode. See MySQL documentation for
Expand Down
16 changes: 16 additions & 0 deletions doc/user_guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,22 @@ connect(parameters...)

*This must be a keyword parameter.*

collation
If ``charset`` and ``collation`` are both supplied, the
character set and collation for the current connection
will be set.

If omitted, an empty string, or ``None``, the database server
uses the default collation for the ``charset``.

To learn more about the details of character sets and
collations, consult the `MySQL docs
<https://dev.mysql.com/doc/refman/8.0/en/charset.html>`_
and `MariaDB docs
<https://mariadb.com/kb/en/character-sets/>`_.

*This must be a keyword parameter.*

sql_mode
If present, the session SQL mode will be set to the given
string. For more information on sql_mode, see the MySQL
Expand Down
30 changes: 30 additions & 0 deletions tests/test_MySQLdb_nonstandard.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,33 @@ def test_context_manager(self):
with connection_factory() as conn:
self.assertFalse(conn.closed)
self.assertTrue(conn.closed)


class TestCollation(unittest.TestCase):
    """Test charset and collation connection options."""

    def setUp(self):
        # Initialize a connection with a non-default character set and
        # collation.
        self.conn = connection_factory(
            charset="utf8mb4",
            collation="utf8mb4_esperanto_ci",
        )

    def tearDown(self):
        self.conn.close()

    def test_charset_collation(self):
        """The session reports the charset and collation given at connect."""
        c = self.conn.cursor()
        try:
            # Single-quoted literals keep the statement valid even when
            # ANSI_QUOTES is part of the session sql_mode, where
            # double-quoted text is parsed as an identifier.
            c.execute(
                """
                SHOW VARIABLES WHERE Variable_name IN
                ('character_set_connection', 'collation_connection')
                """
            )
            rows = c.fetchall()
        finally:
            c.close()

        # Look the values up by variable name rather than by position so
        # the test does not depend on the order the server returns rows.
        variables = {name: value for name, value in rows}
        self.assertEqual(
            variables,
            {
                "character_set_connection": "utf8mb4",
                "collation_connection": "utf8mb4_esperanto_ci",
            },
        )