Skip to content

Commit

Permalink
[3.11] [3.12] pythongh-115712: Support CSV dialects with delimiter=' …
Browse files Browse the repository at this point in the history
…' and skipinitialspace=True (pythonGH-115721) (pythonGH-115729)

(cherry picked from commit 5ea86f4)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
csv.writer() now quotes empty fields if delimiter is a space and
skipinitialspace is true and raises exception if quoting is not possible.
(cherry picked from commit 937d282)
  • Loading branch information
serhiy-storchaka committed Feb 20, 2024
1 parent aff083e commit b0d4df6
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 7 deletions.
63 changes: 57 additions & 6 deletions Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,20 @@ def _test_arg_valid(self, ctor, arg):
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')
ctor(arg, delimiter=' ')
ctor(arg, escapechar=' ')
ctor(arg, quotechar=' ')
ctor(arg, delimiter='\t', skipinitialspace=True)
ctor(arg, escapechar='\t', skipinitialspace=True)
ctor(arg, quotechar='\t', skipinitialspace=True)
ctor(arg, delimiter=' ', skipinitialspace=True)
ctor(arg, delimiter='^')
ctor(arg, escapechar='^')
ctor(arg, quotechar='^')
ctor(arg, delimiter='\x85')
ctor(arg, escapechar='\x85')
ctor(arg, quotechar='\x85')
ctor(arg, lineterminator='\x85')

def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
Expand Down Expand Up @@ -152,9 +166,6 @@ def _write_error_test(self, exc, fields, **kwargs):

def test_write_arg_valid(self):
self._write_error_test(csv.Error, None)
self._write_test((), '')
self._write_test([None], '""')
self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE)
# Check that exceptions are passed up the chain
self._write_error_test(OSError, BadIterable())
class BadList:
Expand All @@ -168,7 +179,6 @@ class BadItem:
def __str__(self):
raise OSError
self._write_error_test(OSError, [BadItem()])

def test_write_bigfield(self):
# This exercises the buffer realloc functionality
bigstring = 'X' * 50000
Expand Down Expand Up @@ -271,6 +281,39 @@ def test_writerows_with_none(self):
fileobj.seek(0)
self.assertEqual(fileobj.read(), 'a\r\n""\r\n')


def test_write_empty_fields(self):
    # An empty row produces an empty line.
    self._write_test((), '')
    # A lone empty field ('' or None) has to be quoted to be distinguishable
    # from an empty row; with QUOTE_NONE that is impossible, hence csv.Error.
    for blank in ('', None):
        self._write_test([blank], '""')
        self._write_error_test(csv.Error, [blank], quoting=csv.QUOTE_NONE)
    # Two empty fields are unambiguous thanks to the delimiter between them.
    for blank in ('', None):
        self._write_test([blank, blank], ',')

def test_write_empty_fields_space_delimiter(self):
    # A lone empty field is written quoted regardless of skipinitialspace.
    for blank in ('', None):
        for skip in (False, True):
            self._write_test([blank], '""',
                             delimiter=' ', skipinitialspace=skip)

    # Two empty fields: left bare when skipinitialspace is false, but
    # quoted when it is true.
    for blank in ('', None):
        pair = [blank, blank]
        self._write_test(pair, ' ',
                         delimiter=' ', skipinitialspace=False)
        self._write_test(pair, '"" ""',
                         delimiter=' ', skipinitialspace=True)

    # Same rows with QUOTE_NONE: the bare form is still written when
    # skipinitialspace is false; when it is true the writer must raise.
    for blank in ('', None):
        pair = [blank, blank]
        self._write_test(pair, ' ',
                         delimiter=' ', skipinitialspace=False,
                         quoting=csv.QUOTE_NONE)
        self._write_error_test(csv.Error, pair,
                               delimiter=' ', skipinitialspace=True,
                               quoting=csv.QUOTE_NONE)

def test_writerows_errors(self):
with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
writer = csv.writer(fileobj)
Expand Down Expand Up @@ -372,6 +415,14 @@ def test_read_skipinitialspace(self):
[['no space', 'space', 'spaces', '\ttab']],
skipinitialspace=True)

def test_read_space_delimiter(self):
    # The runs of spaces in the input rows are significant:
    #   'a   b'  -> 'a', three spaces, 'b'
    #   '  a  '  -> two spaces, 'a', two spaces
    #   '  '     -> two spaces
    # With skipinitialspace=False every space delimits a (possibly empty)
    # field, so N consecutive spaces produce N - 1 empty fields in between.
    self._read_test(['a   b', '  a  ', '  ', ''],
                    [['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []],
                    delimiter=' ', skipinitialspace=False)
    # With skipinitialspace=True spaces at the start of a field are skipped,
    # so a run of spaces acts as a single delimiter; a trailing delimiter
    # still yields a final empty field.
    self._read_test(['a   b', '  a  ', '  ', ''],
                    [['a', 'b'], ['a', ''], [''], []],
                    delimiter=' ', skipinitialspace=True)

def test_read_bigfield(self):
# This exercises the buffer realloc functionality and field size
# limits.
Expand Down Expand Up @@ -498,10 +549,10 @@ class space(csv.excel):
escapechar = "\\"

with TemporaryFile("w+", encoding="utf-8") as fileobj:
fileobj.write("abc def\nc1ccccc1 benzene\n")
fileobj.write("abc   def\nc1ccccc1 benzene\n")
fileobj.seek(0)
reader = csv.reader(fileobj, dialect=space())
self.assertEqual(next(reader), ["abc", "def"])
self.assertEqual(next(reader), ["abc", "", "", "def"])
self.assertEqual(next(reader), ["c1ccccc1", "benzene"])

def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:func:`csv.writer()` now quotes empty fields if the delimiter is a
space and skipinitialspace is true, and raises an exception if quoting is
not possible.
14 changes: 13 additions & 1 deletion Modules/_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,7 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
static int
join_append(WriterObj *self, PyObject *field, int quoted)
{
DialectObj *dialect = self->dialect;
unsigned int field_kind = -1;
const void *field_data = NULL;
Py_ssize_t field_len = 0;
Expand All @@ -1192,6 +1193,15 @@ join_append(WriterObj *self, PyObject *field, int quoted)
field_data = PyUnicode_DATA(field);
field_len = PyUnicode_GET_LENGTH(field);
}
if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) {
if (dialect->quoting == QUOTE_NONE) {
PyErr_Format(self->error_obj,
"empty field must be quoted if delimiter is a space "
"and skipinitialspace is true");
return 0;
}
quoted = 1;
}
rec_len = join_append_data(self, field_kind, field_data, field_len,
&quoted, 0);
if (rec_len < 0)
Expand Down Expand Up @@ -1243,6 +1253,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
{
DialectObj *dialect = self->dialect;
PyObject *iter, *field, *line, *result;
bool null_field = false;

iter = PyObject_GetIter(seq);
if (iter == NULL) {
Expand Down Expand Up @@ -1273,11 +1284,12 @@ csv_writerow(WriterObj *self, PyObject *seq)
break;
}

null_field = (field == Py_None);
if (PyUnicode_Check(field)) {
append_ok = join_append(self, field, quoted);
Py_DECREF(field);
}
else if (field == Py_None) {
else if (null_field) {
append_ok = join_append(self, NULL, quoted);
Py_DECREF(field);
}
Expand Down

0 comments on commit b0d4df6

Please sign in to comment.