Skip to content

Commit

Permalink
Merge branch 'master' into sgpeter1/master
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Aug 16, 2024
2 parents 54f482e + 5ba0aa7 commit 7c4478f
Show file tree
Hide file tree
Showing 46 changed files with 175 additions and 108 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
Unreleased
----------

- feat: Add a :code:`--no-leading-zeroes` option to tools that support type inference.
- feat: :doc:`/scripts/csvsql` adds a :code:`--engine-option` option.
- feat: :doc:`/scripts/csvsql` adds a :code:`--sql-delimiter` option, to set a different delimiter than ``;`` for the :code:`--query`, :code:`--before-insert` and :code:`--after-insert` options.
- feat: :doc:`/scripts/sql2csv` adds a :code:`--execution-option` option.
- feat: :doc:`/scripts/sql2csv` uses the ``stream_results=True`` execution option, by default, to not load all data into memory at once.
- fix: :doc:`/scripts/csvsql` uses a default value of 1 for the :code:`--min-col-len` and :code:`--col-len-multiplier` options.

2.0.1 - July 12, 2024
---------------------
Expand Down
44 changes: 24 additions & 20 deletions csvkit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,22 +208,22 @@ def _init_common_parser(self):
self.argparser.add_argument(
'-S', '--skipinitialspace', dest='skipinitialspace', action='store_true',
help='Ignore whitespace immediately following the delimiter.')
if 'blanks' not in self.override_flags:
if 'I' not in self.override_flags:
self.argparser.add_argument(
'--blanks', dest='blanks', action='store_true',
help='Do not convert "", "na", "n/a", "none", "null", "." to NULL.')
if 'blanks' not in self.override_flags:
self.argparser.add_argument(
'--null-value', dest='null_values', nargs='+', default=[],
help='Convert this value to NULL. --null-value can be specified multiple times.')
if 'date-format' not in self.override_flags:
self.argparser.add_argument(
'--date-format', dest='date_format',
help='Specify a strptime date format string like "%%m/%%d/%%Y".')
if 'datetime-format' not in self.override_flags:
self.argparser.add_argument(
'--datetime-format', dest='datetime_format',
help='Specify a strptime datetime format string like "%%m/%%d/%%Y %%I:%%M %%p".')
self.argparser.add_argument(
'--no-leading-zeroes', dest='no_leading_zeroes', action='store_true',
help='Do not convert a numeric value with leading zeroes to a number.')
if 'H' not in self.override_flags:
self.argparser.add_argument(
'-H', '--no-header-row', dest='no_header_row', action='store_true',
Expand Down Expand Up @@ -347,27 +347,31 @@ def get_column_types(self):
type_kwargs['null_values'].append(null_value)

text_type = agate.Text(**type_kwargs)
number_type = agate.Number(locale=self.args.locale, **type_kwargs)

if getattr(self.args, 'no_inference', None):
types = [text_type]
elif getattr(self.args, 'out_quoting', None) == 2:
types = [number_type, text_type]
else:
# See the order in the `agate.TypeTester` class.
types = [
agate.Boolean(**type_kwargs),
agate.TimeDelta(**type_kwargs),
agate.Date(date_format=self.args.date_format, **type_kwargs),
agate.DateTime(datetime_format=self.args.datetime_format, **type_kwargs),
text_type,
]

# In order to parse dates like "20010101".
if self.args.date_format or self.args.datetime_format:
types.insert(-1, number_type)
number_type = agate.Number(
locale=self.args.locale, no_leading_zeroes=getattr(self.args, 'no_leading_zeroes', None), **type_kwargs
)

if getattr(self.args, 'out_quoting', None) == 2: # QUOTE_NONUMERIC
types = [number_type, text_type]
else:
types.insert(1, number_type)
# See the order in the `agate.TypeTester` class.
types = [
agate.Boolean(**type_kwargs),
agate.TimeDelta(**type_kwargs),
agate.Date(date_format=self.args.date_format, **type_kwargs),
agate.DateTime(datetime_format=self.args.datetime_format, **type_kwargs),
text_type,
]

# In order to parse dates like "20010101".
if self.args.date_format or self.args.datetime_format:
types.insert(-1, number_type)
else:
types.insert(1, number_type)

return agate.TypeTester(types=types)

Expand Down
2 changes: 1 addition & 1 deletion csvkit/utilities/csvclean.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

class CSVClean(CSVKitUtility):
description = 'Report and fix common errors in a CSV file.'
override_flags = ['L', 'blanks', 'date-format', 'datetime-format']
override_flags = ['L', 'I']

def add_arguments(self):
self.argparser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion csvkit/utilities/csvcut.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

class CSVCut(CSVKitUtility):
description = 'Filter and truncate CSV files. Like the Unix "cut" command, but for tabular data.'
override_flags = ['L', 'blanks', 'date-format', 'datetime-format']
override_flags = ['L', 'I']

def add_arguments(self):
self.argparser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion csvkit/utilities/csvformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class CSVFormat(CSVKitUtility):
description = 'Convert a CSV file to a custom output format.'
override_flags = ['blanks', 'date-format', 'datetime-format']
override_flags = ['I']

def add_arguments(self):
self.argparser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion csvkit/utilities/csvgrep.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

class CSVGrep(CSVKitUtility):
description = 'Search CSV files. Like the Unix "grep" command, but for tabular data.'
override_flags = ['L', 'blanks', 'date-format', 'datetime-format']
override_flags = ['L', 'I']

def add_arguments(self):
self.argparser.add_argument(
Expand Down
3 changes: 2 additions & 1 deletion csvkit/utilities/csvjoin.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def add_arguments(self):
'Specify "0" to disable sniffing entirely, or "-1" to sniff the entire file.')
self.argparser.add_argument(
'-I', '--no-inference', dest='no_inference', action='store_true',
help='Disable type inference when parsing CSV input.')
help='Disable type inference (and --locale, --date-format, --datetime-format, --no-leading-zeroes) '
'when parsing the input.')

def main(self):
if isatty(sys.stdin) and self.args.input_paths == ['-']:
Expand Down
3 changes: 2 additions & 1 deletion csvkit/utilities/csvjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def add_arguments(self):
'Specify "0" to disable sniffing entirely, or "-1" to sniff the entire file.')
self.argparser.add_argument(
'-I', '--no-inference', dest='no_inference', action='store_true',
help='Disable type inference (and --locale, --date-format, --datetime-format) when parsing CSV input.')
help='Disable type inference (and --locale, --date-format, --datetime-format, --no-leading-zeroes) '
'when parsing the input.')

def main(self):
"""
Expand Down
3 changes: 2 additions & 1 deletion csvkit/utilities/csvlook.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def add_arguments(self):
'Specify "0" to disable sniffing entirely, or "-1" to sniff the entire file.')
self.argparser.add_argument(
'-I', '--no-inference', dest='no_inference', action='store_true',
help='Disable type inference when parsing the input. This disables the reformatting of values.')
help='Disable type inference (and --locale, --date-format, --datetime-format, --no-leading-zeroes) '
'when parsing the input.')

def main(self):
if self.additional_input_expected():
Expand Down
3 changes: 2 additions & 1 deletion csvkit/utilities/csvpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def add_arguments(self):
'Specify "0" to disable sniffing entirely, or "-1" to sniff the entire file.')
self.argparser.add_argument(
'-I', '--no-inference', dest='no_inference', action='store_true',
help='Disable type inference when parsing the input. This disables the reformatting of values.')
help='Disable type inference (and --locale, --date-format, --datetime-format, --no-leading-zeroes) '
'when parsing the input.')

def main(self):
if self.input_file == sys.stdin:
Expand Down
3 changes: 2 additions & 1 deletion csvkit/utilities/csvsort.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def add_arguments(self):
'Specify "0" to disable sniffing entirely, or "-1" to sniff the entire file.')
self.argparser.add_argument(
'-I', '--no-inference', dest='no_inference', action='store_true',
help='Disable type inference when parsing the input.')
help='Disable type inference (and --locale, --date-format, --datetime-format, --no-leading-zeroes) '
'when parsing the input.')

def main(self):
if self.args.names_only:
Expand Down
7 changes: 4 additions & 3 deletions csvkit/utilities/csvsql.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,16 @@ def add_arguments(self):
'Specify "0" to disable sniffing entirely, or "-1" to sniff the entire file.')
self.argparser.add_argument(
'-I', '--no-inference', dest='no_inference', action='store_true',
help='Disable type inference when parsing the input.')
help='Disable type inference (and --locale, --date-format, --datetime-format, --no-leading-zeroes) '
'when parsing the input.')
self.argparser.add_argument(
'--chunk-size', dest='chunk_size', type=int,
help='Chunk size for batch insert into the table. Requires --insert.')
self.argparser.add_argument(
'--min-col-len', dest='min_col_len', type=int,
'--min-col-len', dest='min_col_len', type=int, default=1,
help='The minimum length of text columns.')
self.argparser.add_argument(
'--col-len-multiplier', dest='col_len_multiplier', type=int,
'--col-len-multiplier', dest='col_len_multiplier', type=int, default=1,
help='Multiply the maximum column length by this multiplier to accomodate larger values in later runs.')

def main(self):
Expand Down
2 changes: 1 addition & 1 deletion csvkit/utilities/csvstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def _skip_lines(f, args):
class CSVStack(CSVKitUtility):
description = 'Stack up the rows from multiple CSV files, optionally adding a grouping value.'
# Override 'f' because the utility accepts multiple files.
override_flags = ['f', 'L', 'blanks', 'date-format', 'datetime-format']
override_flags = ['f', 'L', 'I']

def add_arguments(self):
self.argparser.add_argument(
Expand Down
3 changes: 2 additions & 1 deletion csvkit/utilities/csvstat.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ def add_arguments(self):
'Specify "0" to disable sniffing entirely, or "-1" to sniff the entire file.')
self.argparser.add_argument(
'-I', '--no-inference', dest='no_inference', action='store_true',
help='Disable type inference when parsing the input. Disable reformatting of values.')
help='Disable type inference (and --locale, --date-format, --datetime-format, --no-leading-zeroes) '
'when parsing the input.')

def main(self):
if self.args.names_only:
Expand Down
3 changes: 2 additions & 1 deletion csvkit/utilities/in2csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def add_arguments(self):
'Specify "0" to disable sniffing entirely, or "-1" to sniff the entire file.')
self.argparser.add_argument(
'-I', '--no-inference', dest='no_inference', action='store_true',
help='Disable type inference (and --locale, --date-format, --datetime-format) when parsing CSV input.')
help='Disable type inference (and --locale, --date-format, --datetime-format, --no-leading-zeroes) '
'when parsing CSV input.')

# This is called only from open_excel_input_file(), but is a separate method to use caching.
@functools.lru_cache
Expand Down
2 changes: 1 addition & 1 deletion csvkit/utilities/sql2csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
class SQL2CSV(CSVKitUtility):
description = 'Execute a SQL query on a database and output the result to a CSV file.'
# Overrides all flags except --linenumbers, --verbose, --version.
override_flags = 'f,b,d,e,H,K,L,p,q,S,t,u,z,blanks,date-format,datetime-format,zero'.split(',')
override_flags = ['f', 'b', 'd', 'e', 'H', 'I', 'K', 'L', 'p', 'q', 'S', 't', 'u', 'z', 'zero']

def add_arguments(self):
self.argparser.add_argument(
Expand Down
2 changes: 2 additions & 0 deletions docs/common_arguments.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ csvkit's tools share a set of common command-line arguments. Not every argument
--datetime-format DATETIME_FORMAT
Specify a strptime datetime format string like
"%m/%d/%Y %I:%M %p".
--no-leading-zeroes Do not convert a numeric value with leading zeroes to
a number.
-H, --no-header-row Specify that the input CSV file has no header row.
Will create default headers (a,b,c,...).
-K SKIP_LINES, --skip-lines SKIP_LINES
Expand Down
4 changes: 3 additions & 1 deletion docs/scripts/csvclean.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ Usage
--omit-error-rows Omit data rows that contain errors, from standard
output.
--label LABEL Add a "label" column to standard error. Useful in
automated workflows.
automated workflows. Use "-" to default to the input
filename.
--header-normalize-space
Strip leading and trailing whitespace and replace
sequences of whitespace characters by a single space
Expand All @@ -140,6 +141,7 @@ Usage
The value with which to fill short rows. Defaults to
none.
See also: :doc:`../common_arguments`.

Examples
Expand Down
6 changes: 3 additions & 3 deletions docs/scripts/csvformat.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ Convert a CSV file to a custom output format.:
none.
-B, --out-no-doublequote
Whether or not double quotes are doubled in the output
CSV file.
file.
-P OUT_ESCAPECHAR, --out-escapechar OUT_ESCAPECHAR
Character used to escape the delimiter in the output
CSV file if --quoting 3 ("Quote None") is specified
and to escape the QUOTECHAR if --out-no-doublequote is
file if --quoting 3 ("Quote None") is specified and to
escape the QUOTECHAR if --out-no-doublequote is
specified.
-M OUT_LINETERMINATOR, --out-lineterminator OUT_LINETERMINATOR
Character used to terminate lines in the output file.
Expand Down
2 changes: 1 addition & 1 deletion docs/scripts/csvgrep.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Filter tabular data to only those rows where certain columns contain a given val
file (stripped of line separators) is an exact match
of the cell value, the row matches.
-i, --invert-match Select non-matching rows, instead of matching rows.
-a --any-match Select rows in which any column matches, instead of
-a, --any-match Select rows in which any column matches, instead of
all columns.
See also: :doc:`../common_arguments`.
Expand Down
7 changes: 5 additions & 2 deletions docs/scripts/csvjoin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,11 @@ Merges two or more CSV tables together using a method analogous to SQL JOIN oper
starting at the right.
-y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing.
-I, --no-inference Disable type inference when parsing CSV input.
bytes. Specify "0" to disable sniffing entirely, or
"-1" to sniff the entire file.
-I, --no-inference Disable type inference (and --locale, --date-format,
--datetime-format, --no-leading-zeroes) when parsing
the input.
Note that the join operation requires reading all files into memory. Don't try
this on very large files.
Expand Down
6 changes: 4 additions & 2 deletions docs/scripts/csvjson.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,11 @@ Converts a CSV file into JSON or GeoJSON (depending on flags):
rather than as an array.
-y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing.
bytes. Specify "0" to disable sniffing entirely, or
"-1" to sniff the entire file.
-I, --no-inference Disable type inference (and --locale, --date-format,
--datetime-format) when parsing CSV input.
--datetime-format, --no-leading-zeroes) when parsing
the input.
See also: :doc:`../common_arguments`.

Expand Down
5 changes: 3 additions & 2 deletions docs/scripts/csvlook.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ Renders a CSV to the command line in a Markdown-compatible, fixed-width format:
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing entirely, or
"-1" to sniff the entire file.
-I, --no-inference Disable type inference when parsing the input. This
disables the reformatting of values.
-I, --no-inference Disable type inference (and --locale, --date-format,
--datetime-format, --no-leading-zeroes) when parsing
the input.
If a table is too wide to display properly try piping the output to ``less -S`` or truncating it using :doc:`csvcut`.

Expand Down
5 changes: 3 additions & 2 deletions docs/scripts/csvpy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ Loads a CSV file into a :class:`agate.csv.Reader` object and then drops into a P
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing entirely, or
"-1" to sniff the entire file.
-I, --no-inference Disable type inference when parsing the input. This
disables the reformatting of values.
-I, --no-inference Disable type inference (and --locale, --date-format,
--datetime-format, --no-leading-zeroes) when parsing
the input.
This tool will automatically use the IPython shell if it is installed, otherwise it will use the running Python shell.

Expand Down
7 changes: 5 additions & 2 deletions docs/scripts/csvsort.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ Sort CSV files. Like the Unix "sort" command, but for tabular data:
-i, --ignore-case Perform case-independent sorting.
-y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing.
-I, --no-inference Disable type inference when parsing the input.
bytes. Specify "0" to disable sniffing entirely, or
"-1" to sniff the entire file.
-I, --no-inference Disable type inference (and --locale, --date-format,
--datetime-format, --no-leading-zeroes) when parsing
the input.
See also: :doc:`../common_arguments`.

Expand Down
Loading

0 comments on commit 7c4478f

Please sign in to comment.