From c2d22797cc1238649540bc49d2b5ed095755562f Mon Sep 17 00:00:00 2001 From: Matias Carrasco Kind Date: Tue, 26 Apr 2016 09:57:39 -0500 Subject: [PATCH 1/7] adding parser to load_table --- easyaccess/easyaccess.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py index 68be11a..f88ec32 100755 --- a/easyaccess/easyaccess.py +++ b/easyaccess/easyaccess.py @@ -56,6 +56,10 @@ def colored(line, color): return line import webbrowser import signal +class KeyParser(argparse.ArgumentParser): + def error(self, message): + sys.exit(2) + logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') @@ -1866,7 +1870,23 @@ def do_load_table(self, line, name=''): - For fits file header must have columns names and data types - For filenames use .csv or .fits do not use extra points """ - filename = self.get_filename(line) + line = line.replace(';','') + load_parser = KeyParser(prog='', usage='', add_help=False) + load_parser.add_argument('filename', help='name for the file', action='store', default=None) + load_parser.add_argument('--name', help='name for the table', action='store', default='') + load_parser.add_argument('-h', '--help', help='print help', action='store_true') + try: + load_args = load_parser.parse_args(line.split()) + except: + self.do_help('load_table') + return + if load_args.help: + self.do_help('load_table') + return + filename = self.get_filename(load_args.filename) + name = load_args.name + print(filename) + print(name) if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) From 656076f3cc4b63a75d884fb5fbbf21c7c3e45f78 Mon Sep 17 00:00:00 2001 From: Matias Carrasco Kind Date: Tue, 26 Apr 2016 15:31:15 -0500 Subject: [PATCH 2/7] inline parsing arguments for load_table --- easyaccess/easyaccess.py | 4 +--- easyaccess/eautils/fileio.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py index f88ec32..8fafcb7 100755 --- a/easyaccess/easyaccess.py +++ b/easyaccess/easyaccess.py @@ -1877,7 +1877,7 @@ def do_load_table(self, line, name=''): load_parser.add_argument('-h', '--help', help='print help', action='store_true') try: load_args = load_parser.parse_args(line.split()) - except: + except SystemExit: self.do_help('load_table') return if load_args.help: @@ -1885,8 +1885,6 @@ def do_load_table(self, line, name=''): return filename = self.get_filename(load_args.filename) name = load_args.name - print(filename) - print(name) if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) diff --git a/easyaccess/eautils/fileio.py b/easyaccess/eautils/fileio.py index 6174e78..57ebb1f 100644 --- a/easyaccess/eautils/fileio.py +++ b/easyaccess/eautils/fileio.py @@ -13,7 +13,7 @@ import numpy as np import version import fitsio - +import pandas as pd try: import easyaccess.eautils.dtypes as eatypes From 6389aa97315a9cb6c3cabc2a390ccd893d6a2da3 Mon Sep 17 00:00:00 2001 From: Matias Carrasco Kind Date: Wed, 27 Apr 2016 10:54:42 -0500 Subject: [PATCH 3/7] added --tablename options for inside and outside the interpreter --- easyaccess/easyaccess.py | 47 +++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py index 8fafcb7..4756f8d 100755 --- a/easyaccess/easyaccess.py +++ b/easyaccess/easyaccess.py @@ -1857,7 +1857,7 @@ def do_load_table(self, line, name=''): """ DB:Loads a table from a file (csv or fits) taking name from filename and columns from header - Usage: load_table + Usage: load_table [--name NAME] Ex: example.csv has the following content RA,DEC,MAG 1.23,0.13,23 @@ -1865,6 +1865,10 @@ def do_load_table(self, line, name=''): This command will create a table named EXAMPLE with 3 columns RA,DEC and MAG and values taken from file + Optional Arguments: + + --tablename NAME given name for the table, default is taken from filename + Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format as data (using ',' or space) - For fits file header must have columns names and data types @@ -1873,7 +1877,7 @@ def do_load_table(self, line, name=''): line = line.replace(';','') load_parser = KeyParser(prog='', usage='', add_help=False) load_parser.add_argument('filename', help='name for the file', action='store', default=None) - load_parser.add_argument('--name', help='name for the table', action='store', default='') + load_parser.add_argument('--tablename', help='name for the table', action='store', default='') load_parser.add_argument('-h', '--help', help='print help', action='store_true') try: load_args = load_parser.parse_args(line.split()) @@ -1884,7 +1888,7 @@ def do_load_table(self, line, name=''): self.do_help('load_table') return filename = self.get_filename(load_args.filename) - name = load_args.name + name = load_args.tablename if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) @@ -1945,21 +1949,39 @@ def do_append_table(self, line, name=''): """ DB:Appends a table from a file (csv or fits) taking name from filename and columns from header. - Usage: append_table + Usage: append_table [--name NAME] Ex: example.csv has the following content RA,DEC,MAG 1.23,0.13,23 0.13,0.01,22 This command will append the contents of example.csv to the table named EXAMPLE. + It is meant to use after load_table command + + Optional Arguments: + + --tablename NAME given name for the table, default is taken from filename Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format as data (using ',' or space) - For fits file header must have columns names and data types - For filenames use .csv or .fits do not use extra points """ - - filename = self.get_filename(line) + line = line.replace(';','') + append_parser = KeyParser(prog='', usage='', add_help=False) + append_parser.add_argument('filename', help='name for the file', action='store', default=None) + append_parser.add_argument('--tablename', help='name for the table to append to', action='store', default='') + append_parser.add_argument('-h', '--help', help='print help', action='store_true') + try: + append_args = append_parser.parse_args(line.split()) + except SystemExit: + self.do_help('append_table') + return + if append_args.help: + self.do_help('append_table') + return + filename = self.get_filename(append_args.filename) + name = append_args.tablename if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) @@ -2272,10 +2294,15 @@ def initial_message(quiet=False, clear=True): help="Loads a sql command, execute it and exit") parser.add_argument("-lt", "--load_table", dest='loadtable', help="Loads data from a csv, tab, or fits formatted file \ - into a DB table using the filename as the table name") + into a DB table using the filename as the table name or a custom \ + name with --tablename MYTABLE") parser.add_argument("-at", "--append_table", dest='appendtable', help="Appends data from a csv, tab, or fits formatted file \ - into a DB table using the filename as the table name") + into a DB table using the filename as the table name or a custom \ + name with --tablename MYABLE") + parser.add_argument("--tablename", dest='tablename', + help="Custom table name to be used with --load_table\ + or --append_table") parser.add_argument("-s", "--db",dest='db', #choices=[...]? help="Override database name [dessci,desoper,destest]") parser.add_argument("-q", "--quiet", action="store_true", dest='quiet', @@ -2395,12 +2422,16 @@ def colored(line, color): return line initial_message(args.quiet, clear=False) cmdinterp = easy_or(conf, desconf, db, interactive=False, quiet=args.quiet) linein = "load_table " + args.loadtable + if args.tablename is not None: + linein += ' --tablename ' + args.tablename cmdinterp.onecmd(linein) os._exit(0) elif args.appendtable is not None: initial_message(args.quiet, clear=False) cmdinterp = easy_or(conf, desconf, db, interactive=False, quiet=args.quiet) linein = "append_table " + args.appendtable + if args.tablename is not None: + linein += ' --tablename ' + args.tablename cmdinterp.onecmd(linein) os._exit(0) else: From 42f3836ad3fd95d9188887ac206abbbaf7aa8f3a Mon Sep 17 00:00:00 2001 From: Matias Carrasco Kind Date: Wed, 27 Apr 2016 14:11:37 -0500 Subject: [PATCH 4/7] added --chunksize option --- easyaccess/easyaccess.py | 87 ++++++++++++++++++++++++++++-------- easyaccess/eautils/fileio.py | 44 ++++++++++++------ 2 files changed, 99 insertions(+), 32 deletions(-) diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py index 4756f8d..c78e802 100755 --- a/easyaccess/easyaccess.py +++ b/easyaccess/easyaccess.py @@ -1878,6 +1878,8 @@ def do_load_table(self, line, name=''): load_parser = KeyParser(prog='', usage='', add_help=False) load_parser.add_argument('filename', help='name for the file', action='store', default=None) load_parser.add_argument('--tablename', help='name for the table', action='store', default='') + load_parser.add_argument('--chunksize', help='number of rows to read in blocks to avoid memory ' + 'issues', action='store', type=int, default=None) load_parser.add_argument('-h', '--help', help='print help', action='store_true') try: load_args = load_parser.parse_args(line.split()) @@ -1889,6 +1891,7 @@ def do_load_table(self, line, name=''): return filename = self.get_filename(load_args.filename) name = load_args.tablename + chunk = load_args.chunksize if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) @@ -1910,28 +1913,76 @@ def do_load_table(self, line, name=''): return # Get the data in a way that Oracle understands - columns = data.ea_get_columns() - values = data.ea_get_values() - dtypes = data.ea_get_dtypes() - # Clean up the original object - del data + iteration = 0 + done = False + total_rows = 0 + if data.file_type == 'pandas': + while not done: + try: + df = data.get_chunk(chunk) + if len(df) == 0: break + if iteration == 0: + dtypes = eafile.get_dtypes(df) + columns = df.columns.values.tolist() + values = df.values.tolist() + total_rows += len(df) + del df + except: + break + if iteration == 0: + try: + self.create_table(table, columns, dtypes) + except: + print_exception() + self.drop_table(table) + return + + try: + if not done: + self.insert_data(table, columns, values, dtypes) + iteration += 1 + except: + print_exception() + self.drop_table(table) + return + + if data.file_type == 'fits': + if chunk is None: chunk = data[1].get_nrows() + start = 0 + while not done: + try: + df = data + if iteration == 0: + dtypes = eafile.get_dtypes(df) + columns = df[1].get_colnames() + values = df[1][start:start+chunk].tolist() + start += chunk + if len(values) == 0 : break + total_rows += len(values) + except: + break + if iteration == 0: + try: + self.create_table(table, columns, dtypes) + except: + print_exception() + self.drop_table(table) + return + + try: + if not done: + self.insert_data(table, columns, values, dtypes) + iteration += 1 + except: + print_exception() + self.drop_table(table) + return - try: - self.create_table(table, columns, dtypes) - except: - print_exception() - self.drop_table(table) - return - try: - self.insert_data(table, columns, values, dtypes) - except: - print_exception() - self.drop_table(table) - return - print(colored('\n Table %s loaded successfully.\n' % table.upper(), "green")) + print(colored('\n ** Table %s loaded successfully with %d rows.\n' % (table.upper(), total_rows), + "green")) print(colored(' You may want to refresh the metadata so your new table appears during\n autocompletion',"cyan")) print(colored(' DESDB ~> refresh_metadata_cache;',"cyan")) diff --git a/easyaccess/eautils/fileio.py b/easyaccess/eautils/fileio.py index 38436ec..f2da956 100644 --- a/easyaccess/eautils/fileio.py +++ b/easyaccess/eautils/fileio.py @@ -274,6 +274,20 @@ def read_file(filename): return data +def get_dtypes(df): + """ + Get dtypes from pandas DataFrame or fitsio.FITS + """ + if df.file_type == 'pandas': + dtypes = [df[c].dtype if df[c].dtype.kind != 'O' + else np.dtype('S' + str(max(df[c].str.len()))) + for i, c in enumerate(df)] + if df.file_type == 'fits': + dtype = df[1].get_rec_dtype(vstorage='fixed')[0] + dtypes = [dtype[i] for i, d in enumerate(dtype.descr)] + return dtypes + + def read_pandas(filename): """ Read an input file into a pandas DataFrame. Accepted file @@ -295,22 +309,23 @@ def read_pandas(filename): if ext in ('.csv', '.tab'): if ext == '.csv': sepa = ',' if ext == '.tab': sepa = None - df = pd.read_csv(filename, sep=sepa) - elif ext in ('.h5'): - df = pd.read_hdf(filename, key='data') + df = pd.read_csv(filename, sep=sepa, iterator = True) + #elif ext in ('.h5'): + # df = pd.read_hdf(filename, key='data') # iterator for hdf in padnas 0.18 except: msg = 'Problem reading %s\n' % filename raise IOError(msg) # Monkey patch to grab columns and values # List comprehension is faster but less readable - dtypes = [df[c].dtype if df[c].dtype.kind != 'O' - else np.dtype('S' + str(max(df[c].str.len()))) - for i, c in enumerate(df)] + #dtypes = [df[c].dtype if df[c].dtype.kind != 'O' + # else np.dtype('S' + str(max(df[c].str.len()))) + # for i, c in enumerate(df)] - df.ea_get_columns = df.columns.values.tolist - df.ea_get_values = df.values.tolist - df.ea_get_dtypes = lambda: dtypes + #df.ea_get_columns = df.columns.values.tolist + #df.ea_get_values = df.values.tolist + #df.ea_get_dtypes = lambda: dtypes + df.file_type = 'pandas' return df @@ -334,12 +349,13 @@ def read_fitsio(filename): msg = 'Problem reading %s\n' % filename raise IOError(msg) # Monkey patch to grab columns and values - dtype = fits[1].get_rec_dtype(vstorage='fixed')[0] - dtypes = [dtype[i] for i, d in enumerate(dtype.descr)] + #dtype = fits[1].get_rec_dtype(vstorage='fixed')[0] + #dtypes = [dtype[i] for i, d in enumerate(dtype.descr)] - fits.ea_get_columns = fits[1].get_colnames - fits.ea_get_values = fits[1].read().tolist - fits.ea_get_dtypes = lambda: dtypes + #fits.ea_get_columns = fits[1].get_colnames + #fits.ea_get_values = fits[1].read().tolist + #fits.ea_get_dtypes = lambda: dtypes + fits.file_type = 'fits' # ## # Hack to just get a subset of columns ### x1,x2 = 25,37 From 4d505ed8f86cc22ff0faf0e64c7610e78b09c107 Mon Sep 17 00:00:00 2001 From: Matias Carrasco Kind Date: Wed, 27 Apr 2016 16:23:51 -0500 Subject: [PATCH 5/7] Added load and append tables by chunks --- easyaccess/easyaccess.py | 87 ++++++++++++++++++++++++++++-------- easyaccess/eautils/fileio.py | 1 + 2 files changed, 70 insertions(+), 18 deletions(-) diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py index c78e802..7c01af4 100755 --- a/easyaccess/easyaccess.py +++ b/easyaccess/easyaccess.py @@ -1803,7 +1803,7 @@ def create_table(self, table, columns, dtypes): self.cur.execute(qtable) if self.autocommit: self.con.commit() - def insert_data(self, table, columns, values, dtypes=None): + def insert_data(self, table, columns, values, dtypes=None, niter = 0): """Insert data into a DB table. Trim trailing whitespace from string columns. Because of the @@ -1849,8 +1849,8 @@ def insert_data(self, table, columns, values, dtypes=None): raise cx_Oracle.DatabaseError(msg) print(colored( - '\n Inserted %d rows and %d columns into table %s in %.2f seconds' % ( - len(values), len(columns), table.upper(), t2 - t1), "green")) + '\n [Iter: %d] Inserted %d rows and %d columns into table %s in %.2f seconds' % ( + niter+1, len(values), len(columns), table.upper(), t2 - t1), "green")) def do_load_table(self, line, name=''): @@ -1868,6 +1868,8 @@ def do_load_table(self, line, name=''): Optional Arguments: --tablename NAME given name for the table, default is taken from filename + --chunksize CHUNK Number of rows to be inserted at a time. Useful for large files + that do not fit in memory Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format as data (using ',' or space) @@ -1921,13 +1923,13 @@ def do_load_table(self, line, name=''): while not done: try: df = data.get_chunk(chunk) + df.file_type = 'pandas' if len(df) == 0: break if iteration == 0: dtypes = eafile.get_dtypes(df) - columns = df.columns.values.tolist() + columns = df.columns.values.tolist() values = df.values.tolist() total_rows += len(df) - del df except: break if iteration == 0: @@ -1937,10 +1939,9 @@ def do_load_table(self, line, name=''): print_exception() self.drop_table(table) return - try: if not done: - self.insert_data(table, columns, values, dtypes) + self.insert_data(table, columns, values, dtypes, iteration) iteration += 1 except: print_exception() @@ -1972,7 +1973,7 @@ def do_load_table(self, line, name=''): try: if not done: - self.insert_data(table, columns, values, dtypes) + self.insert_data(table, columns, values, dtypes, iteration) iteration += 1 except: print_exception() @@ -2012,6 +2013,8 @@ def do_append_table(self, line, name=''): Optional Arguments: --tablename NAME given name for the table, default is taken from filename + --chunksize CHUNK Number of rows to be inserted at a time. Useful for large files + that do not fit in memory Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format as data (using ',' or space) @@ -2022,6 +2025,8 @@ def do_append_table(self, line, name=''): append_parser = KeyParser(prog='', usage='', add_help=False) append_parser.add_argument('filename', help='name for the file', action='store', default=None) append_parser.add_argument('--tablename', help='name for the table to append to', action='store', default='') + append_parser.add_argument('--chunksize', help='number of rows to read in blocks to avoid memory ' + 'issues', action='store', default=None, type=int) append_parser.add_argument('-h', '--help', help='print help', action='store_true') try: append_args = append_parser.parse_args(line.split()) @@ -2033,6 +2038,7 @@ def do_append_table(self, line, name=''): return filename = self.get_filename(append_args.filename) name = append_args.tablename + chunk = append_args.chunksize if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) @@ -2052,18 +2058,56 @@ def do_append_table(self, line, name=''): print_exception() return - columns = data.ea_get_columns() - values = data.ea_get_values() - dtypes = data.ea_get_dtypes() - del data - try: - self.insert_data(table, columns, values, dtypes) - except: - print_exception() - return + iteration = 0 + done = False + total_rows = 0 + if data.file_type == 'pandas': + while not done: + try: + df = data.get_chunk(chunk) + df.file_type = 'pandas' + if len(df) == 0: break + if iteration == 0: + dtypes = eafile.get_dtypes(df) + columns = df.columns.values.tolist() + values = df.values.tolist() + total_rows += len(df) + except: + break + try: + if not done: + self.insert_data(table, columns, values, dtypes, iteration) + iteration += 1 + except: + print_exception() + return + + if data.file_type == 'fits': + if chunk is None: chunk = data[1].get_nrows() + start = 0 + while not done: + try: + df = data + if iteration == 0: + dtypes = eafile.get_dtypes(df) + columns = df[1].get_colnames() + values = df[1][start:start+chunk].tolist() + start += chunk + if len(values) == 0 : break + total_rows += len(values) + except: + break + try: + if not done: + self.insert_data(table, columns, values, dtypes, iteration) + iteration += 1 + except: + print_exception() + return - print(colored('\n Table %s appended successfully.' % table.upper(), "green")) + print(colored('\n ** Table %s appended successfully with %d rows.' % (table.upper(), total_rows), + "green")) def complete_append_table(self, text, line, start_idx, end_idx): @@ -2354,6 +2398,9 @@ def initial_message(quiet=False, clear=True): parser.add_argument("--tablename", dest='tablename', help="Custom table name to be used with --load_table\ or --append_table") + parser.add_argument("--chunksize", dest='chunksize', type=int, default = None, + help="Number of rows to be inserted at a time. Useful for large files \ + that do not fit in memory. Use with --load_table") parser.add_argument("-s", "--db",dest='db', #choices=[...]? help="Override database name [dessci,desoper,destest]") parser.add_argument("-q", "--quiet", action="store_true", dest='quiet', @@ -2475,6 +2522,8 @@ def colored(line, color): return line linein = "load_table " + args.loadtable if args.tablename is not None: linein += ' --tablename ' + args.tablename + if args.chunksize is not None: + linein += ' --chunksize ' + str(args.chunksize) cmdinterp.onecmd(linein) os._exit(0) elif args.appendtable is not None: @@ -2483,6 +2532,8 @@ def colored(line, color): return line linein = "append_table " + args.appendtable if args.tablename is not None: linein += ' --tablename ' + args.tablename + if args.chunksize is not None: + linein += ' --chunksize ' + str(args.chunksize) cmdinterp.onecmd(linein) os._exit(0) else: diff --git a/easyaccess/eautils/fileio.py b/easyaccess/eautils/fileio.py index f2da956..b093971 100644 --- a/easyaccess/eautils/fileio.py +++ b/easyaccess/eautils/fileio.py @@ -282,6 +282,7 @@ def get_dtypes(df): dtypes = [df[c].dtype if df[c].dtype.kind != 'O' else np.dtype('S' + str(max(df[c].str.len()))) for i, c in enumerate(df)] + if df.file_type == 'fits': dtype = df[1].get_rec_dtype(vstorage='fixed')[0] dtypes = [dtype[i] for i, d in enumerate(dtype.descr)] From 163532cbde2473ad8133ac552ffc6a7d893c31d8 Mon Sep 17 00:00:00 2001 From: Matias Carrasco Kind Date: Wed, 27 Apr 2016 17:08:39 -0500 Subject: [PATCH 6/7] wrapping up this fix or loading big files --- easyaccess/easyaccess.py | 27 +++++++++++++++++++++++---- easyaccess/eautils/fileio.py | 10 ++++++---- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py index 7c01af4..02a31d2 100755 --- a/easyaccess/easyaccess.py +++ b/easyaccess/easyaccess.py @@ -1897,6 +1897,11 @@ def do_load_table(self, line, name=''): if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) + if ext == '.h5' and chunk is not None: + print(colored("\nHDF5 file upload with chunksize is not supported yet. Try without " + "--chunksize\n","red")) + return + if name == '': table = base else: @@ -1909,7 +1914,7 @@ def do_load_table(self, line, name=''): return try: - data = self.load_data(filename) + data, iterator = self.load_data(filename) except: print_exception() return @@ -1922,7 +1927,10 @@ def do_load_table(self, line, name=''): if data.file_type == 'pandas': while not done: try: - df = data.get_chunk(chunk) + if iterator: + df = data.get_chunk(chunk) + else: + df = data df.file_type = 'pandas' if len(df) == 0: break if iteration == 0: @@ -1943,6 +1951,7 @@ def do_load_table(self, line, name=''): if not done: self.insert_data(table, columns, values, dtypes, iteration) iteration += 1 + if not iterator: done =True except: print_exception() self.drop_table(table) @@ -2042,6 +2051,12 @@ def do_append_table(self, line, name=''): if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) + if ext == '.h5' and chunk is not None: + print(colored("\nHDF5 file upload with chunksize is not supported yet. Try without " + "--chunksize\n","red")) + return + + if name == '': table = base else: @@ -2053,7 +2068,7 @@ def do_append_table(self, line, name=''): '\n DESDB ~> CREATE TABLE %s (COL1 TYPE1(SIZE), ..., COLN TYPEN(SIZE));\n' % table.upper()) return try: - data = self.load_data(filename) + data, iterator = self.load_data(filename) except: print_exception() return @@ -2065,7 +2080,10 @@ def do_append_table(self, line, name=''): if data.file_type == 'pandas': while not done: try: - df = data.get_chunk(chunk) + if iterator: + df = data.get_chunk(chunk) + else: + df = data df.file_type = 'pandas' if len(df) == 0: break if iteration == 0: @@ -2079,6 +2097,7 @@ def do_append_table(self, line, name=''): if not done: self.insert_data(table, columns, values, dtypes, iteration) iteration += 1 + if not iterator: done = True except: print_exception() return diff --git a/easyaccess/eautils/fileio.py b/easyaccess/eautils/fileio.py index b093971..2747ded 100644 --- a/easyaccess/eautils/fileio.py +++ b/easyaccess/eautils/fileio.py @@ -311,8 +311,10 @@ def read_pandas(filename): if ext == '.csv': sepa = ',' if ext == '.tab': sepa = None df = pd.read_csv(filename, sep=sepa, iterator = True) - #elif ext in ('.h5'): - # df = pd.read_hdf(filename, key='data') # iterator for hdf in padnas 0.18 + iterator = True + elif ext in ('.h5'): + df = pd.read_hdf(filename, key='data') # iterator for hdf in padnas 0.18 + iterator = False except: msg = 'Problem reading %s\n' % filename raise IOError(msg) @@ -328,7 +330,7 @@ def read_pandas(filename): #df.ea_get_dtypes = lambda: dtypes df.file_type = 'pandas' - return df + return df, iterator def read_fitsio(filename): @@ -366,7 +368,7 @@ def read_fitsio(filename): ### fits.ea_get_values = lambda: fits[1].read(columns=fits.ea_get_columns()).tolist() ### fits.ea_get_dtypes = lambda: dtypes[x1:x2] - return fits + return fits, True if __name__ == "__main__": From 66f91c615f50320aae1fd344b2829d93e7e7045d Mon Sep 17 00:00:00 2001 From: Matias Carrasco Kind Date: Wed, 27 Apr 2016 17:15:20 -0500 Subject: [PATCH 7/7] fix documentation --- easyaccess/easyaccess.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py index 02a31d2..ac622dd 100755 --- a/easyaccess/easyaccess.py +++ b/easyaccess/easyaccess.py @@ -1857,7 +1857,7 @@ def do_load_table(self, line, name=''): """ DB:Loads a table from a file (csv or fits) taking name from filename and columns from header - Usage: load_table [--name NAME] + Usage: load_table [--tablename NAME] [--chunksize CHUNK] Ex: example.csv has the following content RA,DEC,MAG 1.23,0.13,23 @@ -2010,7 +2010,7 @@ def do_append_table(self, line, name=''): """ DB:Appends a table from a file (csv or fits) taking name from filename and columns from header. - Usage: append_table [--name NAME] + Usage: append_table [--tablename NAME] [--chunksize CHUNK] Ex: example.csv has the following content RA,DEC,MAG 1.23,0.13,23