From c2d22797cc1238649540bc49d2b5ed095755562f Mon Sep 17 00:00:00 2001
From: Matias Carrasco Kind <mgckind@gmail.com>
Date: Tue, 26 Apr 2016 09:57:39 -0500
Subject: [PATCH 1/7] adding parser to load_table

---
 easyaccess/easyaccess.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py
index 68be11a..f88ec32 100755
--- a/easyaccess/easyaccess.py
+++ b/easyaccess/easyaccess.py
@@ -56,6 +56,10 @@ def colored(line, color): return line
 import webbrowser
 import signal
 
+class KeyParser(argparse.ArgumentParser):
+    def error(self, message):
+        sys.exit(2)
+
 logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s',
                     datefmt='%m/%d/%Y %I:%M:%S %p')
 
@@ -1866,7 +1870,23 @@ def do_load_table(self, line, name=''):
               - For fits file header must have columns names and data types
               - For filenames use <table_name>.csv or <table_name>.fits do not use extra points
         """
-        filename = self.get_filename(line)
+        line = line.replace(';','')
+        load_parser = KeyParser(prog='', usage='', add_help=False)
+        load_parser.add_argument('filename', help='name for the file', action='store', default=None)
+        load_parser.add_argument('--name', help='name for the table', action='store', default='')
+        load_parser.add_argument('-h', '--help', help='print help', action='store_true')
+        try:
+            load_args = load_parser.parse_args(line.split())
+        except:
+            self.do_help('load_table')
+            return
+        if load_args.help:
+            self.do_help('load_table')
+            return
+        filename = self.get_filename(load_args.filename)
+        name = load_args.name
+        print(filename)
+        print(name)
         if filename is None: return
         base, ext = os.path.splitext(os.path.basename(filename))
 

From 656076f3cc4b63a75d884fb5fbbf21c7c3e45f78 Mon Sep 17 00:00:00 2001
From: Matias Carrasco Kind <mgckind@gmail.com>
Date: Tue, 26 Apr 2016 15:31:15 -0500
Subject: [PATCH 2/7] inline parsing arguments for load_table

---
 easyaccess/easyaccess.py     | 4 +---
 easyaccess/eautils/fileio.py | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py
index f88ec32..8fafcb7 100755
--- a/easyaccess/easyaccess.py
+++ b/easyaccess/easyaccess.py
@@ -1877,7 +1877,7 @@ def do_load_table(self, line, name=''):
         load_parser.add_argument('-h', '--help', help='print help', action='store_true')
         try:
             load_args = load_parser.parse_args(line.split())
-        except:
+        except SystemExit:
             self.do_help('load_table')
             return
         if load_args.help:
@@ -1885,8 +1885,6 @@ def do_load_table(self, line, name=''):
             return
         filename = self.get_filename(load_args.filename)
         name = load_args.name
-        print(filename)
-        print(name)
         if filename is None: return
         base, ext = os.path.splitext(os.path.basename(filename))
 
diff --git a/easyaccess/eautils/fileio.py b/easyaccess/eautils/fileio.py
index 6174e78..57ebb1f 100644
--- a/easyaccess/eautils/fileio.py
+++ b/easyaccess/eautils/fileio.py
@@ -13,7 +13,7 @@
 import numpy as np
 import version
 import fitsio
-
+import pandas as pd
 
 try:
     import easyaccess.eautils.dtypes as eatypes

From 6389aa97315a9cb6c3cabc2a390ccd893d6a2da3 Mon Sep 17 00:00:00 2001
From: Matias Carrasco Kind <mgckind@gmail.com>
Date: Wed, 27 Apr 2016 10:54:42 -0500
Subject: [PATCH 3/7] added --tablename options for inside and outside the
 interpreter

---
 easyaccess/easyaccess.py | 47 +++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py
index 8fafcb7..4756f8d 100755
--- a/easyaccess/easyaccess.py
+++ b/easyaccess/easyaccess.py
@@ -1857,7 +1857,7 @@ def do_load_table(self, line, name=''):
         """
         DB:Loads a table from a file (csv or fits) taking name from filename and columns from header
 
-        Usage: load_table <filename>
+        Usage: load_table <filename> [--name NAME]
         Ex: example.csv has the following content
              RA,DEC,MAG
              1.23,0.13,23
@@ -1865,6 +1865,10 @@ def do_load_table(self, line, name=''):
 
         This command will create a table named EXAMPLE with 3 columns RA,DEC and MAG and values taken from file
 
+        Optional Arguments:
+
+            --tablename NAME            given name for the table, default is taken from filename
+
         Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format
         as data (using ',' or space)
               - For fits file header must have columns names and data types
@@ -1873,7 +1877,7 @@ def do_load_table(self, line, name=''):
         line = line.replace(';','')
         load_parser = KeyParser(prog='', usage='', add_help=False)
         load_parser.add_argument('filename', help='name for the file', action='store', default=None)
-        load_parser.add_argument('--name', help='name for the table', action='store', default='')
+        load_parser.add_argument('--tablename', help='name for the table', action='store', default='')
         load_parser.add_argument('-h', '--help', help='print help', action='store_true')
         try:
             load_args = load_parser.parse_args(line.split())
@@ -1884,7 +1888,7 @@ def do_load_table(self, line, name=''):
             self.do_help('load_table')
             return
         filename = self.get_filename(load_args.filename)
-        name = load_args.name
+        name = load_args.tablename
         if filename is None: return
         base, ext = os.path.splitext(os.path.basename(filename))
 
@@ -1945,21 +1949,39 @@ def do_append_table(self, line, name=''):
         """
         DB:Appends a table from a file (csv or fits) taking name from filename and columns from header.
 
-        Usage: append_table <filename>
+        Usage: append_table <filename> [--name NAME]
         Ex: example.csv has the following content
              RA,DEC,MAG
              1.23,0.13,23
              0.13,0.01,22
 
         This command will append the contents of example.csv to the table named EXAMPLE.
+        It is meant to use after load_table command
+
+         Optional Arguments:
+    
+              --tablename NAME            given name for the table, default is taken from filename
 
         Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format
         as data (using ',' or space)
               - For fits file header must have columns names and data types
               - For filenames use <table_name>.csv or <table_name>.fits do not use extra points
         """
-
-        filename = self.get_filename(line)
+        line = line.replace(';','')
+        append_parser = KeyParser(prog='', usage='', add_help=False)
+        append_parser.add_argument('filename', help='name for the file', action='store', default=None)
+        append_parser.add_argument('--tablename', help='name for the table to append to', action='store', default='')
+        append_parser.add_argument('-h', '--help', help='print help', action='store_true')
+        try:
+            append_args = append_parser.parse_args(line.split())
+        except SystemExit:
+            self.do_help('append_table')
+            return
+        if append_args.help:
+            self.do_help('append_table')
+            return
+        filename = self.get_filename(append_args.filename)
+        name = append_args.tablename
         if filename is None: return
         base, ext = os.path.splitext(os.path.basename(filename))
 
@@ -2272,10 +2294,15 @@ def initial_message(quiet=False, clear=True):
                         help="Loads a sql command, execute it and exit")
     parser.add_argument("-lt", "--load_table", dest='loadtable', 
                         help="Loads data from a csv, tab, or fits formatted file \
-                        into a DB table using the filename as the table name")
+                        into a DB table using the filename as the table name or a custom \
+                        name with --tablename MYTABLE")
     parser.add_argument("-at", "--append_table", dest='appendtable', 
                         help="Appends data from a csv, tab, or fits formatted file \
-                        into a DB table using the filename as the table name")
+                        into a DB table using the filename as the table name or a custom \
+                        name with --tablename MYABLE")
+    parser.add_argument("--tablename", dest='tablename', 
+                        help="Custom table name to be used with --load_table\
+                        or --append_table")
     parser.add_argument("-s", "--db",dest='db', #choices=[...]?
                         help="Override database name [dessci,desoper,destest]")
     parser.add_argument("-q", "--quiet", action="store_true", dest='quiet', 
@@ -2395,12 +2422,16 @@ def colored(line, color): return line
         initial_message(args.quiet, clear=False)
         cmdinterp = easy_or(conf, desconf, db, interactive=False, quiet=args.quiet)
         linein = "load_table " + args.loadtable
+        if args.tablename is not None:
+            linein += ' --tablename ' + args.tablename
         cmdinterp.onecmd(linein)
         os._exit(0)
     elif args.appendtable is not None:
         initial_message(args.quiet, clear=False)
         cmdinterp = easy_or(conf, desconf, db, interactive=False, quiet=args.quiet)
         linein = "append_table " + args.appendtable
+        if args.tablename is not None:
+            linein += ' --tablename ' + args.tablename
         cmdinterp.onecmd(linein)
         os._exit(0)
     else:

From 42f3836ad3fd95d9188887ac206abbbaf7aa8f3a Mon Sep 17 00:00:00 2001
From: Matias Carrasco Kind <mgckind@gmail.com>
Date: Wed, 27 Apr 2016 14:11:37 -0500
Subject: [PATCH 4/7] added --chunksize option

---
 easyaccess/easyaccess.py     | 87 ++++++++++++++++++++++++++++--------
 easyaccess/eautils/fileio.py | 44 ++++++++++++------
 2 files changed, 99 insertions(+), 32 deletions(-)

diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py
index 4756f8d..c78e802 100755
--- a/easyaccess/easyaccess.py
+++ b/easyaccess/easyaccess.py
@@ -1878,6 +1878,8 @@ def do_load_table(self, line, name=''):
         load_parser = KeyParser(prog='', usage='', add_help=False)
         load_parser.add_argument('filename', help='name for the file', action='store', default=None)
         load_parser.add_argument('--tablename', help='name for the table', action='store', default='')
+        load_parser.add_argument('--chunksize', help='number of rows to read in blocks to avoid memory '
+    'issues', action='store', type=int, default=None)
         load_parser.add_argument('-h', '--help', help='print help', action='store_true')
         try:
             load_args = load_parser.parse_args(line.split())
@@ -1889,6 +1891,7 @@ def do_load_table(self, line, name=''):
             return
         filename = self.get_filename(load_args.filename)
         name = load_args.tablename
+        chunk = load_args.chunksize
         if filename is None: return
         base, ext = os.path.splitext(os.path.basename(filename))
 
@@ -1910,28 +1913,76 @@ def do_load_table(self, line, name=''):
             return
 
         # Get the data in a way that Oracle understands
-        columns = data.ea_get_columns()
-        values  = data.ea_get_values()
-        dtypes  = data.ea_get_dtypes()
 
-        # Clean up the original object
-        del data
+        iteration = 0
+        done = False
+        total_rows = 0
+        if data.file_type == 'pandas':
+            while not done:
+                try:
+                    df = data.get_chunk(chunk)
+                    if len(df) == 0: break
+                    if iteration == 0:
+                        dtypes = eafile.get_dtypes(df)
+                        columns = df.columns.values.tolist()
+                    values = df.values.tolist()
+                    total_rows += len(df)
+                    del df
+                except:
+                    break
+                if iteration == 0:
+                    try:
+                        self.create_table(table, columns, dtypes)
+                    except:
+                        print_exception()
+                        self.drop_table(table)
+                        return
+
+                try:
+                    if not done:
+                        self.insert_data(table, columns, values, dtypes)
+                        iteration += 1
+                except:
+                    print_exception()
+                    self.drop_table(table)
+                    return
+
+        if data.file_type == 'fits':
+            if chunk is None: chunk = data[1].get_nrows()
+            start = 0
+            while not done:
+                try:
+                    df = data
+                    if iteration == 0:
+                        dtypes = eafile.get_dtypes(df)
+                        columns = df[1].get_colnames()
+                    values = df[1][start:start+chunk].tolist()
+                    start += chunk
+                    if len(values) == 0 : break
+                    total_rows += len(values)
+                except:
+                    break
+                if iteration == 0:
+                    try:
+                        self.create_table(table, columns, dtypes)
+                    except:
+                        print_exception()
+                        self.drop_table(table)
+                        return
+
+                try:
+                    if not done:
+                        self.insert_data(table, columns, values, dtypes)
+                        iteration += 1
+                except:
+                    print_exception()
+                    self.drop_table(table)
+                    return
 
-        try:
-            self.create_table(table, columns, dtypes)
-        except:
-            print_exception()
-            self.drop_table(table)
-            return
 
-        try:
-            self.insert_data(table, columns, values, dtypes)
-        except:
-            print_exception()
-            self.drop_table(table)
-            return
 
-        print(colored('\n Table %s loaded successfully.\n' % table.upper(), "green"))
+        print(colored('\n ** Table %s loaded successfully with %d rows.\n' % (table.upper(), total_rows),
+                      "green"))
         print(colored(' You may want to refresh the metadata so your new table appears during\n autocompletion',"cyan"))
         print(colored(' DESDB ~> refresh_metadata_cache;',"cyan"))
 
diff --git a/easyaccess/eautils/fileio.py b/easyaccess/eautils/fileio.py
index 38436ec..f2da956 100644
--- a/easyaccess/eautils/fileio.py
+++ b/easyaccess/eautils/fileio.py
@@ -274,6 +274,20 @@ def read_file(filename):
     return data
 
 
+def get_dtypes(df):
+    """
+    Get dtypes from pandas DataFrame or fitsio.FITS
+    """
+    if df.file_type == 'pandas':
+        dtypes = [df[c].dtype if df[c].dtype.kind != 'O'
+                  else np.dtype('S' + str(max(df[c].str.len())))
+                  for i, c in enumerate(df)]
+    if df.file_type == 'fits':
+        dtype = df[1].get_rec_dtype(vstorage='fixed')[0]
+        dtypes = [dtype[i] for i, d in enumerate(dtype.descr)]
+    return dtypes
+
+
 def read_pandas(filename):
     """
     Read an input file into a pandas DataFrame.  Accepted file
@@ -295,22 +309,23 @@ def read_pandas(filename):
         if ext in ('.csv', '.tab'):
             if ext == '.csv': sepa = ','
             if ext == '.tab': sepa = None
-            df = pd.read_csv(filename, sep=sepa)
-        elif ext in ('.h5'):
-            df = pd.read_hdf(filename, key='data')
+            df = pd.read_csv(filename, sep=sepa, iterator = True)
+        #elif ext in ('.h5'):
+        #    df = pd.read_hdf(filename, key='data')  # iterator for hdf in padnas 0.18
     except:
         msg = 'Problem reading %s\n' % filename
         raise IOError(msg)
 
     # Monkey patch to grab columns and values
     # List comprehension is faster but less readable
-    dtypes = [df[c].dtype if df[c].dtype.kind != 'O'
-              else np.dtype('S' + str(max(df[c].str.len())))
-              for i, c in enumerate(df)]
+    #dtypes = [df[c].dtype if df[c].dtype.kind != 'O'
+    #          else np.dtype('S' + str(max(df[c].str.len())))
+    #          for i, c in enumerate(df)]
 
-    df.ea_get_columns = df.columns.values.tolist
-    df.ea_get_values = df.values.tolist
-    df.ea_get_dtypes = lambda: dtypes
+    #df.ea_get_columns = df.columns.values.tolist
+    #df.ea_get_values = df.values.tolist
+    #df.ea_get_dtypes = lambda: dtypes
+    df.file_type = 'pandas'
 
     return df
 
@@ -334,12 +349,13 @@ def read_fitsio(filename):
         msg = 'Problem reading %s\n' % filename
         raise IOError(msg)
     # Monkey patch to grab columns and values
-    dtype = fits[1].get_rec_dtype(vstorage='fixed')[0]
-    dtypes = [dtype[i] for i, d in enumerate(dtype.descr)]
+    #dtype = fits[1].get_rec_dtype(vstorage='fixed')[0]
+    #dtypes = [dtype[i] for i, d in enumerate(dtype.descr)]
 
-    fits.ea_get_columns = fits[1].get_colnames
-    fits.ea_get_values = fits[1].read().tolist
-    fits.ea_get_dtypes = lambda: dtypes
+    #fits.ea_get_columns = fits[1].get_colnames
+    #fits.ea_get_values = fits[1].read().tolist
+    #fits.ea_get_dtypes = lambda: dtypes
+    fits.file_type = 'fits'
 
     # ## # Hack to just get a subset of columns
     ### x1,x2 = 25,37

From 4d505ed8f86cc22ff0faf0e64c7610e78b09c107 Mon Sep 17 00:00:00 2001
From: Matias Carrasco Kind <mgckind@gmail.com>
Date: Wed, 27 Apr 2016 16:23:51 -0500
Subject: [PATCH 5/7] Added load and append tables by chunks

---
 easyaccess/easyaccess.py     | 87 ++++++++++++++++++++++++++++--------
 easyaccess/eautils/fileio.py |  1 +
 2 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py
index c78e802..7c01af4 100755
--- a/easyaccess/easyaccess.py
+++ b/easyaccess/easyaccess.py
@@ -1803,7 +1803,7 @@ def create_table(self, table, columns, dtypes):
         self.cur.execute(qtable)
         if self.autocommit: self.con.commit()
 
-    def insert_data(self, table, columns, values, dtypes=None):
+    def insert_data(self, table, columns, values, dtypes=None, niter = 0):
         """Insert data into a DB table. 
 
         Trim trailing whitespace from string columns. Because of the
@@ -1849,8 +1849,8 @@ def insert_data(self, table, columns, values, dtypes=None):
             raise cx_Oracle.DatabaseError(msg)
                 
         print(colored(
-            '\n Inserted %d rows and %d columns into table %s in %.2f seconds' % (
-                len(values), len(columns), table.upper(), t2 - t1), "green"))
+            '\n [Iter: %d] Inserted %d rows and %d columns into table %s in %.2f seconds' % (
+                niter+1, len(values), len(columns), table.upper(), t2 - t1), "green"))
 
 
     def do_load_table(self, line, name=''):
@@ -1868,6 +1868,8 @@ def do_load_table(self, line, name=''):
         Optional Arguments:
 
             --tablename NAME            given name for the table, default is taken from filename
+            --chunksize CHUNK           Number of rows to be inserted at a time. Useful for large files
+                                        that do not fit in memory
 
         Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format
         as data (using ',' or space)
@@ -1921,13 +1923,13 @@ def do_load_table(self, line, name=''):
             while not done:
                 try:
                     df = data.get_chunk(chunk)
+                    df.file_type = 'pandas'
                     if len(df) == 0: break
                     if iteration == 0:
                         dtypes = eafile.get_dtypes(df)
-                        columns = df.columns.values.tolist()
+                    columns = df.columns.values.tolist()
                     values = df.values.tolist()
                     total_rows += len(df)
-                    del df
                 except:
                     break
                 if iteration == 0:
@@ -1937,10 +1939,9 @@ def do_load_table(self, line, name=''):
                         print_exception()
                         self.drop_table(table)
                         return
-
                 try:
                     if not done:
-                        self.insert_data(table, columns, values, dtypes)
+                        self.insert_data(table, columns, values, dtypes, iteration)
                         iteration += 1
                 except:
                     print_exception()
@@ -1972,7 +1973,7 @@ def do_load_table(self, line, name=''):
 
                 try:
                     if not done:
-                        self.insert_data(table, columns, values, dtypes)
+                        self.insert_data(table, columns, values, dtypes, iteration)
                         iteration += 1
                 except:
                     print_exception()
@@ -2012,6 +2013,8 @@ def do_append_table(self, line, name=''):
          Optional Arguments:
     
               --tablename NAME            given name for the table, default is taken from filename
+              --chunksize CHUNK           Number of rows to be inserted at a time. Useful for large files
+                                        that do not fit in memory
 
         Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format
         as data (using ',' or space)
@@ -2022,6 +2025,8 @@ def do_append_table(self, line, name=''):
         append_parser = KeyParser(prog='', usage='', add_help=False)
         append_parser.add_argument('filename', help='name for the file', action='store', default=None)
         append_parser.add_argument('--tablename', help='name for the table to append to', action='store', default='')
+        append_parser.add_argument('--chunksize', help='number of rows to read in blocks to avoid memory '
+                                                       'issues', action='store', default=None, type=int)
         append_parser.add_argument('-h', '--help', help='print help', action='store_true')
         try:
             append_args = append_parser.parse_args(line.split())
@@ -2033,6 +2038,7 @@ def do_append_table(self, line, name=''):
             return
         filename = self.get_filename(append_args.filename)
         name = append_args.tablename
+        chunk = append_args.chunksize
         if filename is None: return
         base, ext = os.path.splitext(os.path.basename(filename))
 
@@ -2052,18 +2058,56 @@ def do_append_table(self, line, name=''):
             print_exception()
             return
 
-        columns = data.ea_get_columns()
-        values  = data.ea_get_values()
-        dtypes  = data.ea_get_dtypes()
-        del data
 
-        try:
-            self.insert_data(table, columns, values, dtypes)
-        except:
-            print_exception()
-            return
+        iteration = 0
+        done = False
+        total_rows = 0
+        if data.file_type == 'pandas':
+            while not done:
+                try:
+                    df = data.get_chunk(chunk)
+                    df.file_type = 'pandas'
+                    if len(df) == 0: break
+                    if iteration == 0:
+                        dtypes = eafile.get_dtypes(df)
+                    columns = df.columns.values.tolist()
+                    values = df.values.tolist()
+                    total_rows += len(df)
+                except:
+                    break
+                try:
+                    if not done:
+                        self.insert_data(table, columns, values, dtypes, iteration)
+                        iteration += 1
+                except:
+                    print_exception()
+                    return
+
+        if data.file_type == 'fits':
+            if chunk is None: chunk = data[1].get_nrows()
+            start = 0
+            while not done:
+                try:
+                    df = data
+                    if iteration == 0:
+                        dtypes = eafile.get_dtypes(df)
+                        columns = df[1].get_colnames()
+                    values = df[1][start:start+chunk].tolist()
+                    start += chunk
+                    if len(values) == 0 : break
+                    total_rows += len(values)
+                except:
+                    break
+                try:
+                    if not done:
+                        self.insert_data(table, columns, values, dtypes, iteration)
+                        iteration += 1
+                except:
+                    print_exception()
+                    return
 
-        print(colored('\n Table %s appended successfully.' % table.upper(), "green"))
+        print(colored('\n ** Table %s appended successfully with %d rows.' % (table.upper(), total_rows),
+                      "green"))
 
 
     def complete_append_table(self, text, line, start_idx, end_idx):
@@ -2354,6 +2398,9 @@ def initial_message(quiet=False, clear=True):
     parser.add_argument("--tablename", dest='tablename', 
                         help="Custom table name to be used with --load_table\
                         or --append_table")
+    parser.add_argument("--chunksize", dest='chunksize', type=int, default = None,
+                        help="Number of rows to be inserted at a time. Useful for large files \
+                                        that do not fit in memory. Use with --load_table")
     parser.add_argument("-s", "--db",dest='db', #choices=[...]?
                         help="Override database name [dessci,desoper,destest]")
     parser.add_argument("-q", "--quiet", action="store_true", dest='quiet', 
@@ -2475,6 +2522,8 @@ def colored(line, color): return line
         linein = "load_table " + args.loadtable
         if args.tablename is not None:
             linein += ' --tablename ' + args.tablename
+        if args.chunksize is not None:
+            linein += ' --chunksize ' + str(args.chunksize)
         cmdinterp.onecmd(linein)
         os._exit(0)
     elif args.appendtable is not None:
@@ -2483,6 +2532,8 @@ def colored(line, color): return line
         linein = "append_table " + args.appendtable
         if args.tablename is not None:
             linein += ' --tablename ' + args.tablename
+        if args.chunksize is not None:
+            linein += ' --chunksize ' + str(args.chunksize)
         cmdinterp.onecmd(linein)
         os._exit(0)
     else:
diff --git a/easyaccess/eautils/fileio.py b/easyaccess/eautils/fileio.py
index f2da956..b093971 100644
--- a/easyaccess/eautils/fileio.py
+++ b/easyaccess/eautils/fileio.py
@@ -282,6 +282,7 @@ def get_dtypes(df):
         dtypes = [df[c].dtype if df[c].dtype.kind != 'O'
                   else np.dtype('S' + str(max(df[c].str.len())))
                   for i, c in enumerate(df)]
+
     if df.file_type == 'fits':
         dtype = df[1].get_rec_dtype(vstorage='fixed')[0]
         dtypes = [dtype[i] for i, d in enumerate(dtype.descr)]

From 163532cbde2473ad8133ac552ffc6a7d893c31d8 Mon Sep 17 00:00:00 2001
From: Matias Carrasco Kind <mgckind@gmail.com>
Date: Wed, 27 Apr 2016 17:08:39 -0500
Subject: [PATCH 6/7] wrapping up this fix for loading big files

---
 easyaccess/easyaccess.py     | 27 +++++++++++++++++++++++----
 easyaccess/eautils/fileio.py | 10 ++++++----
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py
index 7c01af4..02a31d2 100755
--- a/easyaccess/easyaccess.py
+++ b/easyaccess/easyaccess.py
@@ -1897,6 +1897,11 @@ def do_load_table(self, line, name=''):
         if filename is None: return
         base, ext = os.path.splitext(os.path.basename(filename))
 
+        if ext == '.h5' and chunk is not None:
+            print(colored("\nHDF5 file upload with chunksize is not supported yet. Try without "
+                          "--chunksize\n","red"))
+            return
+
         if name == '':
             table = base
         else:
@@ -1909,7 +1914,7 @@ def do_load_table(self, line, name=''):
             return
 
         try:
-            data = self.load_data(filename)
+            data, iterator = self.load_data(filename)
         except:
             print_exception()
             return
@@ -1922,7 +1927,10 @@ def do_load_table(self, line, name=''):
         if data.file_type == 'pandas':
             while not done:
                 try:
-                    df = data.get_chunk(chunk)
+                    if iterator:
+                        df = data.get_chunk(chunk)
+                    else:
+                        df = data
                     df.file_type = 'pandas'
                     if len(df) == 0: break
                     if iteration == 0:
@@ -1943,6 +1951,7 @@ def do_load_table(self, line, name=''):
                     if not done:
                         self.insert_data(table, columns, values, dtypes, iteration)
                         iteration += 1
+                        if not iterator: done =True
                 except:
                     print_exception()
                     self.drop_table(table)
@@ -2042,6 +2051,12 @@ def do_append_table(self, line, name=''):
         if filename is None: return
         base, ext = os.path.splitext(os.path.basename(filename))
 
+        if ext == '.h5' and chunk is not None:
+            print(colored("\nHDF5 file upload with chunksize is not supported yet. Try without "
+                          "--chunksize\n","red"))
+            return
+
+
         if name == '':
             table = base
         else:
@@ -2053,7 +2068,7 @@ def do_append_table(self, line, name=''):
                   '\n DESDB ~> CREATE TABLE %s (COL1 TYPE1(SIZE), ..., COLN TYPEN(SIZE));\n' % table.upper())
             return
         try:
-            data = self.load_data(filename)
+            data, iterator = self.load_data(filename)
         except:
             print_exception()
             return
@@ -2065,7 +2080,10 @@ def do_append_table(self, line, name=''):
         if data.file_type == 'pandas':
             while not done:
                 try:
-                    df = data.get_chunk(chunk)
+                    if iterator:
+                        df = data.get_chunk(chunk)
+                    else:
+                        df = data
                     df.file_type = 'pandas'
                     if len(df) == 0: break
                     if iteration == 0:
@@ -2079,6 +2097,7 @@ def do_append_table(self, line, name=''):
                     if not done:
                         self.insert_data(table, columns, values, dtypes, iteration)
                         iteration += 1
+                        if not iterator: done = True
                 except:
                     print_exception()
                     return
diff --git a/easyaccess/eautils/fileio.py b/easyaccess/eautils/fileio.py
index b093971..2747ded 100644
--- a/easyaccess/eautils/fileio.py
+++ b/easyaccess/eautils/fileio.py
@@ -311,8 +311,10 @@ def read_pandas(filename):
             if ext == '.csv': sepa = ','
             if ext == '.tab': sepa = None
             df = pd.read_csv(filename, sep=sepa, iterator = True)
-        #elif ext in ('.h5'):
-        #    df = pd.read_hdf(filename, key='data')  # iterator for hdf in padnas 0.18
+            iterator = True
+        elif ext in ('.h5'):
+            df = pd.read_hdf(filename, key='data')  # iterator for hdf in padnas 0.18
+            iterator = False
     except:
         msg = 'Problem reading %s\n' % filename
         raise IOError(msg)
@@ -328,7 +330,7 @@ def read_pandas(filename):
     #df.ea_get_dtypes = lambda: dtypes
     df.file_type = 'pandas'
 
-    return df
+    return df, iterator
 
 
 def read_fitsio(filename):
@@ -366,7 +368,7 @@ def read_fitsio(filename):
     ### fits.ea_get_values = lambda: fits[1].read(columns=fits.ea_get_columns()).tolist()
     ### fits.ea_get_dtypes = lambda: dtypes[x1:x2]
 
-    return fits
+    return fits, True
 
 
 if __name__ == "__main__":

From 66f91c615f50320aae1fd344b2829d93e7e7045d Mon Sep 17 00:00:00 2001
From: Matias Carrasco Kind <mgckind@gmail.com>
Date: Wed, 27 Apr 2016 17:15:20 -0500
Subject: [PATCH 7/7] fix documentation

---
 easyaccess/easyaccess.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py
index 02a31d2..ac622dd 100755
--- a/easyaccess/easyaccess.py
+++ b/easyaccess/easyaccess.py
@@ -1857,7 +1857,7 @@ def do_load_table(self, line, name=''):
         """
         DB:Loads a table from a file (csv or fits) taking name from filename and columns from header
 
-        Usage: load_table <filename> [--name NAME]
+        Usage: load_table <filename> [--tablename NAME] [--chunksize CHUNK]
         Ex: example.csv has the following content
              RA,DEC,MAG
              1.23,0.13,23
@@ -2010,7 +2010,7 @@ def do_append_table(self, line, name=''):
         """
         DB:Appends a table from a file (csv or fits) taking name from filename and columns from header.
 
-        Usage: append_table <filename> [--name NAME]
+        Usage: append_table <filename> [--tablename NAME] [--chunksize CHUNK]
         Ex: example.csv has the following content
              RA,DEC,MAG
              1.23,0.13,23