From 1259dca1174cac7311d1f1f873161b12a8a53c3c Mon Sep 17 00:00:00 2001
From: Dan Allan
Date: Thu, 11 Jul 2013 10:01:37 -0400
Subject: [PATCH 01/16] ENH #4163 Use SQLAlchemy for DB abstraction

TST Import sqlalchemy on Travis.
DOC add docstrings to read sql
ENH read_sql connects via Connection, Engine, file path, or :memory: string
CLN Separate legacy code into new file, and fall back so that all old tests pass.
TST to use sqlalchemy syntax in tests
CLN sql into classes, legacy passes
FIX a few engine vs con calls
CLN pep8 cleanup
add postgres support for pandas.io.sql.get_schema
WIP: cleanup of sql io module - imported correct SQLALCHEMY type, delete redundant PandasSQLWithCon
TODO: renamed _engine_read_table, need to think of a better name.
TODO: clean up get_connection function
ENH: cleanup of SQL io
TODO: check that legacy mode works
TODO: run tests correctly
enabled coerce_float option
Cleanup and bug-fixing, mainly on legacy-mode sql.
IMPORTANT - changed legacy to require a connection rather than a cursor.
This is not yet finalized. TODO: tests and doc
Added test coverage for basic functionality using an in-memory SQLite database
Simplified API by automatically distinguishing between engine and connection.
Added warnings
---
 pandas/io/sql.py                   | 802 +++++++++++++++++++---------
 pandas/io/sql_legacy.py            | 332 ++++++++++++
 pandas/io/tests/data/iris.csv      | 151 ++++++
 pandas/io/tests/test_sql.py        | 824 +++++++++++++----------
 pandas/io/tests/test_sql_legacy.py | 497 +++++++++++++++++
 5 files changed, 1905 insertions(+), 701 deletions(-)
 create mode 100644 pandas/io/sql_legacy.py
 create mode 100644 pandas/io/tests/data/iris.csv
 create mode 100644 pandas/io/tests/test_sql_legacy.py

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index e269d14f72712..345fc4dde7d9a 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -4,20 +4,40 @@
 """
 from __future__ import print_function
 from datetime import datetime, date
-
-from pandas.compat import range, lzip, map, zip
+import warnings
+from pandas.compat import range, lzip, map, zip, raise_with_traceback
 import pandas.compat as compat
 import numpy as np
-import traceback
-from pandas.core.datetools import format as date_format
+
 from pandas.core.api import DataFrame
+from pandas.core.base import PandasObject
+
+
+class SQLAlchemyRequired(ImportError):
+    pass
+
+
+class LegacyMySQLConnection(Exception):
+    pass
+
+
+class DatabaseError(IOError):
+    pass
+
 
 #------------------------------------------------------------------------------
-# Helper execution function
+# Helper execution functions
+
+def _convert_params(sql, params):
+    """convert sql and params args to DBAPI2.0 compliant format"""
+    args = [sql]
+    if params is not None:
+        args += list(params)
+    return args
 
-def execute(sql, con, retry=True, cur=None, params=None):
+def execute(sql, con, cur=None, params=[], engine=None, flavor='sqlite'):
     """
     Execute the given SQL query using the provided connection object.
 
@@ -25,52 +45,25 @@ def execute(sql, con, retry=True, cur=None, params=None):
     ----------
     sql: string
         Query to be executed
-    con: database connection instance
-        Database connection. Must implement PEP249 (Database API v2.0).
-    retry: bool
-        Not currently implemented
-    cur: database cursor, optional
-        Must implement PEP249 (Datbase API v2.0). If cursor is not provided,
-        one will be obtained from the database connection.
+    con: SQLAlchemy engine or DBAPI2 connection (legacy mode)
+        Using SQLAlchemy makes it possible to use any DB supported by that
+        library.
+ If a DBAPI2 object is given, a supported SQL flavor must also be provided + cur: depreciated, cursor is obtained from connection params: list or tuple, optional List of parameters to pass to execute method. - + flavor : string {sqlite, mysql} specifying the flavor of SQL to use. + Ignored when using SQLAlchemy engine. Required when using DBAPI2 connection. Returns ------- - Cursor object + Results Iterable """ - try: - if cur is None: - cur = con.cursor() - - if params is None: - cur.execute(sql) - else: - cur.execute(sql, params) - return cur - except Exception: - try: - con.rollback() - except Exception: # pragma: no cover - pass + pandas_sql = pandasSQL_builder(con=con, flavor=flavor) + args = _convert_params(sql, params) + return pandas_sql.execute(*args) - print('Error on sql %s' % sql) - raise - -def _safe_fetch(cur): - try: - result = cur.fetchall() - if not isinstance(result, list): - result = list(result) - return result - except Exception as e: # pragma: no cover - excName = e.__class__.__name__ - if excName == 'OperationalError': - return [] - - -def tquery(sql, con=None, cur=None, retry=True): +def tquery(sql, con, cur=None, params=[], engine=None, flavor='sqlite'): """ Returns list of tuples corresponding to each row in given sql query. @@ -81,62 +74,50 @@ def tquery(sql, con=None, cur=None, retry=True): ---------- sql: string SQL query to be executed - con: SQLConnection or DB API 2.0-compliant connection - cur: DB API 2.0 cursor - - Provide a specific connection or a specific cursor if you are executing a - lot of sequential statements and want to commit outside. + con: SQLAlchemy engine or DBAPI2 connection (legacy mode) + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object is given, a supported SQL flavor must also be provided + cur: depreciated, cursor is obtained from connection + params: list or tuple, optional + List of parameters to pass to execute method. + flavor : string {sqlite, mysql} specifying the flavor of SQL to use. + Ignored when using SQLAlchemy engine. Required when using DBAPI2 connection. """ - cur = execute(sql, con, cur=cur) - result = _safe_fetch(cur) - - if con is not None: - try: - cur.close() - con.commit() - except Exception as e: - excName = e.__class__.__name__ - if excName == 'OperationalError': # pragma: no cover - print('Failed to commit, may need to restart interpreter') - else: - raise - - traceback.print_exc() - if retry: - return tquery(sql, con=con, retry=False) - - if result and len(result[0]) == 1: - # python 3 compat - result = list(lzip(*result)[0]) - elif result is None: # pragma: no cover - result = [] - - return result + pandas_sql = pandasSQL_builder(con=con, flavor=flavor) + args = _convert_params(sql, params) + return pandas_sql.tquery(*args) -def uquery(sql, con=None, cur=None, retry=True, params=None): +def uquery(sql, con, cur=None, params=[], engine=None, flavor='sqlite'): """ Does the same thing as tquery, but instead of returning results, it returns the number of rows affected. Good for update queries. + + Parameters + ---------- + sql: string + SQL query to be executed + con: SQLAlchemy engine or DBAPI2 connection (legacy mode) + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object is given, a supported SQL flavor must also be provided + cur: depreciated, cursor is obtained from connection + params: list or tuple, optional + List of parameters to pass to execute method. 
+ flavor : string {sqlite, mysql} specifying the flavor of SQL to use. + Ignored when using SQLAlchemy engine. Required when using DBAPI2 connection. """ - cur = execute(sql, con, cur=cur, retry=retry, params=params) + pandas_sql = pandasSQL_builder(con=con, flavor=flavor) + args = _convert_params(sql, params) + return pandas_sql.uquery(*args) - result = cur.rowcount - try: - con.commit() - except Exception as e: - excName = e.__class__.__name__ - if excName != 'OperationalError': - raise - traceback.print_exc() - if retry: - print('Looks like your connection failed, reconnecting...') - return uquery(sql, con, retry=False) - return result +#------------------------------------------------------------------------------ +# Read and write to DataFrames -def read_frame(sql, con, index_col=None, coerce_float=True, params=None): +def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, params=[]): """ Returns a DataFrame corresponding to the result set of the query string. @@ -148,35 +129,30 @@ def read_frame(sql, con, index_col=None, coerce_float=True, params=None): ---------- sql: string SQL query to be executed - con: DB connection object, optional + con: SQLAlchemy engine or DBAPI2 connection (legacy mode) + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object is given, a supported SQL flavor must also be provided index_col: string, optional column name to use for the returned DataFrame object. + flavor : string specifying the flavor of SQL to use. Ignored when using + SQLAlchemy engine. Required when using DBAPI2 connection. coerce_float : boolean, default True Attempt to convert values to non-string, non-numeric objects (like decimal.Decimal) to floating point, useful for SQL result sets + cur: depreciated, cursor is obtained from connection params: list or tuple, optional List of parameters to pass to execute method. - """ - cur = execute(sql, con, params=params) - rows = _safe_fetch(cur) - columns = [col_desc[0] for col_desc in cur.description] - - cur.close() - con.commit() + flavor : string {sqlite, mysql} specifying the flavor of SQL to use. + Ignored when using SQLAlchemy engine. Required when using DBAPI2 connection. - result = DataFrame.from_records(rows, columns=columns, - coerce_float=coerce_float) - - if index_col is not None: - result = result.set_index(index_col) - - return result + """ -frame_query = read_frame -read_sql = read_frame + pandas_sql = pandasSQL_builder(con=con, flavor=flavor) + return pandas_sql.read_sql(sql, index_col=index_col, params=params, coerce_float=coerce_float) -def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): +def to_sql(frame, name, con, flavor='sqlite', if_exists='fail'): """ Write records stored in a DataFrame to a SQL database. @@ -184,152 +160,484 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): ---------- frame: DataFrame name: name of SQL table - con: an open SQL database connection object - flavor: {'sqlite', 'mysql', 'oracle'}, default 'sqlite' + con: SQLAlchemy engine or DBAPI2 connection (legacy mode) + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object is given, a supported SQL flavor must also be provided + flavor: {'sqlite', 'mysql', 'postgres'}, default 'sqlite', ignored when using engine if_exists: {'fail', 'replace', 'append'}, default 'fail' fail: If table exists, do nothing. replace: If table exists, drop it, recreate it, and insert data. 
         append: If table exists, insert data. Create if does not exist.
     """
+    pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
+    pandas_sql.to_sql(frame, name, if_exists=if_exists)
+
+
+def read_table(table_name, con, meta=None, index_col=None, coerce_float=True):
+    """Given a table name and SQLAlchemy engine, return a DataFrame.
+    Type conversions will be done automatically.
+
+    Parameters
+    ----------
+    table_name: name of SQL table in database
+    con: SQLAlchemy engine. Legacy mode not supported
+    meta: SQLAlchemy meta, optional. If omitted, MetaData is reflected from engine
+    index_col: column to set as index, optional
+    coerce_float : boolean, default True
+        Attempt to convert values to non-string, non-numeric objects (like
+        decimal.Decimal) to floating point. Can result in loss of precision.
+
+    """
+    pandas_sql = PandasSQLWithEngine(con, meta=meta)
+    table = pandas_sql.get_table(table_name)
 
-    if 'append' in kwargs:
-        import warnings
-        warnings.warn("append is deprecated, use if_exists instead",
-                      FutureWarning)
-        if kwargs['append']:
+    if table is not None:
+        sql_select = table.select()
+        return pandas_sql.read_sql(sql_select, index_col=index_col, coerce_float=coerce_float)
+    else:
+        raise ValueError("Table %s not found with %s." % (table_name, con))
+
+
+def pandasSQL_builder(con, flavor=None, meta=None):
+    """
+    Convenience function to return the correct PandasSQL subclass based on the
+    provided parameters
+    """
+    try:
+        import sqlalchemy
+
+        if isinstance(con, sqlalchemy.engine.Engine):
+            return PandasSQLWithEngine(con, meta=meta)
         else:
-            if_exists = 'fail'
-    exists = table_exists(name, con, flavor)
-    if if_exists == 'fail' and exists:
-        raise ValueError("Table '%s' already exists." % name)
-
-    #create or drop-recreate if necessary
-    create = None
-    if exists and if_exists == 'replace':
-        create = "DROP TABLE %s" % name
-    elif not exists:
-        create = get_schema(frame, name, flavor)
-
-    if create is not None:
-        cur = con.cursor()
-        cur.execute(create)
+            warnings.warn("Not a valid SQLAlchemy engine, attempting to use as legacy DBAPI connection")
+            if flavor is None:
+                raise ValueError("""PandasSQL must be created with an SQLAlchemy engine
+                    or a DBAPI2 connection and SQL flavor""")
+            else:
+                return PandasSQLWithCon(con, flavor)
+
+    except ImportError:
+        warnings.warn("SQLAlchemy not installed, using legacy mode")
+        if flavor is None:
+            raise SQLAlchemyRequired
+        else:
+            return PandasSQLWithCon(con, flavor)
+
+
+class PandasSQL(PandasObject):
+    """
+    Subclasses should define read_sql and to_sql
+    """
+    def read_sql(self, *args, **kwargs):
+        raise ValueError("PandasSQL must be created with an engine,"
+                         " connection or cursor.")
+
+    def to_sql(self, *args, **kwargs):
+        raise ValueError("PandasSQL must be created with an engine,"
+                         " connection or cursor.")
+
+    def _create_sql_schema(self, frame, name, keys):
+        raise ValueError("PandasSQL must be created with an engine,"
+                         " connection or cursor.")
+
+    def _frame_from_data_and_columns(self, data, columns, index_col=None, coerce_float=True):
+        df = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float)
+        if index_col is not None:
+            df.set_index(index_col, inplace=True)
+        return df
+
+    def _safe_col_names(self, col_names):
+        return [s.replace(' ', '_').strip() for s in col_names]  # may not be safe enough...
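
To make the two call styles concrete, here is a minimal usage sketch against the signatures introduced in this patch (read_sql, to_sql and read_table); the frame, the 'demo' table name, and the pd/sql import aliases are illustrative only:

    import sqlite3
    import sqlalchemy
    import pandas as pd
    import pandas.io.sql as sql

    df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})

    # SQLAlchemy mode: pass an Engine; flavor is ignored because the
    # engine's dialect decides what SQL is emitted.
    engine = sqlalchemy.create_engine('sqlite:///:memory:')
    sql.to_sql(df, 'demo', con=engine)                      # creates the table
    sql.to_sql(df, 'demo', con=engine, if_exists='append')  # now 4 rows
    frame = sql.read_sql('SELECT * FROM demo', con=engine)
    frame = sql.read_table('demo', con=engine)              # engine-only shortcut

    # Legacy mode: pass a DBAPI2 connection plus an explicit flavor.
    conn = sqlite3.connect(':memory:')
    sql.to_sql(df, 'demo', con=conn, flavor='sqlite')
    frame = sql.read_sql('SELECT * FROM demo', con=conn, flavor='sqlite')
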
+ + +class PandasSQLWithEngine(PandasSQL): + """ + This class enables convertion between DataFrame and SQL databases + using SQLAlchemy to handle DataBase abstraction + """ + def __init__(self, engine, meta=None): + self.engine = engine + if not meta: + from sqlalchemy.schema import MetaData + meta = MetaData(self.engine) + meta.reflect(self.engine) + + self.meta = meta + + def execute(self, *args, **kwargs): + """Simple passthrough to SQLAlchemy engine""" + return self.engine.execute(*args, **kwargs) + + def tquery(self, *args, **kwargs): + """Accepts same args as execute""" + result = self.execute(*args, **kwargs) + return result.fetchall() + + def uquery(self, *args, **kwargs): + """Accepts same args as execute""" + result = self.execute(*args, **kwargs) + return result.rowcount + + def read_sql(self, sql, index_col=None, coerce_float=True, params=[]): + args = _convert_params(sql, params) + result = self.execute(*args) + data = result.fetchall() + columns = result.keys() + + return self._frame_from_data_and_columns(data, columns, + index_col=index_col, + coerce_float=coerce_float) + + def to_sql(self, frame, name, if_exists='fail'): + if self.engine.has_table(name): + if if_exists == 'fail': + raise ValueError("Table '%s' already exists." % name) + elif if_exists == 'replace': + #TODO: this triggers a full refresh of metadata, could probably avoid this. + self._drop_table(name) + self._create_table(frame, name) + elif if_exists == 'append': + pass # table exists and will automatically be appended to + else: + self._create_table(frame, name) + self._write(frame, name) + + def _write(self, frame, table_name): + table = self.get_table(table_name) + ins = table.insert() + # TODO: do this in one pass + # TODO this should be done globally first (or work out how to pass np + # dtypes to sql) + + def maybe_asscalar(i): + try: + return np.asscalar(i) + except AttributeError: + return i + + for t in frame.iterrows(): + self.engine.execute(ins, **dict((k, maybe_asscalar(v)) + for k, v in t[1].iteritems())) + # TODO more efficient, I'm *sure* this was just working with tuples + + def has_table(self, name): + return self.engine.has_table(name) + + def get_table(self, table_name): + if self.engine.has_table(table_name): + return self.meta.tables[table_name] + else: + return None + + def _drop_table(self, table_name): + if self.engine.has_table(table_name): + self.get_table(table_name).drop() + self.meta.clear() + self.meta.reflect() + #print(table.exists()) + + def _create_table(self, frame, table_name, keys=None): + table = self._create_sqlalchemy_table(frame, table_name, keys) + table.create() + + def _create_sql_schema(self, frame, table_name, keys=None): + table = self._create_sqlalchemy_table(frame, table_name, keys) + return str(table.compile()) + + def _create_sqlalchemy_table(self, frame, table_name, keys=None): + from sqlalchemy import Table, Column + if keys is None: + keys = [] + + safe_columns = self._safe_col_names(frame.dtypes.index) + column_types = map(self._lookup_type, frame.dtypes) + + columns = [(col_name, col_sqltype, col_name in keys) + for col_name, col_sqltype in zip(safe_columns, column_types)] + + columns = [Column(name, typ, primary_key=pk) for name, typ, pk in columns] + + return Table(table_name, self.meta, *columns) + + def _lookup_type(self, dtype): + from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date + + pytype = dtype.type + + if issubclass(pytype, np.floating): + return Float + if issubclass(pytype, np.integer): + # TODO: Refine integer size. 
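+            # (A possible refinement: sqlalchemy.types also provides
+            # SmallInteger and BigInteger, and dtype.itemsize could be
+            # used to choose between them.)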
+ return Integer + if issubclass(pytype, np.datetime64) or pytype is datetime: + # Caution: np.datetime64 is also a subclass of np.number. + return DateTime + if pytype is date: + return Date + if issubclass(pytype, np.bool_): + return Boolean + return Text + + +# ---- SQL without SQLAlchemy --- +# Flavour specific sql strings and handler class for access to DBs without SQLAlchemy installed + +# SQL type convertions for each DB +_SQL_TYPES = { + 'text': { + 'mysql': 'VARCHAR (63)', + 'sqlite': 'TEXT', + 'postgres': 'text' + }, + 'float': { + 'mysql': 'FLOAT', + 'sqlite': 'REAL', + 'postgres': 'real' + }, + 'int': { + 'mysql': 'BIGINT', + 'sqlite': 'INTEGER', + 'postgres': 'integer' + }, + 'datetime': { + 'mysql': 'DATETIME', + 'sqlite': 'TIMESTAMP', + 'postgres': 'timestamp' + }, + 'date': { + 'mysql': 'DATE', + 'sqlite': 'TIMESTAMP', + 'postgres': 'date' + }, + 'bool': { + 'mysql': 'BOOLEAN', + 'sqlite': 'INTEGER', + 'postgres': 'boolean' + } +} + +# SQL enquote and wildcard symbols +_SQL_SYMB = { + 'mysql': { + 'br_l': '`', + 'br_r': '`', + 'wld': '%s' + }, + 'sqlite': { + 'br_l': '[', + 'br_r': ']', + 'wld': '?' + }, + 'postgres': { + 'br_l': '', + 'br_r': '', + 'wld': '?' + } +} + + +class PandasSQLWithCon(PandasSQL): + def __init__(self, con, flavor): + self.con = con + if flavor not in ['sqlite', 'mysql', 'postgres']: + raise NotImplementedError + else: + self.flavor = flavor + + def execute(self, *args, **kwargs): + try: + cur = self.con.cursor() + if kwargs: + cur.execute(*args, **kwargs) + else: + cur.execute(*args) + return cur + except Exception as e: + try: + self.con.rollback() + except Exception: # pragma: no cover + ex = DatabaseError( + "Execution failed on sql: %s\n%s\nunable to rollback" % (args[0], e)) + raise_with_traceback(ex) + + ex = DatabaseError("Execution failed on sql: %s" % args[0]) + raise_with_traceback(ex) + + def tquery(self, *args): + cur = self.execute(*args) + result = self._fetchall_as_list(cur) + + # This makes into tuples + if result and len(result[0]) == 1: + # python 3 compat + result = list(lzip(*result)[0]) + elif result is None: # pragma: no cover + result = [] + return result + + def uquery(self, *args): + """ + Does the same thing as tquery, but instead of returning results, it + returns the number of rows affected. Good for update queries. + """ + cur = self.execute(*args) + return cur.rowcount + + def read_sql(self, sql, index_col=None, coerce_float=True, params=[], flavor='sqlite'): + args = _convert_params(sql, params) + cursor = self.execute(*args) + columns = [col_desc[0] for col_desc in cursor.description] + data = self._fetchall_as_list(cursor) + cursor.close() + + return self._frame_from_data_and_columns(data, columns, + index_col=index_col, + coerce_float=coerce_float) + + def to_sql(self, frame, name, con=None, if_exists='fail'): + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame: DataFrame + name: name of SQL table + con: an open SQL database connection object + flavor: {'sqlite', 'mysql', 'postgres'}, default 'sqlite' + if_exists: {'fail', 'replace', 'append'}, default 'fail' + fail: If table exists, do nothing. + replace: If table exists, drop it, recreate it, and insert data. + append: If table exists, insert data. Create if does not exist. + """ + if self.has_table(name): + if if_exists == 'fail': + raise ValueError("Table '%s' already exists." 
% name) + elif if_exists == 'replace': + self._drop_table(name) + self._create_table(frame, name) + elif if_exists == "append": + pass # should just add... + else: + self._create_table(frame, name) + + self._write(frame, name) + + def _fetchall_as_list(self, cur): + '''ensures result of fetchall is a list''' + result = cur.fetchall() + if not isinstance(result, list): + result = list(result) + return result + + def _write(self, frame, table_name): + # Replace spaces in DataFrame column names with _. + safe_names = self._safe_col_names(frame.columns) + + br_l = _SQL_SYMB[self.flavor]['br_l'] # left val quote char + br_r = _SQL_SYMB[self.flavor]['br_r'] # right val quote char + wld = _SQL_SYMB[self.flavor]['wld'] # wildcard char + + bracketed_names = [br_l + column + br_r for column in safe_names] + col_names = ','.join(bracketed_names) + wildcards = ','.join([wld] * len(safe_names)) + insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( + table_name, col_names, wildcards) + + # pandas types are badly handled if there is only 1 col (Issue #3628) + if len(frame.columns) != 1: + data = [tuple(x) for x in frame.values] + else: + data = [tuple(x) for x in frame.values.tolist()] + + cur = self.con.cursor() + cur.executemany(insert_query, data) cur.close() - cur = con.cursor() - # Replace spaces in DataFrame column names with _. - safe_names = [s.replace(' ', '_').strip() for s in frame.columns] - flavor_picker = {'sqlite': _write_sqlite, - 'mysql': _write_mysql} - - func = flavor_picker.get(flavor, None) - if func is None: - raise NotImplementedError - func(frame, name, safe_names, cur) - cur.close() - con.commit() - - -def _write_sqlite(frame, table, names, cur): - bracketed_names = ['[' + column + ']' for column in names] - col_names = ','.join(bracketed_names) - wildcards = ','.join(['?'] * len(names)) - insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( - table, col_names, wildcards) - # pandas types are badly handled if there is only 1 column ( Issue #3628 ) - if not len(frame.columns) == 1: - data = [tuple(x) for x in frame.values] - else: - data = [tuple(x) for x in frame.values.tolist()] - cur.executemany(insert_query, data) - - -def _write_mysql(frame, table, names, cur): - bracketed_names = ['`' + column + '`' for column in names] - col_names = ','.join(bracketed_names) - wildcards = ','.join([r'%s'] * len(names)) - insert_query = "INSERT INTO %s (%s) VALUES (%s)" % ( - table, col_names, wildcards) - data = [tuple(x) for x in frame.values] - cur.executemany(insert_query, data) - - -def table_exists(name, con, flavor): - flavor_map = { - 'sqlite': ("SELECT name FROM sqlite_master " - "WHERE type='table' AND name='%s';") % name, - 'mysql': "SHOW TABLES LIKE '%s'" % name} - query = flavor_map.get(flavor, None) - if query is None: - raise NotImplementedError - return len(tquery(query, con)) > 0 - - -def get_sqltype(pytype, flavor): - sqltype = {'mysql': 'VARCHAR (63)', - 'sqlite': 'TEXT'} - - if issubclass(pytype, np.floating): - sqltype['mysql'] = 'FLOAT' - sqltype['sqlite'] = 'REAL' - - if issubclass(pytype, np.integer): - #TODO: Refine integer size. - sqltype['mysql'] = 'BIGINT' - sqltype['sqlite'] = 'INTEGER' - - if issubclass(pytype, np.datetime64) or pytype is datetime: - # Caution: np.datetime64 is also a subclass of np.number. 
- sqltype['mysql'] = 'DATETIME' - sqltype['sqlite'] = 'TIMESTAMP' - - if pytype is datetime.date: - sqltype['mysql'] = 'DATE' - sqltype['sqlite'] = 'TIMESTAMP' - - if issubclass(pytype, np.bool_): - sqltype['sqlite'] = 'INTEGER' - - return sqltype[flavor] - - -def get_schema(frame, name, flavor, keys=None): - "Return a CREATE TABLE statement to suit the contents of a DataFrame." - lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) - # Replace spaces in DataFrame column names with _. - safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index] - column_types = lzip(safe_columns, map(lookup_type, frame.dtypes)) - if flavor == 'sqlite': - columns = ',\n '.join('[%s] %s' % x for x in column_types) - else: - columns = ',\n '.join('`%s` %s' % x for x in column_types) - - keystr = '' - if keys is not None: - if isinstance(keys, compat.string_types): - keys = (keys,) - keystr = ', PRIMARY KEY (%s)' % ','.join(keys) - template = """CREATE TABLE %(name)s ( - %(columns)s - %(keystr)s - );""" - create_statement = template % {'name': name, 'columns': columns, - 'keystr': keystr} - return create_statement - - -def sequence2dict(seq): - """Helper function for cx_Oracle. - - For each element in the sequence, creates a dictionary item equal - to the element and keyed by the position of the item in the list. - >>> sequence2dict(("Matt", 1)) - {'1': 'Matt', '2': 1} - - Source: - http://www.gingerandjohn.com/archives/2004/02/26/cx_oracle-executemany-example/ + def _create_table(self, frame, name, keys=None): + create_sql = self._create_sql_schema(frame, name, keys) + self.execute(create_sql) + + def has_table(self, name): + flavor_map = { + 'sqlite': ("SELECT name FROM sqlite_master " + "WHERE type='table' AND name='%s';") % name, + 'mysql': "SHOW TABLES LIKE '%s'" % name} + query = flavor_map.get(self.flavor) + if query is None: + raise NotImplementedError + return len(self.tquery(query)) > 0 + + def _drop_table(self, name): + # Previously this worried about connection tp cursor then closing... + drop_sql = "DROP TABLE %s" % name + self.execute(drop_sql) + + def _create_sql_schema(self, frame, table_name, keys=None): + "Return a CREATE TABLE statement to suit the contents of a DataFrame." + + lookup_type = lambda dtype: self._get_sqltype(dtype.type) + # Replace spaces in DataFrame column names with _. + safe_columns = self._safe_col_names(frame.dtypes.index) + + column_types = lzip(safe_columns, map(lookup_type, frame.dtypes)) + + br_l = _SQL_SYMB[self.flavor]['br_l'] # left val quote char + br_r = _SQL_SYMB[self.flavor]['br_r'] # right val quote char + col_template = br_l + '%s' + br_r + ' %s' + columns = ',\n '.join(col_template % x for x in column_types) + + keystr = '' + if keys is not None: + if isinstance(keys, compat.string_types): + keys = (keys,) + keystr = ', PRIMARY KEY (%s)' % ','.join(keys) + template = """CREATE TABLE %(name)s ( + %(columns)s + %(keystr)s + );""" + create_statement = template % {'name': table_name, 'columns': columns, + 'keystr': keystr} + return create_statement + + def _get_sqltype(self, pytype): + pytype_name = "text" + if issubclass(pytype, np.floating): + pytype_name = "float" + elif issubclass(pytype, np.integer): + pytype_name = "int" + elif issubclass(pytype, np.datetime64) or pytype is datetime: + # Caution: np.datetime64 is also a subclass of np.number. 
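+            # Unlike the old get_sqltype, where independent ifs let the
+            # last match win, this is an elif chain, so it only works if
+            # the float/int branches above never claim datetime64 first.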
+            pytype_name = "datetime"
+        elif pytype is date:
+            pytype_name = "date"
+        elif issubclass(pytype, np.bool_):
+            pytype_name = "bool"
+
+        return _SQL_TYPES[pytype_name][self.flavor]
+
+
+# legacy names
+def get_schema(frame, name, con=None, flavor='sqlite', engine=None):
+    """
+    Get the SQL db table schema for the given frame
+
+    Parameters
+    ----------
+    frame: DataFrame
+    name: name of SQL table
+    con: an open SQL database connection object
+    engine: an SQLAlchemy engine - replaces connection and flavor
+    flavor: {'sqlite', 'mysql', 'postgres'}, default 'sqlite'
+
+    """
+    pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
+    return pandas_sql._create_sql_schema(frame, name)
+
+
+
+# TODO: add deprecation warnings
+read_frame = read_sql
+write_frame = to_sql
+
diff --git a/pandas/io/sql_legacy.py b/pandas/io/sql_legacy.py
new file mode 100644
index 0000000000000..a8a5d968dd02d
--- /dev/null
+++ b/pandas/io/sql_legacy.py
@@ -0,0 +1,332 @@
+"""
+Collection of query wrappers / abstractions to both facilitate data
+retrieval and to reduce dependency on DB-specific API.
+"""
+from datetime import datetime, date
+
+import numpy as np
+import traceback
+
+from pandas.core.datetools import format as date_format
+from pandas.core.api import DataFrame, isnull
+
+#------------------------------------------------------------------------------
+# Helper execution function
+
+
+def execute(sql, con, retry=True, cur=None, params=None):
+    """
+    Execute the given SQL query using the provided connection object.
+
+    Parameters
+    ----------
+    sql: string
+        Query to be executed
+    con: database connection instance
+        Database connection. Must implement PEP249 (Database API v2.0).
+    retry: bool
+        Not currently implemented
+    cur: database cursor, optional
+        Must implement PEP249 (Database API v2.0). If cursor is not provided,
+        one will be obtained from the database connection.
+    params: list or tuple, optional
+        List of parameters to pass to execute method.
+
+    Returns
+    -------
+    Cursor object
+    """
+    try:
+        if cur is None:
+            cur = con.cursor()
+
+        if params is None:
+            cur.execute(sql)
+        else:
+            cur.execute(sql, params)
+        return cur
+    except Exception:
+        try:
+            con.rollback()
+        except Exception:  # pragma: no cover
+            pass
+
+        print('Error on sql %s' % sql)
+        raise
+
+
+def _safe_fetch(cur):
+    try:
+        result = cur.fetchall()
+        if not isinstance(result, list):
+            result = list(result)
+        return result
+    except Exception as e:  # pragma: no cover
+        excName = e.__class__.__name__
+        if excName == 'OperationalError':
+            return []
+
+
+def tquery(sql, con=None, cur=None, retry=True):
+    """
+    Returns list of tuples corresponding to each row in given sql
+    query.
+
+    If only one column selected, then plain list is returned.
+
+    Parameters
+    ----------
+    sql: string
+        SQL query to be executed
+    con: SQLConnection or DB API 2.0-compliant connection
+    cur: DB API 2.0 cursor
+
+    Provide a specific connection or a specific cursor if you are executing a
+    lot of sequential statements and want to commit outside.
+ """ + cur = execute(sql, con, cur=cur) + result = _safe_fetch(cur) + + if con is not None: + try: + cur.close() + con.commit() + except Exception as e: + excName = e.__class__.__name__ + if excName == 'OperationalError': # pragma: no cover + print ('Failed to commit, may need to restart interpreter') + else: + raise + + traceback.print_exc() + if retry: + return tquery(sql, con=con, retry=False) + + if result and len(result[0]) == 1: + # python 3 compat + result = list(list(zip(*result))[0]) + elif result is None: # pragma: no cover + result = [] + + return result + + +def uquery(sql, con=None, cur=None, retry=True, params=None): + """ + Does the same thing as tquery, but instead of returning results, it + returns the number of rows affected. Good for update queries. + """ + cur = execute(sql, con, cur=cur, retry=retry, params=params) + + result = cur.rowcount + try: + con.commit() + except Exception as e: + excName = e.__class__.__name__ + if excName != 'OperationalError': + raise + + traceback.print_exc() + if retry: + print ('Looks like your connection failed, reconnecting...') + return uquery(sql, con, retry=False) + return result + + +def read_frame(sql, con, index_col=None, coerce_float=True, params=None): + """ + Returns a DataFrame corresponding to the result set of the query + string. + + Optionally provide an index_col parameter to use one of the + columns as the index. Otherwise will be 0 to len(results) - 1. + + Parameters + ---------- + sql: string + SQL query to be executed + con: DB connection object, optional + index_col: string, optional + column name to use for the returned DataFrame object. + coerce_float : boolean, default True + Attempt to convert values to non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets + params: list or tuple, optional + List of parameters to pass to execute method. + """ + cur = execute(sql, con, params=params) + rows = _safe_fetch(cur) + columns = [col_desc[0] for col_desc in cur.description] + + cur.close() + con.commit() + + result = DataFrame.from_records(rows, columns=columns, + coerce_float=coerce_float) + + if index_col is not None: + result = result.set_index(index_col) + + return result + +frame_query = read_frame +read_sql = read_frame + + +def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame: DataFrame + name: name of SQL table + con: an open SQL database connection object + flavor: {'sqlite', 'mysql', 'oracle'}, default 'sqlite' + if_exists: {'fail', 'replace', 'append'}, default 'fail' + fail: If table exists, do nothing. + replace: If table exists, drop it, recreate it, and insert data. + append: If table exists, insert data. Create if does not exist. + """ + + if 'append' in kwargs: + import warnings + warnings.warn("append is deprecated, use if_exists instead", + FutureWarning) + if kwargs['append']: + if_exists='append' + else: + if_exists='fail' + exists = table_exists(name, con, flavor) + if if_exists == 'fail' and exists: + raise ValueError, "Table '%s' already exists." % name + + #create or drop-recreate if necessary + create = None + if exists and if_exists == 'replace': + create = "DROP TABLE %s" % name + elif not exists: + create = get_schema(frame, name, flavor) + + if create is not None: + cur = con.cursor() + cur.execute(create) + cur.close() + + cur = con.cursor() + # Replace spaces in DataFrame column names with _. 
+ safe_names = [s.replace(' ', '_').strip() for s in frame.columns] + flavor_picker = {'sqlite' : _write_sqlite, + 'mysql' : _write_mysql} + + func = flavor_picker.get(flavor, None) + if func is None: + raise NotImplementedError + func(frame, name, safe_names, cur) + cur.close() + con.commit() + + +def _write_sqlite(frame, table, names, cur): + bracketed_names = ['[' + column + ']' for column in names] + col_names = ','.join(bracketed_names) + wildcards = ','.join(['?'] * len(names)) + insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( + table, col_names, wildcards) + # pandas types are badly handled if there is only 1 column ( Issue #3628 ) + if not len(frame.columns )==1 : + data = [tuple(x) for x in frame.values] + else : + data = [tuple(x) for x in frame.values.tolist()] + cur.executemany(insert_query, data) + + +def _write_mysql(frame, table, names, cur): + bracketed_names = ['`' + column + '`' for column in names] + col_names = ','.join(bracketed_names) + wildcards = ','.join([r'%s'] * len(names)) + insert_query = "INSERT INTO %s (%s) VALUES (%s)" % ( + table, col_names, wildcards) + data = [tuple(x) for x in frame.values] + cur.executemany(insert_query, data) + + +def table_exists(name, con, flavor): + flavor_map = { + 'sqlite': ("SELECT name FROM sqlite_master " + "WHERE type='table' AND name='%s';") % name, + 'mysql' : "SHOW TABLES LIKE '%s'" % name} + query = flavor_map.get(flavor, None) + if query is None: + raise NotImplementedError + return len(tquery(query, con)) > 0 + + +def get_sqltype(pytype, flavor): + sqltype = {'mysql': 'VARCHAR (63)', + 'sqlite': 'TEXT'} + + if issubclass(pytype, np.floating): + sqltype['mysql'] = 'FLOAT' + sqltype['sqlite'] = 'REAL' + + if issubclass(pytype, np.integer): + #TODO: Refine integer size. + sqltype['mysql'] = 'BIGINT' + sqltype['sqlite'] = 'INTEGER' + + if issubclass(pytype, np.datetime64) or pytype is datetime: + # Caution: np.datetime64 is also a subclass of np.number. + sqltype['mysql'] = 'DATETIME' + sqltype['sqlite'] = 'TIMESTAMP' + + if pytype is datetime.date: + sqltype['mysql'] = 'DATE' + sqltype['sqlite'] = 'TIMESTAMP' + + if issubclass(pytype, np.bool_): + sqltype['sqlite'] = 'INTEGER' + + return sqltype[flavor] + + +def get_schema(frame, name, flavor, keys=None): + "Return a CREATE TABLE statement to suit the contents of a DataFrame." + lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) + # Replace spaces in DataFrame column names with _. + safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index] + column_types = zip(safe_columns, map(lookup_type, frame.dtypes)) + if flavor == 'sqlite': + columns = ',\n '.join('[%s] %s' % x for x in column_types) + else: + columns = ',\n '.join('`%s` %s' % x for x in column_types) + + keystr = '' + if keys is not None: + if isinstance(keys, basestring): + keys = (keys,) + keystr = ', PRIMARY KEY (%s)' % ','.join(keys) + template = """CREATE TABLE %(name)s ( + %(columns)s + %(keystr)s + );""" + create_statement = template % {'name': name, 'columns': columns, + 'keystr': keystr} + return create_statement + + +def sequence2dict(seq): + """Helper function for cx_Oracle. + + For each element in the sequence, creates a dictionary item equal + to the element and keyed by the position of the item in the list. 
+ >>> sequence2dict(("Matt", 1)) + {'1': 'Matt', '2': 1} + + Source: + http://www.gingerandjohn.com/archives/2004/02/26/cx_oracle-executemany-example/ + """ + d = {} + for k,v in zip(range(1, 1 + len(seq)), seq): + d[str(k)] = v + return d diff --git a/pandas/io/tests/data/iris.csv b/pandas/io/tests/data/iris.csv new file mode 100644 index 0000000000000..c19b9c3688515 --- /dev/null +++ b/pandas/io/tests/data/iris.csv @@ -0,0 +1,151 @@ +SepalLength,SepalWidth,PetalLength,PetalWidth,Name +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor 
+5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica \ No newline at end of file diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 38770def8eb7c..d99b0d20a04fd 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1,490 +1,406 @@ from __future__ import print_function +import unittest import sqlite3 -import sys - -import warnings - -import nose +import csv +import os import numpy as np -from pandas.core.datetools import format as date_format -from pandas.core.api import DataFrame, isnull -from pandas.compat import StringIO, range, lrange -import pandas.compat as compat +#from pandas.core.datetools import format as date_format +from pandas import DataFrame +from pandas.compat import range, lrange, iteritems + import pandas.io.sql as sql import pandas.util.testing as tm -from pandas import Series, Index, DataFrame -from datetime import datetime - -_formatters = { - datetime: lambda dt: "'%s'" % date_format(dt), - str: lambda x: "'%s'" % x, - np.str_: lambda x: "'%s'" % x, - compat.text_type: lambda x: "'%s'" % x, - compat.binary_type: lambda x: "'%s'" % x, - float: lambda x: "%.8f" % x, - int: lambda x: "%s" % x, - type(None): lambda x: "NULL", - np.float64: lambda x: "%.10f" % x, - bool: lambda x: "'%s'" % x, -} - -def format_query(sql, *args): - """ - - """ - processed_args = [] - for arg in args: - if isinstance(arg, float) and isnull(arg): - arg = None - - formatter = _formatters[type(arg)] - processed_args.append(formatter(arg)) - - return sql % tuple(processed_args) - -def _skip_if_no_MySQLdb(): - try: - import MySQLdb - except ImportError: - raise nose.SkipTest('MySQLdb not installed, skipping') - -class TestSQLite(tm.TestCase): + +import sqlalchemy + + +class TestSQLAlchemy(unittest.TestCase): + ''' + Test the sqlalchemy backend against an in-memory sqlite database. 
+ Assume that sqlalchemy takes case of the DB specifics + ''' def setUp(self): - self.db = sqlite3.connect(':memory:') - - def test_basic(self): - frame = tm.makeTimeDataFrame() - self._check_roundtrip(frame) - - def test_write_row_by_row(self): - frame = tm.makeTimeDataFrame() - frame.ix[0, 0] = np.nan - create_sql = sql.get_schema(frame, 'test', 'sqlite') - cur = self.db.cursor() - cur.execute(create_sql) - - cur = self.db.cursor() - - ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" - for idx, row in frame.iterrows(): - fmt_sql = format_query(ins, *row) - sql.tquery(fmt_sql, cur=cur) - - self.db.commit() - - result = sql.read_frame("select * from test", con=self.db) - result.index = frame.index - tm.assert_frame_equal(result, frame) - - def test_execute(self): - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'sqlite') - cur = self.db.cursor() - cur.execute(create_sql) - ins = "INSERT INTO test VALUES (?, ?, ?, ?)" - - row = frame.ix[0] - sql.execute(ins, self.db, params=tuple(row)) - self.db.commit() - - result = sql.read_frame("select * from test", self.db) - result.index = frame.index[:1] - tm.assert_frame_equal(result, frame[:1]) - - def test_schema(self): - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'sqlite') - lines = create_sql.splitlines() - for l in lines: - tokens = l.split(' ') - if len(tokens) == 2 and tokens[0] == 'A': - self.assert_(tokens[1] == 'DATETIME') - - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'sqlite', keys=['A', 'B'],) - lines = create_sql.splitlines() - self.assert_('PRIMARY KEY (A,B)' in create_sql) - cur = self.db.cursor() - cur.execute(create_sql) - - def test_execute_fail(self): - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a, b) - ); - """ - cur = self.db.cursor() - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db) + self.engine = sqlalchemy.create_engine('sqlite:///:memory:') + self._load_iris_data(self.engine) - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_execute_closed_connection(self): - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a, b) - ); - """ - cur = self.db.cursor() - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - self.db.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.tquery, "select * from test", - con=self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_na_roundtrip(self): - pass - - def _check_roundtrip(self, frame): - sql.write_frame(frame, name='test_table', con=self.db) - result = sql.read_frame("select * from test_table", self.db) - - # HACK! Change this once indexes are handled properly. 
- result.index = frame.index - - expected = frame - tm.assert_frame_equal(result, expected) - - frame['txt'] = ['a'] * len(frame) - frame2 = frame.copy() - frame2['Idx'] = Index(lrange(len(frame2))) + 10 - sql.write_frame(frame2, name='test_table2', con=self.db) - result = sql.read_frame("select * from test_table2", self.db, - index_col='Idx') - expected = frame.copy() - expected.index = Index(lrange(len(frame2))) + 10 - expected.index.name = 'Idx' - print(expected.index.names) - print(result.index.names) - tm.assert_frame_equal(expected, result) + self.test_frame_time = tm.makeTimeDataFrame() + self._load_test1_data() + + def _load_iris_data(self, engine): + self.dirpath = tm.get_data_path() + iris_csv_file = os.path.join(self.dirpath, 'iris.csv') + engine.execute("""CREATE TABLE iris ( + `SepalLength` REAL, + `SepalWidth` REAL, + `PetalLength` REAL, + `PetalWidth` REAL, + `Name` TEXT + )""") + + with open(iris_csv_file, 'rU') as iris_csv: + r = csv.reader(iris_csv) + next(r) # skip header row + ins = """ + INSERT INTO iris + VALUES(?, ?, ?, ?, ?) + """ + for row in r: + engine.execute(ins, *row) + + def _load_test1_data(self): + test1_csv_file = os.path.join(self.dirpath, 'test1.csv') + + with open(test1_csv_file, 'rU') as test1_csv: + dr = csv.DictReader(test1_csv) + self.test_frame1 = DataFrame(list(dr)) + + def _test_iris_loaded_frame(self, iris_frame): + pytype = iris_frame.dtypes[0].type + row = iris_frame.iloc[0] + + self.assertTrue(issubclass(pytype, np.floating), 'Loaded frame has incorrect type') + tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) + + def test_read_sql(self): + iris_frame = sql.read_sql("SELECT * FROM iris", con=self.engine) + self._test_iris_loaded_frame(iris_frame) + + def test_read_table(self): + iris_frame = sql.read_table("iris", con=self.engine) + self._test_iris_loaded_frame(iris_frame) + + def test_to_sql(self): + # Nuke table + self.engine.execute("DROP TABLE IF EXISTS test_frame1") + + sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine) + self.assertTrue(self.engine.has_table('test_frame1'), 'Table not written to DB') + + # Nuke table + self.engine.execute("DROP TABLE IF EXISTS test_frame1") + + def test_to_sql_fail(self): + # Nuke table + self.engine.execute("DROP TABLE IF EXISTS test_frame1") + + sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, if_exists='fail') + self.assertTrue(self.engine.has_table('test_frame1'), 'Table not written to DB') + + self.assertRaises(ValueError, sql.to_sql, self.test_frame1, 'test_frame1', con=self.engine, if_exists='fail') + + # Nuke table + self.engine.execute("DROP TABLE IF EXISTS test_frame1") + + def test_to_sql_replace(self): + # Nuke table just in case + self.engine.execute("DROP TABLE IF EXISTS test_frame1") + sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, if_exists='fail') + # Add to table again + sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, if_exists='replace') + self.assertTrue(self.engine.has_table('test_frame1'), 'Table not written to DB') + + num_entries = len(self.test_frame1) + + result = self.engine.execute("SELECT count(*) AS count_1 FROM test_frame1").fetchone() + num_rows = result[0] + + self.assertEqual(num_rows, num_entries, "not the same number of rows as entries") + + # Nuke table + self.engine.execute("DROP TABLE IF EXISTS test_frame1") + + def test_to_sql_append(self): + # Nuke table just in case + self.engine.execute("DROP TABLE IF EXISTS test_frame1") + sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, 
if_exists='fail') + # Add to table again + sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, if_exists='append') + self.assertTrue(self.engine.has_table('test_frame1'), 'Table not written to DB') + + num_entries = 2*len(self.test_frame1) + result = self.engine.execute("SELECT count(*) AS count_1 FROM test_frame1").fetchone() + num_rows = result[0] + + self.assertEqual(num_rows, num_entries, "not the same number of rows as entries") + + # Nuke table + self.engine.execute("DROP TABLE IF EXISTS test_frame1") + + def test_create_table(self): + temp_engine = sqlalchemy.create_engine('sqlite:///:memory:') + temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) + + pandasSQL = sql.PandasSQLWithEngine(temp_engine) + pandasSQL._create_table(temp_frame, 'temp_frame') + + self.assertTrue(temp_engine.has_table('temp_frame'), 'Table not written to DB') + + def test_drop_table(self): + temp_engine = sqlalchemy.create_engine('sqlite:///:memory:') + + temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) + + pandasSQL = sql.PandasSQLWithEngine(temp_engine) + pandasSQL._create_table(temp_frame, 'temp_frame') + + self.assertTrue(temp_engine.has_table('temp_frame'), 'Table not written to DB') + + pandasSQL._drop_table('temp_frame') + + self.assertFalse(temp_engine.has_table('temp_frame'), 'Table not deleted from DB') + + def test_roundtrip(self): + #temp_engine = sqlalchemy.create_engine('sqlite:///:memory:') + + sql.to_sql(self.test_frame1, 'test_frame_roundtrip', con=self.engine) + result = sql.read_table('test_frame_roundtrip', con=self.engine) + + # HACK! + result.index = self.test_frame1.index + + tm.assert_frame_equal(result, self.test_frame1) + + def test_execute_sql(self): + # drop_sql = "DROP TABLE IF EXISTS test" # should already be done + iris_results = sql.execute("SELECT * FROM iris", con=self.engine) + row = iris_results.fetchone() + tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def test_tquery(self): - frame = tm.makeTimeDataFrame() - sql.write_frame(frame, name='test_table', con=self.db) - result = sql.tquery("select A from test_table", self.db) - expected = frame.A - result = Series(result, frame.index) - tm.assert_series_equal(result, expected) + iris_results = sql.tquery("SELECT * FROM iris", con=self.engine) + row = iris_results[0] + tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) - try: - sys.stdout = StringIO() - self.assertRaises(sqlite3.OperationalError, sql.tquery, - 'select * from blah', con=self.db) +# --- Test SQLITE fallback - self.assertRaises(sqlite3.OperationalError, sql.tquery, - 'select * from blah', con=self.db, retry=True) - finally: - sys.stdout = sys.__stdout__ - def test_uquery(self): - frame = tm.makeTimeDataFrame() - sql.write_frame(frame, name='test_table', con=self.db) - stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)' - self.assertEqual(sql.uquery(stmt, con=self.db), 1) +class TestSQLite(unittest.TestCase): + ''' + Test the sqlalchemy backend against an in-memory sqlite database. 
+ Assume that sqlalchemy takes case of the DB specifics + ''' - try: - sys.stdout = StringIO() - - self.assertRaises(sqlite3.OperationalError, sql.tquery, - 'insert into blah values (1)', con=self.db) - - self.assertRaises(sqlite3.OperationalError, sql.tquery, - 'insert into blah values (1)', con=self.db, - retry=True) - finally: - sys.stdout = sys.__stdout__ - - def test_keyword_as_column_names(self): - ''' - ''' - df = DataFrame({'From':np.ones(5)}) - sql.write_frame(df, con = self.db, name = 'testkeywords') - - def test_onecolumn_of_integer(self): - ''' - GH 3628 - a column_of_integers dataframe should transfer well to sql - ''' - mono_df=DataFrame([1 , 2], columns=['c0']) - sql.write_frame(mono_df, con = self.db, name = 'mono_df') - # computing the sum via sql - con_x=self.db - the_sum=sum([my_c0[0] for my_c0 in con_x.execute("select * from mono_df")]) - # it should not fail, and gives 3 ( Issue #3628 ) - self.assertEqual(the_sum , 3) - - result = sql.read_frame("select * from mono_df",con_x) - tm.assert_frame_equal(result,mono_df) - - -class TestMySQL(tm.TestCase): + def setUp(self): + self.conn = sqlite3.connect(':memory:') + self.pandasSQL = sql.PandasSQLWithCon(self.conn, 'sqlite') + + self._load_iris_data(self.conn) + + self.test_frame_time = tm.makeTimeDataFrame() + self._load_test1_data() + + def _load_iris_data(self, conn): + self.dirpath = tm.get_data_path() + iris_csv_file = os.path.join(self.dirpath, 'iris.csv') + cur = conn.cursor() + cur.execute("""CREATE TABLE iris ( + `SepalLength` REAL, + `SepalWidth` REAL, + `PetalLength` REAL, + `PetalWidth` REAL, + `Name` TEXT + )""") + + with open(iris_csv_file, 'rU') as iris_csv: + r = csv.reader(iris_csv) + next(r) # skip header row + ins = """ + INSERT INTO iris + VALUES(?, ?, ?, ?, ?) + """ + for row in r: + cur.execute(ins, row) + conn.commit() + + def _load_test1_data(self): + test1_csv_file = os.path.join(self.dirpath, 'test1.csv') + + with open(test1_csv_file, 'rU') as test1_csv: + dr = csv.DictReader(test1_csv) + self.test_frame1 = DataFrame(list(dr)) + + def test_read_sql(self): + iris_frame = sql.read_sql("SELECT * FROM iris", con=self.conn) + pytype = iris_frame.dtypes[0].type + row = iris_frame.iloc[0] + + self.assertTrue(issubclass(pytype, np.floating), 'Loaded frame has incorrect type') + tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) + + def test_to_sql(self): + # Nuke table + cur = self.conn.cursor() + cur.execute("DROP TABLE IF EXISTS test_frame1") + self.conn.commit() + + sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, flavor='sqlite') + self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') + + # Nuke table + cur = self.conn.cursor() + cur.execute("DROP TABLE IF EXISTS test_frame1") + self.conn.commit() + + def test_to_sql_fail(self): + # Nuke table + cur = self.conn.cursor() + cur.execute("DROP TABLE IF EXISTS test_frame1") + self.conn.commit() + sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, if_exists='fail', flavor='sqlite') + self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') + + self.assertRaises(ValueError, sql.to_sql, self.test_frame1, 'test_frame1', con=self.conn, if_exists='fail') + + # Nuke table + cur = self.conn.cursor() + cur.execute("DROP TABLE IF EXISTS test_frame1") + self.conn.commit() + + def test_to_sql_replace(self): + # Nuke table just in case + cur = self.conn.cursor() + cur.execute("DROP TABLE IF EXISTS test_frame1") + self.conn.commit() + sql.to_sql(self.test_frame1, 'test_frame1', 
con=self.conn, if_exists='fail', flavor='sqlite') + # Add to table again + sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, if_exists='replace') + self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') + + num_entries = len(self.test_frame1) + + result = self.conn.execute("SELECT count(*) AS count_1 FROM test_frame1").fetchone() + num_rows = result[0] + + self.assertEqual(num_rows, num_entries, "not the same number of rows as entries") + # Nuke table + cur = self.conn.cursor() + cur.execute("DROP TABLE IF EXISTS test_frame1") + self.conn.commit() + + def test_to_sql_append(self): + # Nuke table just in case + cur = self.conn.cursor() + cur.execute("DROP TABLE IF EXISTS test_frame1") + self.conn.commit() + + sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, if_exists='fail', flavor='sqlite') + + # Add to table again + sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, if_exists='append') + self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') + + num_entries = 2*len(self.test_frame1) + result = self.conn.execute("SELECT count(*) AS count_1 FROM test_frame1").fetchone() + num_rows = result[0] + + self.assertEqual(num_rows, num_entries, "not the same number of rows as entries") + + # Nuke table + cur = self.conn.cursor() + cur.execute("DROP TABLE IF EXISTS test_frame1") + self.conn.commit() + + def test_create_table(self): + temp_conn = sqlite3.connect(':memory:') + temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) + + pandasSQL = sql.PandasSQLWithCon(temp_conn, 'sqlite') + pandasSQL._create_table(temp_frame, 'temp_frame') + + self.assertTrue(pandasSQL.has_table('temp_frame'), 'Table not written to DB') + + def test_drop_table(self): + temp_conn = sqlite3.connect(':memory:') + + temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) + + pandasSQL = sql.PandasSQLWithCon(temp_conn, 'sqlite') + pandasSQL._create_table(temp_frame, 'temp_frame') + + self.assertTrue(pandasSQL.has_table('temp_frame'), 'Table not written to DB') + + pandasSQL._drop_table('temp_frame') + + self.assertFalse(pandasSQL.has_table('temp_frame'), 'Table not deleted from DB') + + def test_roundtrip(self): + + sql.to_sql(self.test_frame1, 'test_frame_roundtrip', con=self.conn, flavor='sqlite') + result = sql.read_sql('SELECT * FROM test_frame_roundtrip', con=self.conn, flavor='sqlite') + + # HACK! + result.index = self.test_frame1.index + + tm.assert_frame_equal(result, self.test_frame1) + + def test_execute_sql(self): + # drop_sql = "DROP TABLE IF EXISTS test" # should already be done + iris_results = sql.execute("SELECT * FROM iris", con=self.conn, flavor='sqlite') + row = iris_results.fetchone() + tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) + + def test_tquery(self): + iris_results = sql.tquery("SELECT * FROM iris", con=self.conn, flavor='sqlite') + row = iris_results[0] + tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) + + + + +""" +class TestSQLA_pymysql(TestSQLAlchemy): def setUp(self): - _skip_if_no_MySQLdb() - import MySQLdb + raise nose.SkipTest("MySQLdb was not installed") + + def set_flavor_engine(self): + # if can't import should skip all tests try: - # Try Travis defaults. - # No real user should allow root access with a blank password. 
- self.db = MySQLdb.connect(host='localhost', user='root', passwd='', - db='pandas_nosetest') - except: - pass - else: - return + import pymysql + except ImportError: + raise nose.SkipTest("pymysql was not installed") + try: - self.db = MySQLdb.connect(read_default_group='pandas') - except MySQLdb.ProgrammingError as e: + self.engine = sqlalchemy.create_engine("mysql+pymysql://root:@localhost/pandas_nosetest") + except pymysql.Error as e: raise nose.SkipTest( - "Create a group of connection parameters under the heading " + "Cannot connect to database. " + "Create a group of conn parameters under the heading " "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - except MySQLdb.Error as e: + except pymysql.ProgrammingError as e: raise nose.SkipTest( - "Cannot connect to database. " "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - def test_basic(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - self._check_roundtrip(frame) - - def test_write_row_by_row(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - frame.ix[0, 0] = np.nan - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = sql.get_schema(frame, 'test', 'mysql') - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" - for idx, row in frame.iterrows(): - fmt_sql = format_query(ins, *row) - sql.tquery(fmt_sql, cur=cur) - - self.db.commit() - - result = sql.read_frame("select * from test", con=self.db) - result.index = frame.index - tm.assert_frame_equal(result, frame) - - def test_execute(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = sql.get_schema(frame, 'test', 'mysql') - cur = self.db.cursor() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unknown table.*") - cur.execute(drop_sql) - cur.execute(create_sql) - ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" - - row = frame.ix[0] - sql.execute(ins, self.db, params=tuple(row)) - self.db.commit() - - result = sql.read_frame("select * from test", self.db) - result.index = frame.index[:1] - tm.assert_frame_equal(result, frame[:1]) - - def test_schema(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'mysql') - lines = create_sql.splitlines() - for l in lines: - tokens = l.split(' ') - if len(tokens) == 2 and tokens[0] == 'A': - self.assert_(tokens[1] == 'DATETIME') - - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = sql.get_schema(frame, 'test', 'mysql', keys=['A', 'B'],) - lines = create_sql.splitlines() - self.assert_('PRIMARY KEY (A,B)' in create_sql) - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - - def test_execute_fail(self): - _skip_if_no_MySQLdb() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a(5), b(5)) - ); - """ - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db) - - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.db) - finally: - sys.stdout = sys.__stdout__ - - def 
test_execute_closed_connection(self): - _skip_if_no_MySQLdb() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a(5), b(5)) - ); - """ - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - self.db.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.tquery, "select * from test", - con=self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_na_roundtrip(self): - _skip_if_no_MySQLdb() - pass - - def _check_roundtrip(self, frame): - _skip_if_no_MySQLdb() - drop_sql = "DROP TABLE IF EXISTS test_table" - cur = self.db.cursor() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unknown table.*") - cur.execute(drop_sql) - sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql') - result = sql.read_frame("select * from test_table", self.db) - - # HACK! Change this once indexes are handled properly. - result.index = frame.index - result.index.name = frame.index.name - - expected = frame - tm.assert_frame_equal(result, expected) - - frame['txt'] = ['a'] * len(frame) - frame2 = frame.copy() - index = Index(lrange(len(frame2))) + 10 - frame2['Idx'] = index - drop_sql = "DROP TABLE IF EXISTS test_table2" - cur = self.db.cursor() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unknown table.*") - cur.execute(drop_sql) - sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql') - result = sql.read_frame("select * from test_table2", self.db, - index_col='Idx') - expected = frame.copy() - - # HACK! Change this once indexes are handled properly. - expected.index = index - expected.index.names = result.index.names - tm.assert_frame_equal(expected, result) - - def test_tquery(self): - try: - import MySQLdb - except ImportError: - raise nose.SkipTest("no MySQLdb") - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test_table" - cur = self.db.cursor() - cur.execute(drop_sql) - sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql') - result = sql.tquery("select A from test_table", self.db) - expected = frame.A - result = Series(result, frame.index) - tm.assert_series_equal(result, expected) - try: - sys.stdout = StringIO() - self.assertRaises(MySQLdb.ProgrammingError, sql.tquery, - 'select * from blah', con=self.db) - - self.assertRaises(MySQLdb.ProgrammingError, sql.tquery, - 'select * from blah', con=self.db, retry=True) - finally: - sys.stdout = sys.__stdout__ +class TestSQLA_MySQLdb(TestSQLAlchemy): + def setUp(self): + raise nose.SkipTest("MySQLdb was not installed") - def test_uquery(self): + def set_flavor_engine(self): + # if can't import should skip all tests try: import MySQLdb except ImportError: - raise nose.SkipTest("no MySQLdb") - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test_table" - cur = self.db.cursor() - cur.execute(drop_sql) - sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql') - stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)' - self.assertEqual(sql.uquery(stmt, con=self.db), 1) + raise nose.SkipTest("MySQLdb was not installed") try: - sys.stdout = StringIO() - - self.assertRaises(MySQLdb.ProgrammingError, sql.tquery, - 'insert into blah values (1)', con=self.db) - - self.assertRaises(MySQLdb.ProgrammingError, sql.tquery, - 'insert into blah values (1)', con=self.db, - retry=True) - finally: - sys.stdout = sys.__stdout__ - - 
def test_keyword_as_column_names(self): - ''' - ''' - _skip_if_no_MySQLdb() - df = DataFrame({'From':np.ones(5)}) - sql.write_frame(df, con = self.db, name = 'testkeywords', - if_exists='replace', flavor='mysql') - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) + self.engine = sqlalchemy.create_engine("mysql+mysqldb://root:@localhost/pandas_nosetest") + except MySQLdb.Error: + raise nose.SkipTest( + "Cannot connect to database. " + "Create a group of connection parameters under the heading " + "[pandas] in your system's mysql default file, " + "typically located at ~/.my.cnf or /etc/.my.cnf. ") + except MySQLdb.ProgrammingError: + raise nose.SkipTest( + "Create a group of connection parameters under the heading " + "[pandas] in your system's mysql default file, " + "typically located at ~/.my.cnf or /etc/.my.cnf. ") +""" diff --git a/pandas/io/tests/test_sql_legacy.py b/pandas/io/tests/test_sql_legacy.py new file mode 100644 index 0000000000000..3c6e992097d30 --- /dev/null +++ b/pandas/io/tests/test_sql_legacy.py @@ -0,0 +1,497 @@ +from __future__ import with_statement +from pandas.compat import StringIO +import unittest +import sqlite3 +import sys + +import warnings + +import nose + +import numpy as np + +from pandas.core.datetools import format as date_format +from pandas.core.api import DataFrame, isnull +from pandas.compat import StringIO, range, lrange +import pandas.compat as compat + +import pandas.io.sql as sql +from pandas.io.sql import DatabaseError +import pandas.util.testing as tm +from pandas import Series, Index, DataFrame +from datetime import datetime + +_formatters = { + datetime: lambda dt: "'%s'" % date_format(dt), + str: lambda x: "'%s'" % x, + np.str_: lambda x: "'%s'" % x, + compat.text_type: lambda x: "'%s'" % x, + compat.binary_type: lambda x: "'%s'" % x, + float: lambda x: "%.8f" % x, + int: lambda x: "%s" % x, + type(None): lambda x: "NULL", + np.float64: lambda x: "%.10f" % x, + bool: lambda x: "'%s'" % x, +} + +def format_query(sql, *args): + """ + + """ + processed_args = [] + for arg in args: + if isinstance(arg, float) and isnull(arg): + arg = None + + formatter = _formatters[type(arg)] + processed_args.append(formatter(arg)) + + return sql % tuple(processed_args) + +def _skip_if_no_MySQLdb(): + try: + import MySQLdb + except ImportError: + raise nose.SkipTest('MySQLdb not installed, skipping') + +class TestSQLite(unittest.TestCase): + + def setUp(self): + self.db = sqlite3.connect(':memory:') + + def test_basic(self): + frame = tm.makeTimeDataFrame() + self._check_roundtrip(frame) + + def test_write_row_by_row(self): + frame = tm.makeTimeDataFrame() + frame.ix[0, 0] = np.nan + create_sql = sql.get_schema(frame, 'test', 'sqlite') + cur = self.db.cursor() + cur.execute(create_sql) + + cur = self.db.cursor() + + ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" + for idx, row in frame.iterrows(): + fmt_sql = format_query(ins, *row) + sql.tquery(fmt_sql, cur=cur) + + self.db.commit() + + result = sql.read_frame("select * from test", con=self.db) + result.index = frame.index + tm.assert_frame_equal(result, frame) + + def test_execute(self): + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, 'test', 'sqlite') + cur = self.db.cursor() + cur.execute(create_sql) + ins = "INSERT INTO test VALUES (?, ?, ?, ?)" + + row = frame.ix[0] + sql.execute(ins, self.db, params=tuple(row)) + self.db.commit() + + result = sql.read_frame("select * from test", self.db) + result.index = 
frame.index[:1] + tm.assert_frame_equal(result, frame[:1]) + + def test_schema(self): + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, 'test', 'sqlite') + lines = create_sql.splitlines() + for l in lines: + tokens = l.split(' ') + if len(tokens) == 2 and tokens[0] == 'A': + self.assert_(tokens[1] == 'DATETIME') + + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, 'test', 'sqlite', keys=['A', 'B'],) + lines = create_sql.splitlines() + self.assert_('PRIMARY KEY (A,B)' in create_sql) + cur = self.db.cursor() + cur.execute(create_sql) + + def test_execute_fail(self): + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a, b) + ); + """ + cur = self.db.cursor() + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) + sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db) + + try: + sys.stdout = StringIO() + self.assertRaises(Exception, sql.execute, + 'INSERT INTO test VALUES("foo", "bar", 7)', + self.db) + finally: + sys.stdout = sys.__stdout__ + + def test_execute_closed_connection(self): + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a, b) + ); + """ + cur = self.db.cursor() + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) + self.db.close() + try: + sys.stdout = StringIO() + self.assertRaises(Exception, sql.tquery, "select * from test", + con=self.db) + finally: + sys.stdout = sys.__stdout__ + + def test_na_roundtrip(self): + pass + + def _check_roundtrip(self, frame): + sql.write_frame(frame, name='test_table', con=self.db) + result = sql.read_frame("select * from test_table", self.db) + + # HACK! Change this once indexes are handled properly. 
+ result.index = frame.index + + expected = frame + tm.assert_frame_equal(result, expected) + + frame['txt'] = ['a'] * len(frame) + frame2 = frame.copy() + frame2['Idx'] = Index(lrange(len(frame2))) + 10 + sql.write_frame(frame2, name='test_table2', con=self.db) + result = sql.read_frame("select * from test_table2", self.db, + index_col='Idx') + expected = frame.copy() + expected.index = Index(lrange(len(frame2))) + 10 + expected.index.name = 'Idx' + print(expected.index.names) + print(result.index.names) + tm.assert_frame_equal(expected, result) + + def test_tquery(self): + frame = tm.makeTimeDataFrame() + sql.write_frame(frame, name='test_table', con=self.db) + result = sql.tquery("select A from test_table", self.db) + expected = frame.A + result = Series(result, frame.index) + tm.assert_series_equal(result, expected) + + try: + sys.stdout = StringIO() + self.assertRaises(DatabaseError, sql.tquery, + 'select * from blah', con=self.db) + + self.assertRaises(DatabaseError, sql.tquery, + 'select * from blah', con=self.db, retry=True) + finally: + sys.stdout = sys.__stdout__ + + def test_uquery(self): + frame = tm.makeTimeDataFrame() + sql.write_frame(frame, name='test_table', con=self.db) + stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)' + self.assertEqual(sql.uquery(stmt, con=self.db), 1) + + try: + sys.stdout = StringIO() + + self.assertRaises(DatabaseError, sql.tquery, + 'insert into blah values (1)', con=self.db) + + self.assertRaises(DatabaseError, sql.tquery, + 'insert into blah values (1)', con=self.db, + retry=True) + finally: + sys.stdout = sys.__stdout__ + + def test_keyword_as_column_names(self): + ''' + ''' + df = DataFrame({'From':np.ones(5)}) + sql.write_frame(df, con = self.db, name = 'testkeywords') + + def test_onecolumn_of_integer(self): + ''' + GH 3628 + a column_of_integers dataframe should transfer well to sql + ''' + mono_df=DataFrame([1 , 2], columns=['c0']) + sql.write_frame(mono_df, con = self.db, name = 'mono_df') + # computing the sum via sql + con_x=self.db + the_sum=sum([my_c0[0] for my_c0 in con_x.execute("select * from mono_df")]) + # it should not fail, and gives 3 ( Issue #3628 ) + self.assertEqual(the_sum , 3) + + result = sql.read_frame("select * from mono_df",con_x) + tm.assert_frame_equal(result,mono_df) + + +class TestMySQL(unittest.TestCase): + + def setUp(self): + _skip_if_no_MySQLdb() + import MySQLdb + try: + # Try Travis defaults. + # No real user should allow root access with a blank password. + self.db = MySQLdb.connect(host='localhost', user='root', passwd='', + db='pandas_nosetest') + except: + pass + else: + return + try: + self.db = MySQLdb.connect(read_default_group='pandas') + except MySQLdb.ProgrammingError as e: + raise nose.SkipTest( + "Create a group of connection parameters under the heading " + "[pandas] in your system's mysql default file, " + "typically located at ~/.my.cnf or /etc/.my.cnf. ") + except MySQLdb.Error as e: + raise nose.SkipTest( + "Cannot connect to database. " + "Create a group of connection parameters under the heading " + "[pandas] in your system's mysql default file, " + "typically located at ~/.my.cnf or /etc/.my.cnf. 
") + + def test_basic(self): + _skip_if_no_MySQLdb() + frame = tm.makeTimeDataFrame() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "For more robust support.*") + self._check_roundtrip(frame) + + def test_write_row_by_row(self): + _skip_if_no_MySQLdb() + frame = tm.makeTimeDataFrame() + frame.ix[0, 0] = np.nan + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = sql.get_schema(frame, 'test', 'mysql') + cur = self.db.cursor() + cur.execute(drop_sql) + cur.execute(create_sql) + ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" + for idx, row in frame.iterrows(): + fmt_sql = format_query(ins, *row) + sql.tquery(fmt_sql, cur=cur) + + self.db.commit() + + result = sql.read_frame("select * from test", con=self.db) + result.index = frame.index + tm.assert_frame_equal(result, frame) + + def test_execute(self): + _skip_if_no_MySQLdb() + frame = tm.makeTimeDataFrame() + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = sql.get_schema(frame, 'test', 'mysql') + cur = self.db.cursor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Unknown table.*") + cur.execute(drop_sql) + cur.execute(create_sql) + ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" + + row = frame.ix[0] + sql.execute(ins, self.db, params=tuple(row)) + self.db.commit() + + result = sql.read_frame("select * from test", self.db) + result.index = frame.index[:1] + tm.assert_frame_equal(result, frame[:1]) + + def test_schema(self): + _skip_if_no_MySQLdb() + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, 'test', 'mysql') + lines = create_sql.splitlines() + for l in lines: + tokens = l.split(' ') + if len(tokens) == 2 and tokens[0] == 'A': + self.assert_(tokens[1] == 'DATETIME') + + frame = tm.makeTimeDataFrame() + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = sql.get_schema(frame, 'test', 'mysql', keys=['A', 'B'],) + lines = create_sql.splitlines() + self.assert_('PRIMARY KEY (A,B)' in create_sql) + cur = self.db.cursor() + cur.execute(drop_sql) + cur.execute(create_sql) + + def test_execute_fail(self): + _skip_if_no_MySQLdb() + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a(5), b(5)) + ); + """ + cur = self.db.cursor() + cur.execute(drop_sql) + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) + sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db) + + try: + sys.stdout = StringIO() + self.assertRaises(Exception, sql.execute, + 'INSERT INTO test VALUES("foo", "bar", 7)', + self.db) + finally: + sys.stdout = sys.__stdout__ + + def test_execute_closed_connection(self): + _skip_if_no_MySQLdb() + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a(5), b(5)) + ); + """ + cur = self.db.cursor() + cur.execute(drop_sql) + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) + self.db.close() + try: + sys.stdout = StringIO() + self.assertRaises(Exception, sql.tquery, "select * from test", + con=self.db) + finally: + sys.stdout = sys.__stdout__ + + def test_na_roundtrip(self): + _skip_if_no_MySQLdb() + pass + + def _check_roundtrip(self, frame): + _skip_if_no_MySQLdb() + drop_sql = "DROP TABLE IF EXISTS test_table" + cur = self.db.cursor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Unknown table.*") + cur.execute(drop_sql) + sql.write_frame(frame, name='test_table', con=self.db, 
flavor='mysql')
+        result = sql.read_frame("select * from test_table", self.db)
+
+        # HACK! Change this once indexes are handled properly.
+        result.index = frame.index
+        result.index.name = frame.index.name
+
+        expected = frame
+        tm.assert_frame_equal(result, expected)
+
+        frame['txt'] = ['a'] * len(frame)
+        frame2 = frame.copy()
+        index = Index(lrange(len(frame2))) + 10
+        frame2['Idx'] = index
+        drop_sql = "DROP TABLE IF EXISTS test_table2"
+        cur = self.db.cursor()
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", "Unknown table.*")
+            cur.execute(drop_sql)
+        sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
+        result = sql.read_frame("select * from test_table2", self.db,
+                                index_col='Idx')
+        expected = frame.copy()
+
+        # HACK! Change this once indexes are handled properly.
+        expected.index = index
+        expected.index.names = result.index.names
+        tm.assert_frame_equal(expected, result)
+
+    def test_tquery(self):
+        try:
+            import MySQLdb
+        except ImportError:
+            raise nose.SkipTest
+        frame = tm.makeTimeDataFrame()
+        drop_sql = "DROP TABLE IF EXISTS test_table"
+        cur = self.db.cursor()
+        cur.execute(drop_sql)
+        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
+        result = sql.tquery("select A from test_table", self.db)
+        expected = frame.A
+        result = Series(result, frame.index)
+        tm.assert_series_equal(result, expected)
+
+        try:
+            sys.stdout = StringIO()
+            self.assertRaises(DatabaseError, sql.tquery,
+                              'select * from blah', con=self.db)
+
+            self.assertRaises(DatabaseError, sql.tquery,
+                              'select * from blah', con=self.db, retry=True)
+        finally:
+            sys.stdout = sys.__stdout__
+
+    def test_uquery(self):
+        try:
+            import MySQLdb
+        except ImportError:
+            raise nose.SkipTest
+        frame = tm.makeTimeDataFrame()
+        drop_sql = "DROP TABLE IF EXISTS test_table"
+        cur = self.db.cursor()
+        cur.execute(drop_sql)
+        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
+        stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)'
+        self.assertEqual(sql.uquery(stmt, con=self.db), 1)
+
+        try:
+            sys.stdout = StringIO()
+
+            self.assertRaises(DatabaseError, sql.tquery,
+                              'insert into blah values (1)', con=self.db)
+
+            self.assertRaises(DatabaseError, sql.tquery,
+                              'insert into blah values (1)', con=self.db,
+                              retry=True)
+        finally:
+            sys.stdout = sys.__stdout__
+
+    def test_keyword_as_column_names(self):
+        '''
+        '''
+        _skip_if_no_MySQLdb()
+        df = DataFrame({'From':np.ones(5)})
+        sql.write_frame(df, name='testkeywords', con=self.db,
+                        if_exists='replace', flavor='mysql')
+
+if __name__ == '__main__':
+    # unittest.main()
+    # nose.runmodule(argv=[__file__,'-vvs','-x', '--pdb-failure'],
+    #                exit=False)
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+                   exit=False)
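
In short, the legacy path these tests exercise reduces to a small flow. The sketch below is illustrative only (not part of the patch); it assumes the in-memory SQLite database and the write_frame/tquery helpers defined above:

    import sqlite3
    import pandas.io.sql as sql
    import pandas.util.testing as tm

    db = sqlite3.connect(':memory:')    # plain DBAPI2 connection, no SQLAlchemy
    frame = tm.makeTimeDataFrame()
    sql.write_frame(frame, name='test_table', con=db)     # creates the table, then inserts
    rows = sql.tquery("select A from test_table", con=db) # one column selected -> plain list
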
From 60590f2b3ff5c1ebab369994d8dafc42cfbebd46 Mon Sep 17 00:00:00 2001
From: Jonathan Chambers
Date: Tue, 14 Jan 2014 22:11:09 +0000
Subject: [PATCH 02/16] ENH #4163 Added tests and documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Initial draft of doc updates
minor doc updates
Added tests and reduced code repetition. Updated Docs. Added test
coverage for legacy names
Documentation updates, more tests
Added deprecation warnings for legacy names. Updated docs and test doc
build
ENH #4163 - finalized tests and docs, ready for wider use…
TST added sqlalchemy to TravisCI build dep for py 2.7 and 3.3
TST Import sqlalchemy on Travis.
DOC add docstrings to read sql
ENH read_sql connects via Connection, Engine, file path, or :memory: string
CLN Separate legacy code into new file, and fallback so that all old tests pass.
ENH #4163 added version added comment
ENH #4163 added deprecation warning for tquery and uquery
ENH #4163 Documentation and tests
---
 ci/requirements-2.6.txt            |   1 +
 ci/requirements-2.7.txt            |   1 +
 ci/requirements-2.7_LOCALE.txt     |   1 +
 ci/requirements-3.3.txt            |   1 +
 doc/source/io.rst                  | 163 +++++++---
 pandas/io/sql.py                   |  89 ++++--
 pandas/io/sql_legacy.py            | 332 -------------------
 pandas/io/tests/test_sql.py        | 416 +++++++++++++-----------
 pandas/io/tests/test_sql_legacy.py | 497 -----------------------------
 9 files changed, 409 insertions(+), 1092 deletions(-)
 delete mode 100644 pandas/io/sql_legacy.py
 delete mode 100644 pandas/io/tests/test_sql_legacy.py

diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt
index 751d034ef97f5..d04c27c947731 100644
--- a/ci/requirements-2.6.txt
+++ b/ci/requirements-2.6.txt
@@ -5,3 +5,4 @@ pytz==2013b
 http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz
 html5lib==1.0b2
 bigquery==2.0.17
+sqlalchemy==0.8.1
diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt
index 186d13c6dec7c..ed52dbb5b93ab 100644
--- a/ci/requirements-2.7.txt
+++ b/ci/requirements-2.7.txt
@@ -19,3 +19,4 @@ scipy==0.10.0
 beautifulsoup4==4.2.1
 statsmodels==0.5.0
 bigquery==2.0.17
+sqlalchemy==0.8.1
diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt
index 06574cdd6b299..8e0ff5f34bddc 100644
--- a/ci/requirements-2.7_LOCALE.txt
+++ b/ci/requirements-2.7_LOCALE.txt
@@ -17,3 +17,4 @@ scipy==0.10.0
 beautifulsoup4==4.2.1
 statsmodels==0.5.0
 bigquery==2.0.17
+sqlalchemy==0.8.1
diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt
index 480fde477d88b..73009b572c4c2 100644
--- a/ci/requirements-3.3.txt
+++ b/ci/requirements-3.3.txt
@@ -14,3 +14,4 @@ lxml==3.2.1
 scipy==0.12.0
 beautifulsoup4==4.2.1
 statsmodels==0.4.3
+sqlalchemy==0.9.1
diff --git a/doc/source/io.rst b/doc/source/io.rst
index cba81a8b39600..c7cb23219fc4c 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -3011,13 +3011,48 @@ SQL Queries
 -----------
 
 The :mod:`pandas.io.sql` module provides a collection of query wrappers to both
-facilitate data retrieval and to reduce dependency on DB-specific API. These
-wrappers only support the Python database adapters which respect the `Python
-DB-API <http://www.python.org/dev/peps/pep-0249/>`__. See some
-:ref:`cookbook examples <cookbook.sql>` for some advanced strategies
+facilitate data retrieval and to reduce dependency on DB-specific API. Database abstraction
+is provided by SQLAlchemy if installed; in addition you will need a driver library for
+your database.
 
-For example, suppose you want to query some data with different types from a
-table such as:
+.. versionadded:: 0.14.0
+
+If SQLAlchemy is not installed, a legacy fallback is provided for sqlite and mysql.
+These legacy modes require Python database adapters which respect the `Python
+DB-API <http://www.python.org/dev/peps/pep-0249/>`__.
+
+See also some :ref:`cookbook examples <cookbook.sql>` for advanced strategies.
+
+The key functions are:
+
+- :func:`~pandas.io.sql.to_sql`
+- :func:`~pandas.io.sql.read_sql`
+- :func:`~pandas.io.sql.read_table`
+
+In the following example, we use the `SQLite <http://www.sqlite.org/>`__ SQL database
+engine. You can use a temporary SQLite database where data are stored in
+"memory".
+
+To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine
+object from a database URI. You only need to create the engine once per database you are
+connecting to.
+
+For more information on :func:`create_engine` and the URI formatting, see the examples
+below and the SQLAlchemy `documentation <http://docs.sqlalchemy.org/en/latest/core/engines.html>`__
+
+.. code-block:: python
+
+    from sqlalchemy import create_engine
+    from pandas.io import sql
+    # Create your connection.
+    engine = create_engine('sqlite:///:memory:')
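+
+A quick way to check that the engine is wired up correctly is to execute a
+trivial statement directly; this is plain SQLAlchemy, independent of pandas:
+
+.. code-block:: python
+
+    engine.execute('select 1').fetchall()   # [(1,)]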
+
+Assuming the following data is in a DataFrame "data", we can insert it into
+the database using :func:`~pandas.io.sql.to_sql`.
 
 +-----+------------+-------+-------+-------+
@@ -3031,81 +3066,107 @@ table such as:
 +-----+------------+-------+-------+-------+
 
-Functions from :mod:`pandas.io.sql` can extract some data into a DataFrame. In
-the following example, we use the `SQlite <http://www.sqlite.org/>`__ SQL database
-engine. You can use a temporary SQLite database where data are stored in
-"memory". Just do:
-
-.. code-block:: python
-
-   import sqlite3
-   from pandas.io import sql
-   # Create your connection.
-   cnx = sqlite3.connect(':memory:')
-
 .. ipython:: python
    :suppress:
 
-   import sqlite3
+   from sqlalchemy import create_engine
    from pandas.io import sql
-   cnx = sqlite3.connect(':memory:')
+   engine = create_engine('sqlite:///:memory:')
 
 .. ipython:: python
    :suppress:
+
+   c = ['id', 'Date', 'Col_1', 'Col_2', 'Col_3']
+   d = [(26, datetime.datetime(2010,10,18), 'X', 27.5, True),
+        (42, datetime.datetime(2010,10,19), 'Y', -12.5, False),
+        (63, datetime.datetime(2010,10,20), 'Z', 5.73, True)]
 
-   cu = cnx.cursor()
-   # Create a table named 'data'.
-   cu.execute("""CREATE TABLE data(id integer,
-                                   date date,
-                                   Col_1 string,
-                                   Col_2 float,
-                                   Col_3 bool);""")
-   cu.executemany('INSERT INTO data VALUES (?,?,?,?,?)',
-                  [(26, datetime.datetime(2010,10,18), 'X', 27.5, True),
-                   (42, datetime.datetime(2010,10,19), 'Y', -12.5, False),
-                   (63, datetime.datetime(2010,10,20), 'Z', 5.73, True)])
+   data = DataFrame(d, columns=c)
 
+.. ipython:: python
+
+   sql.to_sql(data, 'data', engine)
 
-Let ``data`` be the name of your SQL table. With a query and your database
-connection, just use the :func:`~pandas.io.sql.read_frame` function to get the
-query results into a DataFrame:
+You can read from the database simply by
+specifying a table name using the :func:`~pandas.io.sql.read_table` function.
 
 .. ipython:: python
-
-   sql.read_frame("SELECT * FROM data;", cnx)
+
+   sql.read_table('data', engine)
 
 You can also specify the name of the column as the DataFrame index:
 
 .. ipython:: python
 
-   sql.read_frame("SELECT * FROM data;", cnx, index_col='id')
-   sql.read_frame("SELECT * FROM data;", cnx, index_col='date')
+   sql.read_table('data', engine, index_col='id')
 
-Of course, you can specify a more "complex" query.
+You can also query using raw SQL in the :func:`~pandas.io.sql.read_sql` function.
 
 .. ipython:: python
+
+   sql.read_sql('SELECT * FROM data', engine)
 
-   sql.read_frame("SELECT id, Col_1, Col_2 FROM data WHERE id = 42;", cnx)
+Of course, you can specify a more "complex" query.
 
 .. ipython:: python
-   :suppress:
 
-   cu.close()
-   cnx.close()
+   sql.read_sql("SELECT id, Col_1, Col_2 FROM data WHERE id = 42;", engine)
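+
+Queries can also be parameterised; ``params`` are passed through to the
+underlying driver, using that driver's placeholder style (``?`` for sqlite,
+for example -- the exact placeholder depends on the driver in use):
+
+.. code-block:: python
+
+    sql.read_sql('SELECT * FROM data WHERE id = ?', engine, params=[42])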
 
 There are a few other available functions:
 
-  - ``tquery`` returns a list of tuples corresponding to each row.
-  - ``uquery`` does the same thing as tquery, but instead of returning results
-    it returns the number of related rows.
-  - ``write_frame`` writes records stored in a DataFrame into the SQL table.
-  - ``has_table`` checks if a given SQLite table exists.
+:func:`~pandas.io.sql.has_table` checks if a given table exists.
 
-.. note::
+:func:`~pandas.io.sql.tquery` returns a list of tuples corresponding to each row.
+
+:func:`~pandas.io.sql.uquery` does the same thing as tquery, but instead of
+returning results it returns the number of related rows.
+
+In addition, the class :class:`~pandas.io.sql.PandasSQLWithEngine` can be
+instantiated directly for more manual control over the SQL interaction.
+
+Engine connection examples
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+    from sqlalchemy import create_engine
+
+    engine = create_engine('postgresql://scott:tiger@localhost:5432/mydatabase')
+
+    engine = create_engine('mysql+mysqldb://scott:tiger@localhost/foo')
+
+    engine = create_engine('oracle://scott:tiger@127.0.0.1:1521/sidname')
+
+    engine = create_engine('mssql+pyodbc://mydsn')
+
+    # sqlite://<nohost>/<path>
+    # where <path> is relative:
+    engine = create_engine('sqlite:///foo.db')
+
+    # or absolute, starting with a slash:
+    engine = create_engine('sqlite:////absolute/path/to/foo.db')
+
+Legacy
+~~~~~~
+To use the sqlite support without SQLAlchemy, you can create connections like so:
+
+.. code-block:: python
+
+    import sqlite3
+    from pandas.io import sql
+    cnx = sqlite3.connect(':memory:')
+
+And then issue the following queries, remembering to also specify the flavor of SQL
+you are using.
+
+.. code-block:: python
+
+    sql.to_sql(data, 'data', cnx, flavor='sqlite')
+
+    sql.read_sql("SELECT * FROM data", cnx, flavor='sqlite')
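+
+The same pattern works for a MySQL DBAPI2 connection, for example one opened
+with ``MySQLdb`` (the credentials below are illustrative only, mirroring the
+test setup used elsewhere in this patch):
+
+.. code-block:: python
+
+    import MySQLdb
+    cnx = MySQLdb.connect(host='localhost', user='root', passwd='',
+                          db='pandas_nosetest')
+    sql.to_sql(data, 'data', cnx, flavor='mysql')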
 
-   For now, writing your DataFrame into a database works only with
-   **SQLite**. Moreover, the **index** will currently be **dropped**.
 
 .. _io.bigquery:
 
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 345fc4dde7d9a..d09847a4be481 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -18,10 +18,6 @@ class SQLAlchemyRequired(ImportError):
     pass
 
 
-class LegacyMySQLConnection(Exception):
-    pass
-
-
 class DatabaseError(IOError):
     pass
 
@@ -37,7 +33,7 @@ def _convert_params(sql, params):
     return args
 
 
-def execute(sql, con, cur=None, params=[], engine=None, flavor='sqlite'):
+def execute(sql, con, cur=None, params=[], flavor='sqlite'):
     """
     Execute the given SQL query using the provided connection object.
 
@@ -52,7 +48,8 @@ def execute(sql, con, cur=None, params=[], engine=None, flavor='sqlite'):
     cur: depreciated, cursor is obtained from connection
     params: list or tuple, optional
         List of parameters to pass to execute method.
-    flavor : string {sqlite, mysql} specifying the flavor of SQL to use.
+    flavor : string "sqlite", "mysql"
+        Specifies the flavor of SQL to use.
         Ignored when using SQLAlchemy engine. Required when using DBAPI2 connection.
     Returns
     -------
@@ -63,7 +60,7 @@ def execute(sql, con, cur=None, params=[], engine=None, flavor='sqlite'):
     return pandas_sql.execute(*args)
 
 
-def tquery(sql, con, cur=None, params=[], engine=None, flavor='sqlite'):
+def tquery(sql, con, cur=None, params=[], flavor='sqlite'):
     """
     Returns list of tuples corresponding to each row in given sql
     query.
@@ -81,9 +78,15 @@ def tquery(sql, con, cur=None, params=[], flavor='sqlite'):
     cur: depreciated, cursor is obtained from connection
     params: list or tuple, optional
         List of parameters to pass to execute method.
-    flavor : string {sqlite, mysql} specifying the flavor of SQL to use.
+    flavor : string "sqlite", "mysql"
+        Specifies the flavor of SQL to use.
        Ignored when using SQLAlchemy engine. Required when using DBAPI2 connection.
+    Returns
+    -------
+    Results Iterable
     """
+    warnings.warn("tquery is deprecated, and will be removed in future versions", DeprecationWarning)
+
     pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
     args = _convert_params(sql, params)
     return pandas_sql.tquery(*args)
@@ -105,9 +108,14 @@ def uquery(sql, con, cur=None, params=[], engine=None, flavor='sqlite'):
     cur: depreciated, cursor is obtained from connection
     params: list or tuple, optional
         List of parameters to pass to execute method.
-    flavor : string {sqlite, mysql} specifying the flavor of SQL to use.
+    flavor : string "sqlite", "mysql"
+        Specifies the flavor of SQL to use.
        Ignored when using SQLAlchemy engine. Required when using DBAPI2 connection.
+    Returns
+    -------
+    Number of affected rows
     """
+    warnings.warn("uquery is deprecated, and will be removed in future versions", DeprecationWarning)
     pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
     args = _convert_params(sql, params)
     return pandas_sql.uquery(*args)
@@ -123,7 +131,7 @@ def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, param
     string.
 
     Optionally provide an index_col parameter to use one of the
-    columns as the index. Otherwise will be 0 to len(results) - 1.
+    columns as the index, otherwise a default integer index will be used.
 
     Parameters
     ----------
@@ -143,11 +151,11 @@ def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, param
     cur: depreciated, cursor is obtained from connection
     params: list or tuple, optional
         List of parameters to pass to execute method.
-    flavor : string {sqlite, mysql} specifying the flavor of SQL to use.
-        Ignored when using SQLAlchemy engine. Required when using DBAPI2 connection.
+    Returns
+    -------
+    DataFrame
     """
-
     pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
     return pandas_sql.read_sql(sql, index_col=index_col, params=params,
                                coerce_float=coerce_float)
@@ -174,6 +182,27 @@ def to_sql(frame, name, con, flavor='sqlite', if_exists='fail'):
     pandas_sql.to_sql(frame, name, if_exists=if_exists)
 
 
+def has_table(table_name, con, meta=None, flavor='sqlite'):
+    """
+    Check if DB has named table
+
+    Parameters
+    ----------
+    table_name: name of SQL table
+    con: SQLAlchemy engine or DBAPI2 connection (legacy mode)
+        Using SQLAlchemy makes it possible to use any DB supported by that
+        library.
+        If a DBAPI2 object is given, a supported SQL flavor name must also be provided
+    flavor: {'sqlite', 'mysql'}, default 'sqlite', ignored when using engine
+    Returns
+    -------
+    boolean
+    """
+    pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
+    return pandas_sql.has_table(table_name)
+
+
 # This is an awesome function
 def read_table(table_name, con, meta=None, index_col=None, coerce_float=True):
     """Given a table name and SQLAlchemy engine, return a DataFrame.
@@ -188,7 +217,9 @@ def read_table(table_name, con, meta=None, index_col=None, coerce_float=True):
     coerce_float : boolean, default True
         Attempt to convert values to non-string, non-numeric objects (like
         decimal.Decimal) to floating point. Can result in loss of Precision.
-
+    Returns
+    -------
+    DataFrame
     """
     pandas_sql = PandasSQLWithEngine(con, meta=meta)
     table = pandas_sql.get_table(table_name)
@@ -197,7 +228,7 @@ def read_table(table_name, con, meta=None, index_col=None, coerce_float=True):
         sql_select = table.select()
         return pandas_sql.read_sql(sql_select, index_col=index_col, coerce_float=coerce_float)
     else:
-        raise ValueError("Table %s not found with %s." % table_name, con)
+        raise ValueError("Table %s not found" % table_name)
 
 
 def pandasSQL_builder(con, flavor=None, meta=None):
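# A usage sketch for the has_table helper introduced above (illustrative only;
# 'data', engine and cnx are assumed from the documentation examples earlier):
#
#     sql.has_table('data', engine)                  # SQLAlchemy engine
#     sql.has_table('data', cnx, flavor='sqlite')    # legacy DBAPI2 connection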
@@ -211,7 +242,7 @@ def pandasSQL_builder(con, flavor=None, meta=None):
         if isinstance(con, sqlalchemy.engine.Engine):
             return PandasSQLWithEngine(con, meta=meta)
         else:
-            warnings.warn("Not a valid SQLAlchemy engine, attempting to use as legacy DBAPI connection")
+            warnings.warn("Not an SQLAlchemy engine, attempting to use as legacy DBAPI connection")
             if flavor is None:
                 raise ValueError("""PandasSQL must be created with an SQLAlchemy engine
                     or a DBAPI2 connection and SQL flavour""")
@@ -496,7 +527,7 @@ def read_sql(self, sql, index_col=None, coerce_float=True, params=[], flavor='sq
                              index_col=index_col,
                              coerce_float=coerce_float)
 
-    def to_sql(self, frame, name, con=None, if_exists='fail'):
+    def to_sql(self, frame, name, if_exists='fail'):
         """
         Write records stored in a DataFrame to a SQL database.
 
@@ -504,7 +535,6 @@
         ----------
         frame: DataFrame
         name: name of SQL table
-        con: an open SQL database connection object
         flavor: {'sqlite', 'mysql', 'postgres'}, default 'sqlite'
         if_exists: {'fail', 'replace', 'append'}, default 'fail'
             fail: If table exists, do nothing.
@@ -618,8 +648,8 @@ def _get_sqltype(self, pytype):
         return _SQL_TYPES[pytype_name][self.flavor]
 
 
-# legacy names
-def get_schema(frame, name, con=None, flavor='sqlite', engine=None):
+# legacy names, with deprecation warnings and copied docs
+def get_schema(frame, name, con, flavor='sqlite'):
     """
     Get the SQL db table schema for the given frame
 
@@ -636,8 +666,21 @@
     return pandas_sql._create_sql_schema()
 
 
+def read_frame(*args, **kwargs):
+    """DEPRECATED - use read_sql
+    """
+    warnings.warn("read_frame is deprecated, use read_sql", DeprecationWarning)
+    return read_sql(*args, **kwargs)
+
+
+def write_frame(*args, **kwargs):
+    """DEPRECATED - use to_sql
+    """
+    warnings.warn("write_frame is deprecated, use to_sql", DeprecationWarning)
+    return to_sql(*args, **kwargs)
+
-#TODO: add depreciation warnings
-read_frame = read_sql
-write_frame = to_sql
+
+# Append wrapped function docstrings
+read_frame.__doc__ += read_sql.__doc__
+write_frame.__doc__ += to_sql.__doc__
diff --git a/pandas/io/sql_legacy.py b/pandas/io/sql_legacy.py
deleted file mode 100644
index a8a5d968dd02d..0000000000000
--- a/pandas/io/sql_legacy.py
+++ /dev/null
@@ -1,332 +0,0 @@
-"""
-Collection of query wrappers / abstractions to both facilitate data
-retrieval and to reduce dependency on DB-specific API.
-"""
-from datetime import datetime, date
-
-import numpy as np
-import traceback
-
-from pandas.core.datetools import format as date_format
-from pandas.core.api import DataFrame, isnull
-
-#------------------------------------------------------------------------------
-# Helper execution function
-
-
-def execute(sql, con, retry=True, cur=None, params=None):
-    """
-    Execute the given SQL query using the provided connection object.
-
-    Parameters
-    ----------
-    sql: string
-        Query to be executed
-    con: database connection instance
-        Database connection. Must implement PEP249 (Database API v2.0).
-    retry: bool
-        Not currently implemented
-    cur: database cursor, optional
-        Must implement PEP249 (Datbase API v2.0). If cursor is not provided,
-        one will be obtained from the database connection.
-    params: list or tuple, optional
-        List of parameters to pass to execute method.
- - Returns - ------- - Cursor object - """ - try: - if cur is None: - cur = con.cursor() - - if params is None: - cur.execute(sql) - else: - cur.execute(sql, params) - return cur - except Exception: - try: - con.rollback() - except Exception: # pragma: no cover - pass - - print ('Error on sql %s' % sql) - raise - - -def _safe_fetch(cur): - try: - result = cur.fetchall() - if not isinstance(result, list): - result = list(result) - return result - except Exception, e: # pragma: no cover - excName = e.__class__.__name__ - if excName == 'OperationalError': - return [] - - -def tquery(sql, con=None, cur=None, retry=True): - """ - Returns list of tuples corresponding to each row in given sql - query. - - If only one column selected, then plain list is returned. - - Parameters - ---------- - sql: string - SQL query to be executed - con: SQLConnection or DB API 2.0-compliant connection - cur: DB API 2.0 cursor - - Provide a specific connection or a specific cursor if you are executing a - lot of sequential statements and want to commit outside. - """ - cur = execute(sql, con, cur=cur) - result = _safe_fetch(cur) - - if con is not None: - try: - cur.close() - con.commit() - except Exception as e: - excName = e.__class__.__name__ - if excName == 'OperationalError': # pragma: no cover - print ('Failed to commit, may need to restart interpreter') - else: - raise - - traceback.print_exc() - if retry: - return tquery(sql, con=con, retry=False) - - if result and len(result[0]) == 1: - # python 3 compat - result = list(list(zip(*result))[0]) - elif result is None: # pragma: no cover - result = [] - - return result - - -def uquery(sql, con=None, cur=None, retry=True, params=None): - """ - Does the same thing as tquery, but instead of returning results, it - returns the number of rows affected. Good for update queries. - """ - cur = execute(sql, con, cur=cur, retry=retry, params=params) - - result = cur.rowcount - try: - con.commit() - except Exception as e: - excName = e.__class__.__name__ - if excName != 'OperationalError': - raise - - traceback.print_exc() - if retry: - print ('Looks like your connection failed, reconnecting...') - return uquery(sql, con, retry=False) - return result - - -def read_frame(sql, con, index_col=None, coerce_float=True, params=None): - """ - Returns a DataFrame corresponding to the result set of the query - string. - - Optionally provide an index_col parameter to use one of the - columns as the index. Otherwise will be 0 to len(results) - 1. - - Parameters - ---------- - sql: string - SQL query to be executed - con: DB connection object, optional - index_col: string, optional - column name to use for the returned DataFrame object. - coerce_float : boolean, default True - Attempt to convert values to non-string, non-numeric objects (like - decimal.Decimal) to floating point, useful for SQL result sets - params: list or tuple, optional - List of parameters to pass to execute method. - """ - cur = execute(sql, con, params=params) - rows = _safe_fetch(cur) - columns = [col_desc[0] for col_desc in cur.description] - - cur.close() - con.commit() - - result = DataFrame.from_records(rows, columns=columns, - coerce_float=coerce_float) - - if index_col is not None: - result = result.set_index(index_col) - - return result - -frame_query = read_frame -read_sql = read_frame - - -def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): - """ - Write records stored in a DataFrame to a SQL database. 
- - Parameters - ---------- - frame: DataFrame - name: name of SQL table - con: an open SQL database connection object - flavor: {'sqlite', 'mysql', 'oracle'}, default 'sqlite' - if_exists: {'fail', 'replace', 'append'}, default 'fail' - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. - """ - - if 'append' in kwargs: - import warnings - warnings.warn("append is deprecated, use if_exists instead", - FutureWarning) - if kwargs['append']: - if_exists='append' - else: - if_exists='fail' - exists = table_exists(name, con, flavor) - if if_exists == 'fail' and exists: - raise ValueError, "Table '%s' already exists." % name - - #create or drop-recreate if necessary - create = None - if exists and if_exists == 'replace': - create = "DROP TABLE %s" % name - elif not exists: - create = get_schema(frame, name, flavor) - - if create is not None: - cur = con.cursor() - cur.execute(create) - cur.close() - - cur = con.cursor() - # Replace spaces in DataFrame column names with _. - safe_names = [s.replace(' ', '_').strip() for s in frame.columns] - flavor_picker = {'sqlite' : _write_sqlite, - 'mysql' : _write_mysql} - - func = flavor_picker.get(flavor, None) - if func is None: - raise NotImplementedError - func(frame, name, safe_names, cur) - cur.close() - con.commit() - - -def _write_sqlite(frame, table, names, cur): - bracketed_names = ['[' + column + ']' for column in names] - col_names = ','.join(bracketed_names) - wildcards = ','.join(['?'] * len(names)) - insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( - table, col_names, wildcards) - # pandas types are badly handled if there is only 1 column ( Issue #3628 ) - if not len(frame.columns )==1 : - data = [tuple(x) for x in frame.values] - else : - data = [tuple(x) for x in frame.values.tolist()] - cur.executemany(insert_query, data) - - -def _write_mysql(frame, table, names, cur): - bracketed_names = ['`' + column + '`' for column in names] - col_names = ','.join(bracketed_names) - wildcards = ','.join([r'%s'] * len(names)) - insert_query = "INSERT INTO %s (%s) VALUES (%s)" % ( - table, col_names, wildcards) - data = [tuple(x) for x in frame.values] - cur.executemany(insert_query, data) - - -def table_exists(name, con, flavor): - flavor_map = { - 'sqlite': ("SELECT name FROM sqlite_master " - "WHERE type='table' AND name='%s';") % name, - 'mysql' : "SHOW TABLES LIKE '%s'" % name} - query = flavor_map.get(flavor, None) - if query is None: - raise NotImplementedError - return len(tquery(query, con)) > 0 - - -def get_sqltype(pytype, flavor): - sqltype = {'mysql': 'VARCHAR (63)', - 'sqlite': 'TEXT'} - - if issubclass(pytype, np.floating): - sqltype['mysql'] = 'FLOAT' - sqltype['sqlite'] = 'REAL' - - if issubclass(pytype, np.integer): - #TODO: Refine integer size. - sqltype['mysql'] = 'BIGINT' - sqltype['sqlite'] = 'INTEGER' - - if issubclass(pytype, np.datetime64) or pytype is datetime: - # Caution: np.datetime64 is also a subclass of np.number. - sqltype['mysql'] = 'DATETIME' - sqltype['sqlite'] = 'TIMESTAMP' - - if pytype is datetime.date: - sqltype['mysql'] = 'DATE' - sqltype['sqlite'] = 'TIMESTAMP' - - if issubclass(pytype, np.bool_): - sqltype['sqlite'] = 'INTEGER' - - return sqltype[flavor] - - -def get_schema(frame, name, flavor, keys=None): - "Return a CREATE TABLE statement to suit the contents of a DataFrame." - lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) - # Replace spaces in DataFrame column names with _. 
- safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index] - column_types = zip(safe_columns, map(lookup_type, frame.dtypes)) - if flavor == 'sqlite': - columns = ',\n '.join('[%s] %s' % x for x in column_types) - else: - columns = ',\n '.join('`%s` %s' % x for x in column_types) - - keystr = '' - if keys is not None: - if isinstance(keys, basestring): - keys = (keys,) - keystr = ', PRIMARY KEY (%s)' % ','.join(keys) - template = """CREATE TABLE %(name)s ( - %(columns)s - %(keystr)s - );""" - create_statement = template % {'name': name, 'columns': columns, - 'keystr': keystr} - return create_statement - - -def sequence2dict(seq): - """Helper function for cx_Oracle. - - For each element in the sequence, creates a dictionary item equal - to the element and keyed by the position of the item in the list. - >>> sequence2dict(("Matt", 1)) - {'1': 'Matt', '2': 1} - - Source: - http://www.gingerandjohn.com/archives/2004/02/26/cx_oracle-executemany-example/ - """ - d = {} - for k,v in zip(range(1, 1 + len(seq)), seq): - d[str(k)] = v - return d diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index d99b0d20a04fd..3b5e0ff4dfb72 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -6,29 +6,24 @@ import numpy as np -#from pandas.core.datetools import format as date_format from pandas import DataFrame from pandas.compat import range, lrange, iteritems - +#from pandas.core.datetools import format as date_format import pandas.io.sql as sql import pandas.util.testing as tm -import sqlalchemy - -class TestSQLAlchemy(unittest.TestCase): - ''' - Test the sqlalchemy backend against an in-memory sqlite database. - Assume that sqlalchemy takes case of the DB specifics - ''' +try: + import sqlalchemy + SQLALCHEMY_INSTALLED = True +except ImportError: + SQLALCHEMY_INSTALLED = False - def setUp(self): - self.engine = sqlalchemy.create_engine('sqlite:///:memory:') - self._load_iris_data(self.engine) - self.test_frame_time = tm.makeTimeDataFrame() - self._load_test1_data() +class PandasSQLTest(unittest.TestCase): + """Base class with common private methods for + SQLAlchemy and fallback case test suits""" def _load_iris_data(self, engine): self.dirpath = tm.get_data_path() @@ -49,7 +44,14 @@ def _load_iris_data(self, engine): VALUES(?, ?, ?, ?, ?) 
""" for row in r: - engine.execute(ins, *row) + engine.execute(ins, row) + + def _check_iris_loaded_frame(self, iris_frame): + pytype = iris_frame.dtypes[0].type + row = iris_frame.iloc[0] + + self.assertTrue(issubclass(pytype, np.floating), 'Loaded frame has incorrect type') + tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def _load_test1_data(self): test1_csv_file = os.path.join(self.dirpath, 'test1.csv') @@ -58,106 +60,165 @@ def _load_test1_data(self): dr = csv.DictReader(test1_csv) self.test_frame1 = DataFrame(list(dr)) - def _test_iris_loaded_frame(self, iris_frame): - pytype = iris_frame.dtypes[0].type - row = iris_frame.iloc[0] + def _count_rows(self, table_name, con): + result = con.execute("SELECT count(*) AS count_1 FROM %s" % table_name).fetchone() + return result[0] - self.assertTrue(issubclass(pytype, np.floating), 'Loaded frame has incorrect type') - tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) + def _read_sql_iris(self): + iris_frame = self.pandasSQL.read_sql("SELECT * FROM iris") + self._check_iris_loaded_frame(iris_frame) - def test_read_sql(self): - iris_frame = sql.read_sql("SELECT * FROM iris", con=self.engine) - self._test_iris_loaded_frame(iris_frame) - - def test_read_table(self): - iris_frame = sql.read_table("iris", con=self.engine) - self._test_iris_loaded_frame(iris_frame) - - def test_to_sql(self): + def _to_sql(self): # Nuke table - self.engine.execute("DROP TABLE IF EXISTS test_frame1") + self.drop_table('test_frame1', self.conn) - sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine) - self.assertTrue(self.engine.has_table('test_frame1'), 'Table not written to DB') + self.pandasSQL.to_sql(self.test_frame1, 'test_frame1') + self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') # Nuke table - self.engine.execute("DROP TABLE IF EXISTS test_frame1") + self.drop_table('test_frame1', self.conn) - def test_to_sql_fail(self): + def _to_sql_fail(self): # Nuke table - self.engine.execute("DROP TABLE IF EXISTS test_frame1") + self.drop_table('test_frame1', self.conn) - sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, if_exists='fail') - self.assertTrue(self.engine.has_table('test_frame1'), 'Table not written to DB') + self.pandasSQL.to_sql(self.test_frame1, 'test_frame1', if_exists='fail') + self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') - self.assertRaises(ValueError, sql.to_sql, self.test_frame1, 'test_frame1', con=self.engine, if_exists='fail') + self.assertRaises(ValueError, self.pandasSQL.to_sql, self.test_frame1, 'test_frame1', if_exists='fail') # Nuke table - self.engine.execute("DROP TABLE IF EXISTS test_frame1") + self.drop_table('test_frame1', self.conn) - def test_to_sql_replace(self): + def _to_sql_replace(self): # Nuke table just in case - self.engine.execute("DROP TABLE IF EXISTS test_frame1") - sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, if_exists='fail') + self.drop_table('test_frame1', self.conn) + + self.pandasSQL.to_sql(self.test_frame1, 'test_frame1', if_exists='fail') # Add to table again - sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, if_exists='replace') - self.assertTrue(self.engine.has_table('test_frame1'), 'Table not written to DB') + self.pandasSQL.to_sql(self.test_frame1, 'test_frame1', if_exists='replace') + self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') num_entries = len(self.test_frame1) - - result = self.engine.execute("SELECT count(*) AS count_1 
FROM test_frame1").fetchone() - num_rows = result[0] + num_rows = self._count_rows('test_frame1', self.conn) self.assertEqual(num_rows, num_entries, "not the same number of rows as entries") # Nuke table - self.engine.execute("DROP TABLE IF EXISTS test_frame1") + self.drop_table('test_frame1', self.conn) - def test_to_sql_append(self): + def _to_sql_append(self): # Nuke table just in case - self.engine.execute("DROP TABLE IF EXISTS test_frame1") - sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, if_exists='fail') + self.drop_table('test_frame1', self.conn) + + self.pandasSQL.to_sql(self.test_frame1, 'test_frame1', if_exists='fail') + # Add to table again - sql.to_sql(self.test_frame1, 'test_frame1', con=self.engine, if_exists='append') - self.assertTrue(self.engine.has_table('test_frame1'), 'Table not written to DB') + self.pandasSQL.to_sql(self.test_frame1, 'test_frame1', if_exists='append') + self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') num_entries = 2*len(self.test_frame1) - result = self.engine.execute("SELECT count(*) AS count_1 FROM test_frame1").fetchone() - num_rows = result[0] + num_rows = self._count_rows('test_frame1', self.conn) self.assertEqual(num_rows, num_entries, "not the same number of rows as entries") # Nuke table - self.engine.execute("DROP TABLE IF EXISTS test_frame1") + self.drop_table('test_frame1', self.conn) - def test_create_table(self): - temp_engine = sqlalchemy.create_engine('sqlite:///:memory:') - temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) + def _roundtrip(self): + self.pandasSQL.to_sql(self.test_frame1, 'test_frame_roundtrip') + result = self.pandasSQL.read_sql('SELECT * FROM test_frame_roundtrip') - pandasSQL = sql.PandasSQLWithEngine(temp_engine) - pandasSQL._create_table(temp_frame, 'temp_frame') + # HACK! + result.index = self.test_frame1.index - self.assertTrue(temp_engine.has_table('temp_frame'), 'Table not written to DB') + tm.assert_frame_equal(result, self.test_frame1) - def test_drop_table(self): - temp_engine = sqlalchemy.create_engine('sqlite:///:memory:') + def _execute_sql(self): + # drop_sql = "DROP TABLE IF EXISTS test" # should already be done + iris_results = self.pandasSQL.execute("SELECT * FROM iris") + row = iris_results.fetchone() + tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) - temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) + def _tquery(self): + iris_results = self.pandasSQL.tquery("SELECT * FROM iris") + row = iris_results[0] + tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) - pandasSQL = sql.PandasSQLWithEngine(temp_engine) - pandasSQL._create_table(temp_frame, 'temp_frame') - self.assertTrue(temp_engine.has_table('temp_frame'), 'Table not written to DB') +class TestSQLApi(PandasSQLTest): + """Test the public API as it would be used + directly, including legacy names - pandasSQL._drop_table('temp_frame') + Notes: + flavor can always be passed even in SQLAlchemy mode, + should be correctly ignored. 
- self.assertFalse(temp_engine.has_table('temp_frame'), 'Table not deleted from DB') + we don't use drop_table because that isn't part of the public api - def test_roundtrip(self): - #temp_engine = sqlalchemy.create_engine('sqlite:///:memory:') + """ + def connect(self): + if SQLALCHEMY_INSTALLED: + return sqlalchemy.create_engine('sqlite:///:memory:') + else: + return sqlite3.connect(':memory:') - sql.to_sql(self.test_frame1, 'test_frame_roundtrip', con=self.engine) - result = sql.read_table('test_frame_roundtrip', con=self.engine) + def setUp(self): + self.conn = self.connect() + self._load_iris_data(self.conn) + self._load_test1_data() + + def test_read_sql_iris(self): + iris_frame = sql.read_sql("SELECT * FROM iris", self.conn, flavor='sqlite') + self._check_iris_loaded_frame(iris_frame) + + def test_legacy_read_frame(self): + """Test legacy name read_frame""" + iris_frame = sql.read_frame("SELECT * FROM iris", self.conn, flavor='sqlite') + self._check_iris_loaded_frame(iris_frame) + + def test_to_sql(self): + sql.to_sql(self.test_frame1, 'test_frame1', self.conn, flavor='sqlite') + self.assertTrue(sql.has_table('test_frame1', self.conn, flavor='sqlite'), 'Table not written to DB') + + def test_to_sql_fail(self): + sql.to_sql(self.test_frame1, 'test_frame2', self.conn, flavor='sqlite', if_exists='fail') + self.assertTrue(sql.has_table('test_frame2', self.conn, flavor='sqlite'), 'Table not written to DB') + + self.assertRaises(ValueError, sql.to_sql, self.test_frame1, 'test_frame2', self.conn, flavor='sqlite', if_exists='fail') + + def test_to_sql_replace(self): + sql.to_sql(self.test_frame1, 'test_frame3', self.conn, flavor='sqlite', if_exists='fail') + # Add to table again + sql.to_sql(self.test_frame1, 'test_frame3', self.conn, flavor='sqlite', if_exists='replace') + self.assertTrue(sql.has_table('test_frame3', self.conn, flavor='sqlite'), 'Table not written to DB') + + num_entries = len(self.test_frame1) + num_rows = self._count_rows('test_frame3', self.conn) + + self.assertEqual(num_rows, num_entries, "not the same number of rows as entries") + + def test_to_sql_append(self): + sql.to_sql(self.test_frame1, 'test_frame4', self.conn, flavor='sqlite', if_exists='fail') + + # Add to table again + sql.to_sql(self.test_frame1, 'test_frame4', self.conn, flavor='sqlite', if_exists='append') + self.assertTrue(sql.has_table('test_frame4', self.conn, flavor='sqlite'), 'Table not written to DB') + + num_entries = 2*len(self.test_frame1) + num_rows = self._count_rows('test_frame4', self.conn) + + self.assertEqual(num_rows, num_entries, "not the same number of rows as entries") + + def test_legacy_write_frame(self): + """Test legacy write frame name. + Assume that functionality is already tested above so just do quick check that it basically works""" + sql.write_frame(self.test_frame1, 'test_frame_legacy', self.conn, flavor='sqlite') + self.assertTrue(sql.has_table('test_frame_legacy', self.conn, flavor='sqlite'), 'Table not written to DB') + + def test_roundtrip(self): + sql.to_sql(self.test_frame1, 'test_frame_roundtrip', con=self.conn, flavor='sqlite') + result = sql.read_sql('SELECT * FROM test_frame_roundtrip', con=self.conn, flavor='sqlite') # HACK! 
result.index = self.test_frame1.index
@@ -166,147 +227,138 @@ def test_roundtrip(self):

     def test_execute_sql(self):
         # drop_sql = "DROP TABLE IF EXISTS test"  # should already be done
-        iris_results = sql.execute("SELECT * FROM iris", con=self.engine)
+        iris_results = sql.execute("SELECT * FROM iris", con=self.conn, flavor='sqlite')
         row = iris_results.fetchone()
         tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])

     def test_tquery(self):
-        iris_results = sql.tquery("SELECT * FROM iris", con=self.engine)
+        iris_results = sql.tquery("SELECT * FROM iris", con=self.conn, flavor='sqlite')
         row = iris_results[0]
         tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])


-# --- Test SQLITE fallback
-
-class TestSQLite(unittest.TestCase):
+class TestSQLAlchemy(PandasSQLTest):
     '''
     Test the sqlalchemy backend against an in-memory sqlite database.
     Assume that sqlalchemy takes care of the DB specifics
     '''
+    def connect(self):
+        return sqlalchemy.create_engine('sqlite:///:memory:')
+
+    def drop_table(self, table_name, conn):
+        conn.execute("DROP TABLE IF EXISTS %s" % table_name)

     def setUp(self):
-        self.conn = sqlite3.connect(':memory:')
-        self.pandasSQL = sql.PandasSQLWithCon(self.conn, 'sqlite')
+        # Skip this test if SQLAlchemy not available
+        if not SQLALCHEMY_INSTALLED:
+            raise unittest.SkipTest('SQLAlchemy not installed')
+
+        self.conn = self.connect()
+        self.pandasSQL = sql.PandasSQLWithEngine(self.conn)

         self._load_iris_data(self.conn)
-        self.test_frame_time = tm.makeTimeDataFrame()
         self._load_test1_data()

-    def _load_iris_data(self, conn):
-        self.dirpath = tm.get_data_path()
-        iris_csv_file = os.path.join(self.dirpath, 'iris.csv')
-        cur = conn.cursor()
-        cur.execute("""CREATE TABLE iris (
-                `SepalLength` REAL,
-                `SepalWidth` REAL,
-                `PetalLength` REAL,
-                `PetalWidth` REAL,
-                `Name` TEXT
-            )""")
+    def test_read_sql(self):
+        self._read_sql_iris()

-        with open(iris_csv_file, 'rU') as iris_csv:
-            r = csv.reader(iris_csv)
-            next(r)  # skip header row
-            ins = """
-                INSERT INTO iris
-                VALUES(?, ?, ?, ?, ?)
- """ - for row in r: - cur.execute(ins, row) - conn.commit() + def test_read_table(self): + iris_frame = sql.read_table("iris", con=self.conn) + self._check_iris_loaded_frame(iris_frame) - def _load_test1_data(self): - test1_csv_file = os.path.join(self.dirpath, 'test1.csv') + def test_read_table_absent(self): + self.assertRaises(ValueError, sql.read_table, "this_doesnt_exist", con=self.conn) - with open(test1_csv_file, 'rU') as test1_csv: - dr = csv.DictReader(test1_csv) - self.test_frame1 = DataFrame(list(dr)) + def test_to_sql(self): + self._to_sql() - def test_read_sql(self): - iris_frame = sql.read_sql("SELECT * FROM iris", con=self.conn) - pytype = iris_frame.dtypes[0].type - row = iris_frame.iloc[0] + def test_to_sql_fail(self): + self._to_sql_fail() - self.assertTrue(issubclass(pytype, np.floating), 'Loaded frame has incorrect type') - tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) + def test_to_sql_replace(self): + self._to_sql_replace() - def test_to_sql(self): - # Nuke table - cur = self.conn.cursor() - cur.execute("DROP TABLE IF EXISTS test_frame1") - self.conn.commit() - - sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, flavor='sqlite') - self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') + def test_to_sql_append(self): + self._to_sql_append() - # Nuke table - cur = self.conn.cursor() - cur.execute("DROP TABLE IF EXISTS test_frame1") - self.conn.commit() + def test_create_table(self): + temp_conn = self.connect() + temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) - def test_to_sql_fail(self): - # Nuke table - cur = self.conn.cursor() - cur.execute("DROP TABLE IF EXISTS test_frame1") - self.conn.commit() - sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, if_exists='fail', flavor='sqlite') - self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') + pandasSQL = sql.PandasSQLWithEngine(temp_conn) + pandasSQL._create_table(temp_frame, 'temp_frame') - self.assertRaises(ValueError, sql.to_sql, self.test_frame1, 'test_frame1', con=self.conn, if_exists='fail') + self.assertTrue(temp_conn.has_table('temp_frame'), 'Table not written to DB') - # Nuke table - cur = self.conn.cursor() - cur.execute("DROP TABLE IF EXISTS test_frame1") - self.conn.commit() + def test_drop_table(self): + temp_conn = self.connect() - def test_to_sql_replace(self): - # Nuke table just in case - cur = self.conn.cursor() - cur.execute("DROP TABLE IF EXISTS test_frame1") - self.conn.commit() - sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, if_exists='fail', flavor='sqlite') - # Add to table again - sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, if_exists='replace') - self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB') + temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) - num_entries = len(self.test_frame1) + pandasSQL = sql.PandasSQLWithEngine(temp_conn) + pandasSQL._create_table(temp_frame, 'temp_frame') - result = self.conn.execute("SELECT count(*) AS count_1 FROM test_frame1").fetchone() - num_rows = result[0] + self.assertTrue(temp_conn.has_table('temp_frame'), 'Table not written to DB') - self.assertEqual(num_rows, num_entries, "not the same number of rows as entries") + pandasSQL._drop_table('temp_frame') - # Nuke table - cur = self.conn.cursor() - cur.execute("DROP TABLE IF EXISTS test_frame1") - self.conn.commit() + self.assertFalse(temp_conn.has_table('temp_frame'), 'Table not deleted from DB') - def 
test_to_sql_append(self):
-        # Nuke table just in case
-        cur = self.conn.cursor()
-        cur.execute("DROP TABLE IF EXISTS test_frame1")
-        self.conn.commit()
+    def test_roundtrip(self):
+        self._roundtrip()

-        sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, if_exists='fail', flavor='sqlite')
+    def test_execute_sql(self):
+        self._execute_sql()

-        # Add to table again
-        sql.to_sql(self.test_frame1, 'test_frame1', con=self.conn, if_exists='append')
-        self.assertTrue(self.pandasSQL.has_table('test_frame1'), 'Table not written to DB')
+    def test_tquery(self):
+        self._tquery()

-        num_entries = 2*len(self.test_frame1)
-        result = self.conn.execute("SELECT count(*) AS count_1 FROM test_frame1").fetchone()
-        num_rows = result[0]
-        self.assertEqual(num_rows, num_entries, "not the same number of rows as entries")
+# --- Test SQLITE fallback

-        # Nuke table
+
+class TestSQLite(PandasSQLTest):
+    '''
+    Test the fallback mode against an in-memory sqlite database,
+    using a plain DBAPI2 connection without SQLAlchemy
+    '''
+    def connect(self):
+        return sqlite3.connect(':memory:')
+
+    def drop_table(self, table_name, conn):
         cur = self.conn.cursor()
-        cur.execute("DROP TABLE IF EXISTS test_frame1")
+        cur.execute("DROP TABLE IF EXISTS %s" % table_name)
         self.conn.commit()

+    def setUp(self):
+        self.conn = self.connect()
+        self.pandasSQL = sql.PandasSQLWithCon(self.conn, 'sqlite')
+
+        self._load_iris_data(self.conn)
+
+        self._load_test1_data()
+
+    def test_invalid_flavor(self):
+        self.assertRaises(NotImplementedError, sql.PandasSQLWithCon, self.conn, 'oracle')
+
+    def test_read_sql(self):
+        self._read_sql_iris()
+
+    def test_to_sql(self):
+        self._to_sql()
+
+    def test_to_sql_fail(self):
+        self._to_sql_fail()
+
+    def test_to_sql_replace(self):
+        self._to_sql_replace()
+
+    def test_to_sql_append(self):
+        self._to_sql_append()
+
     def test_create_table(self):
-        temp_conn = sqlite3.connect(':memory:')
+        temp_conn = self.connect()
         temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})

         pandasSQL = sql.PandasSQLWithCon(temp_conn, 'sqlite')
@@ -315,7 +367,7 @@ def test_create_table(self):
         self.assertTrue(pandasSQL.has_table('temp_frame'), 'Table not written to DB')

     def test_drop_table(self):
-        temp_conn = sqlite3.connect(':memory:')
+        temp_conn = self.connect()

         temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})

@@ -329,27 +381,13 @@ def test_drop_table(self):
         self.assertFalse(pandasSQL.has_table('temp_frame'), 'Table not deleted from DB')

     def test_roundtrip(self):
-
-        sql.to_sql(self.test_frame1, 'test_frame_roundtrip', con=self.conn, flavor='sqlite')
-        result = sql.read_sql('SELECT * FROM test_frame_roundtrip', con=self.conn, flavor='sqlite')
-
-        # HACK!
- result.index = self.test_frame1.index - - tm.assert_frame_equal(result, self.test_frame1) + self._roundtrip() def test_execute_sql(self): - # drop_sql = "DROP TABLE IF EXISTS test" # should already be done - iris_results = sql.execute("SELECT * FROM iris", con=self.conn, flavor='sqlite') - row = iris_results.fetchone() - tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) + self._execute_sql() def test_tquery(self): - iris_results = sql.tquery("SELECT * FROM iris", con=self.conn, flavor='sqlite') - row = iris_results[0] - tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) - - + self._tquery() """ diff --git a/pandas/io/tests/test_sql_legacy.py b/pandas/io/tests/test_sql_legacy.py deleted file mode 100644 index 3c6e992097d30..0000000000000 --- a/pandas/io/tests/test_sql_legacy.py +++ /dev/null @@ -1,497 +0,0 @@ -from __future__ import with_statement -from pandas.compat import StringIO -import unittest -import sqlite3 -import sys - -import warnings - -import nose - -import numpy as np - -from pandas.core.datetools import format as date_format -from pandas.core.api import DataFrame, isnull -from pandas.compat import StringIO, range, lrange -import pandas.compat as compat - -import pandas.io.sql as sql -from pandas.io.sql import DatabaseError -import pandas.util.testing as tm -from pandas import Series, Index, DataFrame -from datetime import datetime - -_formatters = { - datetime: lambda dt: "'%s'" % date_format(dt), - str: lambda x: "'%s'" % x, - np.str_: lambda x: "'%s'" % x, - compat.text_type: lambda x: "'%s'" % x, - compat.binary_type: lambda x: "'%s'" % x, - float: lambda x: "%.8f" % x, - int: lambda x: "%s" % x, - type(None): lambda x: "NULL", - np.float64: lambda x: "%.10f" % x, - bool: lambda x: "'%s'" % x, -} - -def format_query(sql, *args): - """ - - """ - processed_args = [] - for arg in args: - if isinstance(arg, float) and isnull(arg): - arg = None - - formatter = _formatters[type(arg)] - processed_args.append(formatter(arg)) - - return sql % tuple(processed_args) - -def _skip_if_no_MySQLdb(): - try: - import MySQLdb - except ImportError: - raise nose.SkipTest('MySQLdb not installed, skipping') - -class TestSQLite(unittest.TestCase): - - def setUp(self): - self.db = sqlite3.connect(':memory:') - - def test_basic(self): - frame = tm.makeTimeDataFrame() - self._check_roundtrip(frame) - - def test_write_row_by_row(self): - frame = tm.makeTimeDataFrame() - frame.ix[0, 0] = np.nan - create_sql = sql.get_schema(frame, 'test', 'sqlite') - cur = self.db.cursor() - cur.execute(create_sql) - - cur = self.db.cursor() - - ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" - for idx, row in frame.iterrows(): - fmt_sql = format_query(ins, *row) - sql.tquery(fmt_sql, cur=cur) - - self.db.commit() - - result = sql.read_frame("select * from test", con=self.db) - result.index = frame.index - tm.assert_frame_equal(result, frame) - - def test_execute(self): - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'sqlite') - cur = self.db.cursor() - cur.execute(create_sql) - ins = "INSERT INTO test VALUES (?, ?, ?, ?)" - - row = frame.ix[0] - sql.execute(ins, self.db, params=tuple(row)) - self.db.commit() - - result = sql.read_frame("select * from test", self.db) - result.index = frame.index[:1] - tm.assert_frame_equal(result, frame[:1]) - - def test_schema(self): - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'sqlite') - lines = create_sql.splitlines() - for l in lines: - tokens = l.split(' ') - if len(tokens) == 2 and tokens[0] == 
'A': - self.assert_(tokens[1] == 'DATETIME') - - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'sqlite', keys=['A', 'B'],) - lines = create_sql.splitlines() - self.assert_('PRIMARY KEY (A,B)' in create_sql) - cur = self.db.cursor() - cur.execute(create_sql) - - def test_execute_fail(self): - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a, b) - ); - """ - cur = self.db.cursor() - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db) - - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_execute_closed_connection(self): - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a, b) - ); - """ - cur = self.db.cursor() - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - self.db.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.tquery, "select * from test", - con=self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_na_roundtrip(self): - pass - - def _check_roundtrip(self, frame): - sql.write_frame(frame, name='test_table', con=self.db) - result = sql.read_frame("select * from test_table", self.db) - - # HACK! Change this once indexes are handled properly. - result.index = frame.index - - expected = frame - tm.assert_frame_equal(result, expected) - - frame['txt'] = ['a'] * len(frame) - frame2 = frame.copy() - frame2['Idx'] = Index(lrange(len(frame2))) + 10 - sql.write_frame(frame2, name='test_table2', con=self.db) - result = sql.read_frame("select * from test_table2", self.db, - index_col='Idx') - expected = frame.copy() - expected.index = Index(lrange(len(frame2))) + 10 - expected.index.name = 'Idx' - print(expected.index.names) - print(result.index.names) - tm.assert_frame_equal(expected, result) - - def test_tquery(self): - frame = tm.makeTimeDataFrame() - sql.write_frame(frame, name='test_table', con=self.db) - result = sql.tquery("select A from test_table", self.db) - expected = frame.A - result = Series(result, frame.index) - tm.assert_series_equal(result, expected) - - try: - sys.stdout = StringIO() - self.assertRaises(DatabaseError, sql.tquery, - 'select * from blah', con=self.db) - - self.assertRaises(DatabaseError, sql.tquery, - 'select * from blah', con=self.db, retry=True) - finally: - sys.stdout = sys.__stdout__ - - def test_uquery(self): - frame = tm.makeTimeDataFrame() - sql.write_frame(frame, name='test_table', con=self.db) - stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)' - self.assertEqual(sql.uquery(stmt, con=self.db), 1) - - try: - sys.stdout = StringIO() - - self.assertRaises(DatabaseError, sql.tquery, - 'insert into blah values (1)', con=self.db) - - self.assertRaises(DatabaseError, sql.tquery, - 'insert into blah values (1)', con=self.db, - retry=True) - finally: - sys.stdout = sys.__stdout__ - - def test_keyword_as_column_names(self): - ''' - ''' - df = DataFrame({'From':np.ones(5)}) - sql.write_frame(df, con = self.db, name = 'testkeywords') - - def test_onecolumn_of_integer(self): - ''' - GH 3628 - a column_of_integers dataframe should transfer well to sql - ''' - mono_df=DataFrame([1 , 2], columns=['c0']) - sql.write_frame(mono_df, con = self.db, name = 'mono_df') - # computing the sum via sql - con_x=self.db - 
the_sum=sum([my_c0[0] for my_c0 in con_x.execute("select * from mono_df")]) - # it should not fail, and gives 3 ( Issue #3628 ) - self.assertEqual(the_sum , 3) - - result = sql.read_frame("select * from mono_df",con_x) - tm.assert_frame_equal(result,mono_df) - - -class TestMySQL(unittest.TestCase): - - def setUp(self): - _skip_if_no_MySQLdb() - import MySQLdb - try: - # Try Travis defaults. - # No real user should allow root access with a blank password. - self.db = MySQLdb.connect(host='localhost', user='root', passwd='', - db='pandas_nosetest') - except: - pass - else: - return - try: - self.db = MySQLdb.connect(read_default_group='pandas') - except MySQLdb.ProgrammingError as e: - raise nose.SkipTest( - "Create a group of connection parameters under the heading " - "[pandas] in your system's mysql default file, " - "typically located at ~/.my.cnf or /etc/.my.cnf. ") - except MySQLdb.Error as e: - raise nose.SkipTest( - "Cannot connect to database. " - "Create a group of connection parameters under the heading " - "[pandas] in your system's mysql default file, " - "typically located at ~/.my.cnf or /etc/.my.cnf. ") - - def test_basic(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "For more robust support.*") - self._check_roundtrip(frame) - - def test_write_row_by_row(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - frame.ix[0, 0] = np.nan - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = sql.get_schema(frame, 'test', 'mysql') - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" - for idx, row in frame.iterrows(): - fmt_sql = format_query(ins, *row) - sql.tquery(fmt_sql, cur=cur) - - self.db.commit() - - result = sql.read_frame("select * from test", con=self.db) - result.index = frame.index - tm.assert_frame_equal(result, frame) - - def test_execute(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = sql.get_schema(frame, 'test', 'mysql') - cur = self.db.cursor() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unknown table.*") - cur.execute(drop_sql) - cur.execute(create_sql) - ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" - - row = frame.ix[0] - sql.execute(ins, self.db, params=tuple(row)) - self.db.commit() - - result = sql.read_frame("select * from test", self.db) - result.index = frame.index[:1] - tm.assert_frame_equal(result, frame[:1]) - - def test_schema(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'mysql') - lines = create_sql.splitlines() - for l in lines: - tokens = l.split(' ') - if len(tokens) == 2 and tokens[0] == 'A': - self.assert_(tokens[1] == 'DATETIME') - - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = sql.get_schema(frame, 'test', 'mysql', keys=['A', 'B'],) - lines = create_sql.splitlines() - self.assert_('PRIMARY KEY (A,B)' in create_sql) - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - - def test_execute_fail(self): - _skip_if_no_MySQLdb() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a(5), b(5)) - ); - """ - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - sql.execute('INSERT INTO 
test VALUES("foo", "baz", 2.567)', self.db) - - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_execute_closed_connection(self): - _skip_if_no_MySQLdb() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a(5), b(5)) - ); - """ - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - self.db.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.tquery, "select * from test", - con=self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_na_roundtrip(self): - _skip_if_no_MySQLdb() - pass - - def _check_roundtrip(self, frame): - _skip_if_no_MySQLdb() - drop_sql = "DROP TABLE IF EXISTS test_table" - cur = self.db.cursor() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unknown table.*") - cur.execute(drop_sql) - sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql') - result = sql.read_frame("select * from test_table", self.db) - - # HACK! Change this once indexes are handled properly. - result.index = frame.index - result.index.name = frame.index.name - - expected = frame - tm.assert_frame_equal(result, expected) - - frame['txt'] = ['a'] * len(frame) - frame2 = frame.copy() - index = Index(lrange(len(frame2))) + 10 - frame2['Idx'] = index - drop_sql = "DROP TABLE IF EXISTS test_table2" - cur = self.db.cursor() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unknown table.*") - cur.execute(drop_sql) - sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql') - result = sql.read_frame("select * from test_table2", self.db, - index_col='Idx') - expected = frame.copy() - - # HACK! Change this once indexes are handled properly. 
-            expected.index = index
-            expected.index.names = result.index.names
-            tm.assert_frame_equal(expected, result)
-
-    def test_tquery(self):
-        try:
-            import MySQLdb
-        except ImportError:
-            raise nose.SkipTest
-        frame = tm.makeTimeDataFrame()
-        drop_sql = "DROP TABLE IF EXISTS test_table"
-        cur = self.db.cursor()
-        cur.execute(drop_sql)
-        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
-        result = sql.tquery("select A from test_table", self.db)
-        expected = frame.A
-        result = Series(result, frame.index)
-        tm.assert_series_equal(result, expected)
-
-        try:
-            sys.stdout = StringIO()
-            self.assertRaises(DatabaseError, sql.tquery,
-                              'select * from blah', con=self.db)
-
-            self.assertRaises(DatabaseError, sql.tquery,
-                              'select * from blah', con=self.db, retry=True)
-        finally:
-            sys.stdout = sys.__stdout__
-
-    def test_uquery(self):
-        try:
-            import MySQLdb
-        except ImportError:
-            raise nose.SkipTest
-        frame = tm.makeTimeDataFrame()
-        drop_sql = "DROP TABLE IF EXISTS test_table"
-        cur = self.db.cursor()
-        cur.execute(drop_sql)
-        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
-        stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)'
-        self.assertEqual(sql.uquery(stmt, con=self.db), 1)
-
-        try:
-            sys.stdout = StringIO()
-
-            self.assertRaises(DatabaseError, sql.tquery,
-                              'insert into blah values (1)', con=self.db)
-
-            self.assertRaises(DatabaseError, sql.tquery,
-                              'insert into blah values (1)', con=self.db,
-                              retry=True)
-        finally:
-            sys.stdout = sys.__stdout__
-
-    def test_keyword_as_column_names(self):
-        '''
-        '''
-        _skip_if_no_MySQLdb()
-        df = DataFrame({'From':np.ones(5)})
-        sql.write_frame(df, name='testkeywords', con=self.db,
-                        if_exists='replace', flavor='mysql')
-
-if __name__ == '__main__':
-    # unittest.main()
-    # nose.runmodule(argv=[__file__,'-vvs','-x', '--pdb-failure'],
-    #                exit=False)
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)

From ac6bf427a1849f5e293bd7e27b6aeaebd7146a85 Mon Sep 17 00:00:00 2001
From: Jonathan Chambers
Date: Mon, 20 Jan 2014 21:43:23 +0000
Subject: [PATCH 03/16] ENH #4163 Added more robust type coercion, datetime
 parsing, and parse date options. Updated optional dependencies

Added columns optional arg to read_table, removed failing legacy tests.

Added columns to doc

ENH #4163 Fixed class renaming, expanded docs

ENH #4163 Fixed tests in legacy mode
---
 README.md                   |   1 +
 doc/source/install.rst      |   1 +
 doc/source/io.rst           |  40 +++++--
 pandas/io/sql.py            | 227 ++++++++++++++++++++++++++++--------
 pandas/io/tests/test_sql.py | 130 +++++++++++++++++----
 5 files changed, 318 insertions(+), 81 deletions(-)

diff --git a/README.md b/README.md
index 7bfd71597c421..f9f4de9ff1094 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,7 @@ pip install pandas
 - [Cython](http://www.cython.org): Only necessary to build development version. Version 0.17.1 or higher.
 - [SciPy](http://www.scipy.org): miscellaneous statistical functions
 - [PyTables](http://www.pytables.org): necessary for HDF5-based storage
+- [SQLAlchemy](http://www.sqlalchemy.org): for SQL database support. Version 0.8.1 or higher recommended.
 - [matplotlib](http://matplotlib.sourceforge.net/): for plotting
 - [statsmodels](http://statsmodels.sourceforge.net/)
   - Needed for parts of `pandas.stats`
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 631973934cc3b..f67bdc10a457f 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -95,6 +95,7 @@ Optional Dependencies
     version. Version 0.17.1 or higher.
  * `SciPy `__: miscellaneous statistical functions
  * `PyTables `__: necessary for HDF5-based storage
+ * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended.
  * `matplotlib `__: for plotting
  * `statsmodels `__
     * Needed for parts of :mod:`pandas.stats`
diff --git a/doc/source/io.rst b/doc/source/io.rst
index c7cb23219fc4c..1568181b0a9b0 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -3030,8 +3030,6 @@ The key functions are:
 :func:`~pandas.io.sql.read_table`

-
-
 In the following example, we use the `SQLite `__ SQL database
 engine. You can use a temporary SQLite database where data are stored in
 "memory".
@@ -3087,20 +3085,48 @@ the database using :func:`~pandas.io.sql.to_sql`.

     sql.to_sql(data, 'data', engine)

-
-You can read from the database simply by
-specifying a table name using the :func:`~pandas.io.sql.read_table` function.
+
+You can read from the database simply by specifying a table
+name using the :func:`~pandas.io.sql.read_table` function.
+
+.. note::
+
+    In order to use read_table, you MUST have the SQLAlchemy optional
+    dependency installed.

 .. ipython:: python

     sql.read_table('data', engine)

-You can also specify the name of the column as the DataFrame index:
+You can also specify the name of the column as the DataFrame index,
+and specify a subset of columns to be read.

 .. ipython:: python

     sql.read_table('data', engine, index_col='id')
+    sql.read_table('data', engine, columns=['Col_1', 'Col_2'])
+
+And you can explicitly force columns to be parsed as dates:
+
+.. ipython:: python
+
+    sql.read_table('data', engine, parse_dates=['Date'])
+
+If needed you can explicitly specify a format string, or a dict of arguments
+to pass to :func:`pandas.tseries.tools.to_datetime`.
+
+.. code-block:: python
+
+    sql.read_table('data', engine, parse_dates={'Date': '%Y-%m-%d'})
+    sql.read_table('data', engine, parse_dates={'Date': {'format': '%Y-%m-%d %H:%M:%S'}})
+
+Querying
+~~~~~~~~

 You can also query using raw SQL in the :func:`~pandas.io.sql.read_sql` function.
+In this case you must use valid SQL for your database.
+When using SQLAlchemy, you can also pass SQLAlchemy Expression language constructs,
+which are database-agnostic.

 .. ipython:: python

@@ -3117,10 +3143,6 @@ There are a few other available functions:

 :func:`~pandas.io.sql.has_table` checks if a given table exists.

-:func:`~pandas.io.sql.tquery` returns a list of tuples corresponding to each row.
-
-:func:`~pandas.io.sql.uquery` does the same thing as tquery, but instead of
-returning results it returns the number of related rows.

 In addition, the class :class:`~pandas.io.sql.PandasSQLWithEngine` can be
 instantiated directly for more manual control over the SQL interaction.
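The same keywords also work in the fallback mode, where pandas talks to a DBAPI2
connection directly. A minimal sketch of a legacy-mode round trip (the table name
``data``, the column names, and the stored date format are illustrative
assumptions for this example, not part of the patch):

.. code-block:: python

    import sqlite3
    from datetime import datetime
    from pandas import DataFrame
    from pandas.io import sql

    df = DataFrame({'Date': [datetime(2014, 1, 1)], 'Col_1': [1.5]})

    conn = sqlite3.connect(':memory:')             # no SQLAlchemy engine
    sql.to_sql(df, 'data', conn, flavor='sqlite')  # flavor is required for DBAPI2
    frame = sql.read_sql('SELECT * FROM data', conn, flavor='sqlite',
                         parse_dates={'Date': '%Y-%m-%d %H:%M:%S'})

Note that without SQLAlchemy, SQLite has no native datetime type, which is
exactly the situation the ``parse_dates`` keyword is designed for.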
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index d09847a4be481..1a7f0665e439d 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -12,7 +12,7 @@

 from pandas.core.api import DataFrame
 from pandas.core.base import PandasObject
-
+from pandas.tseries.tools import to_datetime

 class SQLAlchemyRequired(ImportError):
     pass
@@ -55,7 +55,7 @@ def execute(sql, con, cur=None, params=[], flavor='sqlite'):
     -------
     Results Iterable
     """
-    pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
+    pandas_sql = pandasSQL_builder(con, flavor=flavor)
     args = _convert_params(sql, params)
     return pandas_sql.execute(*args)

@@ -87,7 +87,7 @@ def tquery(sql, con, cur=None, params=[], flavor='sqlite'):
     """
     warnings.warn("tquery is deprecated, and will be removed in future versions", DeprecationWarning)
-    pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
+    pandas_sql = pandasSQL_builder(con, flavor=flavor)
     args = _convert_params(sql, params)
     return pandas_sql.tquery(*args)

@@ -116,7 +116,7 @@ def uquery(sql, con, cur=None, params=[], engine=None, flavor='sqlite'):
     Number of affected rows
     """
     warnings.warn("uquery is deprecated, and will be removed in future versions", DeprecationWarning)
-    pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
+    pandas_sql = pandasSQL_builder(con, flavor=flavor)
     args = _convert_params(sql, params)
     return pandas_sql.uquery(*args)

@@ -125,7 +125,7 @@ def uquery(sql, con, cur=None, params=[], engine=None, flavor='sqlite'):
 # Read and write to DataFrames

-def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, params=[]):
+def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, params=[], parse_dates=[]):
     """
     Returns a DataFrame corresponding to the result set of the query
     string.
@@ -151,13 +151,26 @@ def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, param
     cur: deprecated, cursor is obtained from connection
     params: list or tuple, optional
         List of parameters to pass to execute method.
-
+    parse_dates: list or dict
+        List of column names to parse as dates
+        Or
+        Dict of {column_name: format string} where format string is
+        strftime compatible in case of parsing string times or is one of
+        (D, s, ns, ms, us) in case of parsing integer timestamps
+        Or
+        Dict of {column_name: arg dict}, where the arg dict corresponds
+        to the keyword arguments of :func:`pandas.tseries.tools.to_datetime`
+        Especially useful with databases without native Datetime support, such as SQLite
     Returns
     -------
     DataFrame
     """
-    pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
-    return pandas_sql.read_sql(sql, index_col=index_col, params=params, coerce_float=coerce_float)
+    pandas_sql = pandasSQL_builder(con, flavor=flavor)
+    return pandas_sql.read_sql(sql,
+                               index_col=index_col,
+                               params=params,
+                               coerce_float=coerce_float,
+                               parse_dates=parse_dates)

 def to_sql(frame, name, con, flavor='sqlite', if_exists='fail'):
@@ -178,7 +191,7 @@ def to_sql(frame, name, con, flavor='sqlite', if_exists='fail'):
         replace: If table exists, drop it, recreate it, and insert data.
         append: If table exists, insert data. Create if does not exist.
""" - pandas_sql = pandasSQL_builder(con=con, flavor=flavor) + pandas_sql = pandasSQL_builder(con, flavor=flavor) pandas_sql.to_sql(frame, name, if_exists=if_exists) @@ -199,12 +212,11 @@ def has_table(table_name, con, meta=None, flavor='sqlite'): ------- boolean """ - pandas_sql = pandasSQL_builder(con=con, flavor=flavor) + pandas_sql = pandasSQL_builder(con, flavor=flavor) return pandas_sql.has_table(table_name) -# This is an awesome function -def read_table(table_name, con, meta=None, index_col=None, coerce_float=True): +def read_table(table_name, con, meta=None, index_col=None, coerce_float=True, parse_dates=[], columns=[]): """Given a table name and SQLAlchemy engine, return a DataFrame. Type convertions will be done automatically @@ -217,16 +229,30 @@ def read_table(table_name, con, meta=None, index_col=None, coerce_float=True): coerce_float : boolean, default True Attempt to convert values to non-string, non-numeric objects (like decimal.Decimal) to floating point. Can result in loss of Precision. + parse_dates: list or dict + List of column names to parse as dates + Or + Dict of {column_name: format string} where format string is + strftime compatible in case of parsing string times or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps + Or + Dict of {column_name: arg dict}, where the arg dict corresponds + to the keyword arguments of :func:`pandas.tseries.tools.to_datetime` + Especially useful with databases without native Datetime support, such as SQLite + columns: list + List of column names to select from sql table Returns ------- DataFrame """ - pandas_sql = PandasSQLWithEngine(con, meta=meta) - table = pandas_sql.get_table(table_name) + pandas_sql = PandasSQLAlchemy(con, meta=meta) + table = pandas_sql.read_table(table_name, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates) if table is not None: - sql_select = table.select() - return pandas_sql.read_sql(sql_select, index_col=index_col, coerce_float=coerce_float) + return table else: raise ValueError("Table %s not found" % table_name, con) @@ -240,21 +266,21 @@ def pandasSQL_builder(con, flavor=None, meta=None): import sqlalchemy if isinstance(con, sqlalchemy.engine.Engine): - return PandasSQLWithEngine(con, meta=meta) + return PandasSQLAlchemy(con, meta=meta) else: warnings.warn("Not an SQLAlchemy engine, attempting to use as legacy DBAPI connection") if flavor is None: raise ValueError("""PandasSQL must be created with an SQLAlchemy engine or a DBAPI2 connection and SQL flavour""") else: - return PandasSQLWithCon(con, flavor) + return PandasSQLLegacy(con, flavor) except ImportError: warnings.warn("SQLAlchemy not installed, using legacy mode") if flavor is None: raise SQLAlchemyRequired else: - return PandasSQLWithCon(con, flavor) + return PandasSQLLegacy(con, flavor) class PandasSQL(PandasObject): @@ -262,16 +288,13 @@ class PandasSQL(PandasObject): Subclasses Should define read_sql and to_sql """ def read_sql(self, *args, **kwargs): - raise ValueError("PandasSQL must be created with an engine," - " connection or cursor.") + raise ValueError("PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor") def to_sql(self, *args, **kwargs): - raise ValueError("PandasSQL must be created with an engine," - " connection or cursor.") + raise ValueError("PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor") def _create_sql_schema(self, frame, name, keys): - raise ValueError("PandasSQL must be created with an engine," - " connection or cursor.") 
+ raise ValueError("PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor") def _frame_from_data_and_columns(self, data, columns, index_col=None, coerce_float=True): df = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float) @@ -282,8 +305,40 @@ def _frame_from_data_and_columns(self, data, columns, index_col=None, coerce_flo def _safe_col_names(self, col_names): return [s.replace(' ', '_').strip() for s in col_names] # may not be safe enough... + def _parse_date_columns(self, data_frame, parse_dates): + """ + """ + if parse_dates is True: + parse_dates = [] + + if not hasattr(parse_dates, '__iter__'): + parse_dates = [parse_dates] + + for col_name in parse_dates: + df_col = data_frame[col_name] + try: + fmt = parse_dates[col_name] + except TypeError: + fmt = None + data_frame[col_name] = self._parse_date_col(df_col, format=fmt) + + return data_frame + + def _parse_date_col(self, col, col_type=None, format=None): + if isinstance(format, dict): + return to_datetime(col, **format) + else: + if format in ['D', 's', 'ms', 'us', 'ns']: + return to_datetime(col, coerce=True, unit=format) + elif issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer): + #parse dates as timestamp + format = 's' if format is None else format + return to_datetime(col, coerce=True, unit=format) + else: + return to_datetime(col, coerce=True, format=format) + -class PandasSQLWithEngine(PandasSQL): +class PandasSQLAlchemy(PandasSQL): """ This class enables convertion between DataFrame and SQL databases using SQLAlchemy to handle DataBase abstraction @@ -311,15 +366,17 @@ def uquery(self, *args, **kwargs): result = self.execute(*args, **kwargs) return result.rowcount - def read_sql(self, sql, index_col=None, coerce_float=True, params=[]): + def read_sql(self, sql, index_col=None, coerce_float=True, parse_dates=[], params=[]): args = _convert_params(sql, params) result = self.execute(*args) data = result.fetchall() columns = result.keys() - return self._frame_from_data_and_columns(data, columns, - index_col=index_col, - coerce_float=coerce_float) + data_frame = self._frame_from_data_and_columns(data, columns, + index_col=index_col, + coerce_float=coerce_float) + + return self._parse_date_columns(data_frame, parse_dates) def to_sql(self, frame, name, if_exists='fail'): if self.engine.has_table(name): @@ -338,9 +395,6 @@ def to_sql(self, frame, name, if_exists='fail'): def _write(self, frame, table_name): table = self.get_table(table_name) ins = table.insert() - # TODO: do this in one pass - # TODO this should be done globally first (or work out how to pass np - # dtypes to sql) def maybe_asscalar(i): try: @@ -351,7 +405,6 @@ def maybe_asscalar(i): for t in frame.iterrows(): self.engine.execute(ins, **dict((k, maybe_asscalar(v)) for k, v in t[1].iteritems())) - # TODO more efficient, I'm *sure* this was just working with tuples def has_table(self, name): return self.engine.has_table(name) @@ -362,12 +415,35 @@ def get_table(self, table_name): else: return None + def read_table(self, table_name, index_col=None, coerce_float=True, parse_dates=[], columns=[]): + table = self.get_table(table_name) + + if table is not None: + + if columns is not None and len(columns) > 0: + from sqlalchemy import select + sql_select = select([table.c[n] for n in columns]) + else: + sql_select = table.select() + + result = self.execute(sql_select) + data = result.fetchall() + columns = result.keys() + + data_frame = self._frame_from_data_and_columns(data, columns, + 
index_col=index_col, + coerce_float=coerce_float) + + data_frame = self._harmonize_columns(data_frame, table, parse_dates) + return data_frame + else: + return None + def _drop_table(self, table_name): if self.engine.has_table(table_name): self.get_table(table_name).drop() self.meta.clear() self.meta.reflect() - #print(table.exists()) def _create_table(self, frame, table_name, keys=None): table = self._create_sqlalchemy_table(frame, table_name, keys) @@ -383,7 +459,7 @@ def _create_sqlalchemy_table(self, frame, table_name, keys=None): keys = [] safe_columns = self._safe_col_names(frame.dtypes.index) - column_types = map(self._lookup_type, frame.dtypes) + column_types = map(self._lookup_sql_type, frame.dtypes) columns = [(col_name, col_sqltype, col_name in keys) for col_name, col_sqltype in zip(safe_columns, column_types)] @@ -392,25 +468,81 @@ def _create_sqlalchemy_table(self, frame, table_name, keys=None): return Table(table_name, self.meta, *columns) - def _lookup_type(self, dtype): + def _lookup_sql_type(self, dtype): from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date pytype = dtype.type + if pytype is date: + return Date + if issubclass(pytype, np.datetime64) or pytype is datetime: + # Caution: np.datetime64 is also a subclass of np.number. + return DateTime if issubclass(pytype, np.floating): return Float if issubclass(pytype, np.integer): # TODO: Refine integer size. return Integer - if issubclass(pytype, np.datetime64) or pytype is datetime: - # Caution: np.datetime64 is also a subclass of np.number. - return DateTime - if pytype is date: - return Date if issubclass(pytype, np.bool_): return Boolean return Text + def _lookup_np_type(self, sqltype): + from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date + + if isinstance(sqltype, Float): + return float + if isinstance(sqltype, Integer): + # TODO: Refine integer size. + return int + if isinstance(sqltype, DateTime): + # Caution: np.datetime64 is also a subclass of np.number. + return datetime + if isinstance(sqltype, Date): + return date + if isinstance(sqltype, Boolean): + return bool + return object + + def _harmonize_columns(self, data_frame, sql_table, parse_dates=[]): + """ Make a data_frame's column type align with an sql_table column types + Need to work around limited NA value support. + Floats are always fine, ints must always + be floats if there are Null values. + Booleans are hard because converting bool column with None replaces + all Nones with false. Therefore only convert bool if there are no NA + values. + Datetimes should already be converted + to np.datetime if supported, but here we also force conversion + if required + """ + for sql_col in sql_table.columns: + col_name = sql_col.name + try: + df_col = data_frame[col_name] + col_type = self._lookup_np_type(sql_col.type) # the type the dataframe column should have + + if col_type is datetime or col_type is date: + if not issubclass(df_col.dtype.type, np.datetime64): + data_frame[col_name] = self._parse_date_col(df_col, col_type) + + elif col_type is float: + # floats support NA, can always convert! 
+ data_frame[col_name].astype(col_type, copy=False) + + elif len(df_col) == df_col.count(): + # No NA values, can convert ints and bools + if col_type is int or col_type is bool: + data_frame[col_name].astype(col_type, copy=False) + except KeyError: + pass # this column not in results + + data_frame = self._parse_date_columns(data_frame, parse_dates) + + return data_frame + + + # ---- SQL without SQLAlchemy --- # Flavour specific sql strings and handler class for access to DBs without SQLAlchemy installed @@ -469,7 +601,7 @@ def _lookup_type(self, dtype): } -class PandasSQLWithCon(PandasSQL): +class PandasSQLLegacy(PandasSQL): def __init__(self, con, flavor): self.con = con if flavor not in ['sqlite', 'mysql', 'postgres']: @@ -516,16 +648,17 @@ def uquery(self, *args): cur = self.execute(*args) return cur.rowcount - def read_sql(self, sql, index_col=None, coerce_float=True, params=[], flavor='sqlite'): + def read_sql(self, sql, index_col=None, coerce_float=True, params=[], flavor='sqlite', parse_dates=[]): args = _convert_params(sql, params) cursor = self.execute(*args) columns = [col_desc[0] for col_desc in cursor.description] data = self._fetchall_as_list(cursor) cursor.close() - return self._frame_from_data_and_columns(data, columns, - index_col=index_col, - coerce_float=coerce_float) + data_frame = self._frame_from_data_and_columns(data, columns, + index_col=index_col, + coerce_float=coerce_float) + return self._parse_date_columns(data_frame, parse_dates=parse_dates) def to_sql(self, frame, name, if_exists='fail'): """ diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 3b5e0ff4dfb72..805dedadfd7c7 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -4,6 +4,7 @@ import csv import os +import nose import numpy as np from pandas import DataFrame @@ -23,12 +24,14 @@ class PandasSQLTest(unittest.TestCase): """Base class with common private methods for - SQLAlchemy and fallback case test suits""" + SQLAlchemy and fallback cases. + """ + + def _load_iris_data(self, conn): + iris_csv_file = os.path.join(tm.get_data_path(), 'iris.csv') - def _load_iris_data(self, engine): - self.dirpath = tm.get_data_path() - iris_csv_file = os.path.join(self.dirpath, 'iris.csv') - engine.execute("""CREATE TABLE iris ( + # Raw SQLite + conn.execute("""CREATE TABLE iris ( `SepalLength` REAL, `SepalWidth` REAL, `PetalLength` REAL, @@ -44,7 +47,7 @@ def _load_iris_data(self, engine): VALUES(?, ?, ?, ?, ?) """ for row in r: - engine.execute(ins, row) + conn.execute(ins, row) def _check_iris_loaded_frame(self, iris_frame): pytype = iris_frame.dtypes[0].type @@ -54,12 +57,34 @@ def _check_iris_loaded_frame(self, iris_frame): tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def _load_test1_data(self): - test1_csv_file = os.path.join(self.dirpath, 'test1.csv') + test1_csv_file = os.path.join(tm.get_data_path(), 'test1.csv') with open(test1_csv_file, 'rU') as test1_csv: dr = csv.DictReader(test1_csv) self.test_frame1 = DataFrame(list(dr)) + def _load_raw_sql(self, conn): + # Raw SQLite + conn.execute("""CREATE TABLE types_test_data ( + `TextCol` TEXT, + `DateCol` TEXT, + `IntDateCol` INTEGER, + `FloatCol` REAL, + `IntCol` INTEGER, + `BoolCol` INTEGER, + `IntColWithNull` INTEGER, + `BoolColWithNull` INTEGER + )""") + + ins = """ + INSERT INTO types_test_data + VALUES(?, ?, ?, ?, ?, ?, ?, ?) 
+ """ + data = [('first', '2000-01-03 00:00:00', 535852800, 10.10, 1, False, 1, False), + ('first', '2000-01-04 00:00:00', 1356998400, 10.10, 1, False, None, None)] + for d in data: + conn.execute(ins, d) + def _count_rows(self, table_name, con): result = con.execute("SELECT count(*) AS count_1 FROM %s" % table_name).fetchone() return result[0] @@ -167,6 +192,7 @@ def setUp(self): self.conn = self.connect() self._load_iris_data(self.conn) self._load_test1_data() + self._load_raw_sql(self.conn) def test_read_sql_iris(self): iris_frame = sql.read_sql("SELECT * FROM iris", self.conn, flavor='sqlite') @@ -236,6 +262,24 @@ def test_tquery(self): row = iris_results[0] tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) + def test_date_parsing(self): + """ Test date parsing in read_sql """ + # No Parsing + df = sql.read_sql("SELECT * FROM types_test_data", self.conn, flavor='sqlite') + self.assertFalse(issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + + df = sql.read_sql("SELECT * FROM types_test_data", self.conn, flavor='sqlite', parse_dates=['DateCol']) + self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + + df = sql.read_sql("SELECT * FROM types_test_data", self.conn, flavor='sqlite', parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) + self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + + df = sql.read_sql("SELECT * FROM types_test_data", self.conn, flavor='sqlite', parse_dates=['IntDateCol']) + self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), "IntDateCol loaded with incorrect type") + + df = sql.read_sql("SELECT * FROM types_test_data", self.conn, flavor='sqlite', parse_dates={'IntDateCol': 's'}) + self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), "IntDateCol loaded with incorrect type") + class TestSQLAlchemy(PandasSQLTest): ''' @@ -251,25 +295,19 @@ def drop_table(self, table_name, conn): def setUp(self): # Skip this test if SQLAlchemy not available if not SQLALCHEMY_INSTALLED: - raise unittest.SkipTest('SQLAlchemy not installed') + raise nose.SkipTest('SQLAlchemy not installed') self.conn = self.connect() - self.pandasSQL = sql.PandasSQLWithEngine(self.conn) + self.pandasSQL = sql.PandasSQLAlchemy(self.conn) self._load_iris_data(self.conn) + self._load_raw_sql(self.conn) self._load_test1_data() def test_read_sql(self): self._read_sql_iris() - def test_read_table(self): - iris_frame = sql.read_table("iris", con=self.conn) - self._check_iris_loaded_frame(iris_frame) - - def test_read_table_absent(self): - self.assertRaises(ValueError, sql.read_table, "this_doesnt_exist", con=self.conn) - def test_to_sql(self): self._to_sql() @@ -286,7 +324,7 @@ def test_create_table(self): temp_conn = self.connect() temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) - pandasSQL = sql.PandasSQLWithEngine(temp_conn) + pandasSQL = sql.PandasSQLAlchemy(temp_conn) pandasSQL._create_table(temp_frame, 'temp_frame') self.assertTrue(temp_conn.has_table('temp_frame'), 'Table not written to DB') @@ -296,7 +334,7 @@ def test_drop_table(self): temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) - pandasSQL = sql.PandasSQLWithEngine(temp_conn) + pandasSQL = sql.PandasSQLAlchemy(temp_conn) pandasSQL._create_table(temp_frame, 'temp_frame') self.assertTrue(temp_conn.has_table('temp_frame'), 'Table not written to DB') @@ -311,13 +349,55 @@ def test_roundtrip(self): def test_execute_sql(self): 
self._execute_sql() - def test_tquery(self): - self._tquery() + def test_read_table(self): + iris_frame = sql.read_table("iris", con=self.conn) + self._check_iris_loaded_frame(iris_frame) + def test_read_table_columns(self): + iris_frame = sql.read_table("iris", con=self.conn, columns=['SepalLength', 'SepalLength']) + tm.equalContents(iris_frame.columns.values, ['SepalLength', 'SepalLength']) -# --- Test SQLITE fallback + def test_read_table_absent(self): + self.assertRaises(ValueError, sql.read_table, "this_doesnt_exist", con=self.conn) + + def test_default_type_convertion(self): + """ Test default type conversion""" + df = sql.read_table("types_test_data", self.conn) + self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating), "FloatCol loaded with incorrect type") + self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer), "IntCol loaded with incorrect type") + self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer), "BoolCol loaded with incorrect type") + # Int column with NA values stays as float + self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), "IntColWithNull loaded with incorrect type") + # Non-native Bool column with NA values stays as float + self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), "BoolCol loaded with incorrect type") + def test_date_parsing(self): + """ Test date parsing """ + # No Parsing + df = sql.read_table("types_test_data", self.conn) + self.assertFalse(issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + + df = sql.read_table("types_test_data", self.conn, parse_dates=['DateCol']) + self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + + df = sql.read_table("types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) + self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + + df = sql.read_table("types_test_data", self.conn, parse_dates={'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}}) + self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), "IntDateCol loaded with incorrect type") + + df = sql.read_table("types_test_data", self.conn, parse_dates=['IntDateCol']) + self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), "IntDateCol loaded with incorrect type") + + df = sql.read_table("types_test_data", self.conn, parse_dates={'IntDateCol': 's'}) + self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), "IntDateCol loaded with incorrect type") + + df = sql.read_table("types_test_data", self.conn, parse_dates={'IntDateCol': {'unit': 's'}}) + self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), "IntDateCol loaded with incorrect type") + + +# --- Test SQLITE fallback class TestSQLite(PandasSQLTest): ''' Test the sqlalchemy backend against an in-memory sqlite database. 
@@ -333,14 +413,14 @@ def drop_table(self, table_name, conn): def setUp(self): self.conn = self.connect() - self.pandasSQL = sql.PandasSQLWithCon(self.conn, 'sqlite') + self.pandasSQL = sql.PandasSQLLegacy(self.conn, 'sqlite') self._load_iris_data(self.conn) self._load_test1_data() def test_invalid_flavor(self): - self.assertRaises(NotImplementedError, sql.PandasSQLWithCon, self.conn, 'oracle') + self.assertRaises(NotImplementedError, sql.PandasSQLLegacy, self.conn, 'oracle') def test_read_sql(self): self._read_sql_iris() @@ -361,7 +441,7 @@ def test_create_table(self): temp_conn = self.connect() temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) - pandasSQL = sql.PandasSQLWithCon(temp_conn, 'sqlite') + pandasSQL = sql.PandasSQLLegacy(temp_conn, 'sqlite') pandasSQL._create_table(temp_frame, 'temp_frame') self.assertTrue(pandasSQL.has_table('temp_frame'), 'Table not written to DB') @@ -371,7 +451,7 @@ def test_drop_table(self): temp_frame = DataFrame({'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) - pandasSQL = sql.PandasSQLWithCon(temp_conn, 'sqlite') + pandasSQL = sql.PandasSQLLegacy(temp_conn, 'sqlite') pandasSQL._create_table(temp_frame, 'temp_frame') self.assertTrue(pandasSQL.has_table('temp_frame'), 'Table not written to DB') From 6314e6f11d7e5da7f4f414f33724df94148f2a4c Mon Sep 17 00:00:00 2001 From: Jonathan Chambers Date: Fri, 24 Jan 2014 16:05:10 +0000 Subject: [PATCH 04/16] ENH #4163 Tweaks to docs, avoid mutable default args, mysql tests --- doc/source/io.rst | 39 ++- pandas/io/sql.py | 121 ++++---- pandas/io/tests/test_sql.py | 537 +++++++++++++++++++++++------------- 3 files changed, 437 insertions(+), 260 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 1568181b0a9b0..405049965f453 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3048,8 +3048,10 @@ below and the SQLAlchemy `documentation Date: Mon, 27 Jan 2014 18:54:59 +0000 Subject: [PATCH 05/16] ENH #4163 Introduce DataFrame Index support. Refactor to introduce PandasSQLTable for cleaner OOP design --- pandas/io/sql.py | 642 ++++++++++++++++++++---------------- pandas/io/tests/test_sql.py | 39 ++- 2 files changed, 378 insertions(+), 303 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 25e005513cd69..f4ec78c31237a 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -6,7 +6,6 @@ from datetime import datetime, date import warnings from pandas.compat import range, lzip, map, zip, raise_with_traceback -import pandas.compat as compat import numpy as np @@ -170,6 +169,7 @@ def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, to the keyword arguments of :func:`pandas.tseries.tools.to_datetime` Especially useful with databases without native Datetime support, such as SQLite + Returns ------- DataFrame @@ -182,7 +182,7 @@ def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, parse_dates=parse_dates) -def to_sql(frame, name, con, flavor='sqlite', if_exists='fail'): +def to_sql(frame, name, con, flavor='sqlite', if_exists='fail', index=True): """ Write records stored in a DataFrame to a SQL database. @@ -194,14 +194,17 @@ def to_sql(frame, name, con, flavor='sqlite', if_exists='fail'): Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object is given, a supported SQL flavor must also be provided - flavor: {'sqlite', 'mysql', 'postgres'}, default 'sqlite', ignored when using engine + flavor: {'sqlite', 'mysql', 'postgres'}, default 'sqlite' + ignored when SQLAlchemy engine. 
Required when using DBAPI2 connection. if_exists: {'fail', 'replace', 'append'}, default 'fail' fail: If table exists, do nothing. replace: If table exists, drop it, recreate it, and insert data. append: If table exists, insert data. Create if does not exist. + index : boolean, default True + Write DataFrame index as an column """ pandas_sql = pandasSQL_builder(con, flavor=flavor) - pandas_sql.to_sql(frame, name, if_exists=if_exists) + pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index) def has_table(table_name, con, meta=None, flavor='sqlite'): @@ -295,6 +298,237 @@ def pandasSQL_builder(con, flavor=None, meta=None): return PandasSQLLegacy(con, flavor) +def _safe_col_name(col_name): + return col_name.strip().replace(' ', '_') + + +def _parse_date_column(col, format=None): + if isinstance(format, dict): + return to_datetime(col, **format) + else: + if format in ['D', 's', 'ms', 'us', 'ns']: + return to_datetime(col, coerce=True, unit=format) + elif issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer): + # parse dates as timestamp + format = 's' if format is None else format + return to_datetime(col, coerce=True, unit=format) + else: + return to_datetime(col, coerce=True, format=format) + + +def _frame_from_data_and_columns(data, columns, index_col=None, + coerce_float=True): + df = DataFrame.from_records( + data, columns=columns, coerce_float=coerce_float) + if index_col is not None: + df.set_index(index_col, inplace=True) + return df + + +class PandasSQLTable(PandasObject): + + def __init__(self, name, pandas_sql_engine, frame=None, index=True, if_exists='fail', prefix='pandas'): + self.name = name + self.pd_sql = pandas_sql_engine + self.prefix = prefix + self.frame = frame + self.index = self._index_name(index) + + if frame is not None: + # We want to write a frame + if self.pd_sql.has_table(self.name): + if if_exists == 'fail': + raise ValueError("Table '%s' already exists." 
% name) + elif if_exists == 'replace': + self.pd_sql.drop_table(self.name) + self.table = self._create_table_statement() + self.create() + elif if_exists == 'append': + self.table = self.pd_sql.get_table(self.name) + if self.table is None: + self.table = self._create_table_statement() + else: + self.table = self._create_table_statement() + self.create() + else: + # no data provided, read-only mode + self.table = self.pd_sql.get_table(self.name) + + if self.table is None: + raise ValueError("Could not init table '%s'" % name) + + def exists(self): + return self.pd_sql.has_table(self.name) + + def sql_schema(self): + return str(self.table.compile()) + + def create(self): + self.table.create() + + def insert_statement(self): + return self.table.insert() + + def maybe_asscalar(self, i): + try: + return np.asscalar(i) + except AttributeError: + return i + + def insert(self): + ins = self.insert_statement() + + for t in self.frame.iterrows(): + data = dict((k, self.maybe_asscalar(v)) + for k, v in t[1].iteritems()) + if self.index is not None: + data[self.index] = self.maybe_asscalar(t[0]) + self.pd_sql.execute(ins, **data) + + def read(self, coerce_float=True, parse_dates=None, columns=None): + + if columns is not None and len(columns) > 0: + from sqlalchemy import select + cols = [self.table.c[n] for n in columns] + if self.index is not None: + cols.insert(0, self.table.c[self.index]) + sql_select = select(cols) + else: + sql_select = self.table.select() + + result = self.pd_sql.execute(sql_select) + data = result.fetchall() + column_names = result.keys() + + self.frame = _frame_from_data_and_columns(data, column_names, + index_col=self.index, + coerce_float=coerce_float) + + self._harmonize_columns(parse_dates=parse_dates) + + # Assume that if the index was in prefix_index format, we gave it a name + # and should return it nameless + if self.index == self.prefix + '_index': + self.frame.index.name = None + + return self.frame + + def _index_name(self, index): + if index is True: + if self.frame.index.name is not None: + return _safe_col_name(self.frame.index.name) + else: + return self.prefix + '_index' + elif isinstance(index, basestring): + return index + else: + return None + + def _create_table_statement(self): + from sqlalchemy import Table, Column + + safe_columns = map(_safe_col_name, self.frame.dtypes.index) + column_types = map(self._sqlalchemy_type, self.frame.dtypes) + + columns = [Column(name, typ) + for name, typ in zip(safe_columns, column_types)] + + if self.index is not None: + columns.insert(0, Column(self.index, + self._sqlalchemy_type( + self.frame.index.dtype), + index=True)) + + return Table(self.name, self.pd_sql.meta, *columns) + + def _harmonize_columns(self, parse_dates=None): + """ Make a data_frame's column type align with an sql_table column types + Need to work around limited NA value support. + Floats are always fine, ints must always + be floats if there are Null values. + Booleans are hard because converting bool column with None replaces + all Nones with false. Therefore only convert bool if there are no NA + values. 
+ Datetimes should already be converted + to np.datetime if supported, but here we also force conversion + if required + """ + # handle non-list entries for parse_dates gracefully + if parse_dates is True or parse_dates is None or parse_dates is False: + parse_dates = [] + + if not hasattr(parse_dates, '__iter__'): + parse_dates = [parse_dates] + + for sql_col in self.table.columns: + col_name = sql_col.name + try: + df_col = self.frame[col_name] + # the type the dataframe column should have + col_type = self._numpy_type(sql_col.type) + + if col_type is datetime or col_type is date: + if not issubclass(df_col.dtype.type, np.datetime64): + self.frame[col_name] = _parse_date_column(df_col) + + elif col_type is float: + # floats support NA, can always convert! + self.frame[col_name].astype(col_type, copy=False) + + elif len(df_col) == df_col.count(): + # No NA values, can convert ints and bools + if col_type is int or col_type is bool: + self.frame[col_name].astype(col_type, copy=False) + + # Handle date parsing + if col_name in parse_dates: + try: + fmt = parse_dates[col_name] + except TypeError: + fmt = None + self.frame[col_name] = _parse_date_column( + df_col, format=fmt) + + except KeyError: + pass # this column not in results + + def _sqlalchemy_type(self, dtype): + from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date + + pytype = dtype.type + + if pytype is date: + return Date + if issubclass(pytype, np.datetime64) or pytype is datetime: + # Caution: np.datetime64 is also a subclass of np.number. + return DateTime + if issubclass(pytype, np.floating): + return Float + if issubclass(pytype, np.integer): + # TODO: Refine integer size. + return Integer + if issubclass(pytype, np.bool_): + return Boolean + return Text + + def _numpy_type(self, sqltype): + from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date + + if isinstance(sqltype, Float): + return float + if isinstance(sqltype, Integer): + # TODO: Refine integer size. + return int + if isinstance(sqltype, DateTime): + # Caution: np.datetime64 is also a subclass of np.number. + return datetime + if isinstance(sqltype, Date): + return date + if isinstance(sqltype, Boolean): + return bool + return object + + class PandasSQL(PandasObject): """ @@ -309,22 +543,6 @@ def to_sql(self, *args, **kwargs): raise ValueError( "PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor") - def _create_sql_schema(self, frame, name, keys): - raise ValueError( - "PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor") - - def _frame_from_data_and_columns(self, data, columns, index_col=None, - coerce_float=True): - df = DataFrame.from_records( - data, columns=columns, coerce_float=coerce_float) - if index_col is not None: - df.set_index(index_col, inplace=True) - return df - - def _safe_col_names(self, col_names): - # may not be safe enough... - return [s.replace(' ', '_').strip() for s in col_names] - def _parse_date_columns(self, data_frame, parse_dates): """ Force non-datetime columns to be read as such. 
Supports both string formatted and integer timestamp columns @@ -342,23 +560,10 @@ def _parse_date_columns(self, data_frame, parse_dates): fmt = parse_dates[col_name] except TypeError: fmt = None - data_frame[col_name] = self._parse_date_col(df_col, format=fmt) + data_frame[col_name] = _parse_date_column(df_col, format=fmt) return data_frame - def _parse_date_col(self, col, col_type=None, format=None): - if isinstance(format, dict): - return to_datetime(col, **format) - else: - if format in ['D', 's', 'ms', 'us', 'ns']: - return to_datetime(col, coerce=True, unit=format) - elif issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer): - # parse dates as timestamp - format = 's' if format is None else format - return to_datetime(col, coerce=True, unit=format) - else: - return to_datetime(col, coerce=True, format=format) - class PandasSQLAlchemy(PandasSQL): @@ -381,12 +586,10 @@ def execute(self, *args, **kwargs): return self.engine.execute(*args, **kwargs) def tquery(self, *args, **kwargs): - """Accepts same args as execute""" result = self.execute(*args, **kwargs) return result.fetchall() def uquery(self, *args, **kwargs): - """Accepts same args as execute""" result = self.execute(*args, **kwargs) return result.rowcount @@ -396,40 +599,16 @@ def read_sql(self, sql, index_col=None, coerce_float=True, parse_dates=None, par data = result.fetchall() columns = result.keys() - data_frame = self._frame_from_data_and_columns(data, columns, - index_col=index_col, - coerce_float=coerce_float) + data_frame = _frame_from_data_and_columns(data, columns, + index_col=index_col, + coerce_float=coerce_float) return self._parse_date_columns(data_frame, parse_dates) - def to_sql(self, frame, name, if_exists='fail'): - if self.engine.has_table(name): - if if_exists == 'fail': - raise ValueError("Table '%s' already exists." % name) - elif if_exists == 'replace': - # TODO: this triggers a full refresh of metadata, could - # probably avoid this. 
- self._drop_table(name) - self._create_table(frame, name) - elif if_exists == 'append': - pass # table exists and will automatically be appended to - else: - self._create_table(frame, name) - self._write(frame, name) - - def _write(self, frame, table_name): - table = self.get_table(table_name) - ins = table.insert() - - def maybe_asscalar(i): - try: - return np.asscalar(i) - except AttributeError: - return i - - for t in frame.iterrows(): - self.engine.execute(ins, **dict((k, maybe_asscalar(v)) - for k, v in t[1].iteritems())) + def to_sql(self, frame, name, if_exists='fail', index=True): + table = PandasSQLTable( + name, self, frame=frame, index=index, if_exists=if_exists) + table.insert() def has_table(self, name): return self.engine.has_table(name) @@ -442,135 +621,21 @@ def get_table(self, table_name): def read_table(self, table_name, index_col=None, coerce_float=True, parse_dates=None, columns=None): - table = self.get_table(table_name) - - if table is not None: - - if columns is not None and len(columns) > 0: - from sqlalchemy import select - sql_select = select([table.c[n] for n in columns]) - else: - sql_select = table.select() - result = self.execute(sql_select) - data = result.fetchall() - columns = result.keys() + table = PandasSQLTable(table_name, self, index=index_col) + return table.read(coerce_float=parse_dates, + parse_dates=parse_dates, columns=columns) - data_frame = self._frame_from_data_and_columns(data, columns, - index_col=index_col, - coerce_float=coerce_float) - - data_frame = self._harmonize_columns( - data_frame, table, parse_dates) - return data_frame - else: - return None - - def _drop_table(self, table_name): + def drop_table(self, table_name): if self.engine.has_table(table_name): self.get_table(table_name).drop() self.meta.clear() self.meta.reflect() - def _create_table(self, frame, table_name, keys=None): - table = self._create_sqlalchemy_table(frame, table_name, keys) - table.create() - - def _create_sql_schema(self, frame, table_name, keys=None): - table = self._create_sqlalchemy_table(frame, table_name, keys) + def _create_sql_schema(self, frame, table_name): + table = PandasSQLTable(table_name, self, frame=frame) return str(table.compile()) - def _create_sqlalchemy_table(self, frame, table_name, keys=None): - from sqlalchemy import Table, Column - if keys is None: - keys = [] - - safe_columns = self._safe_col_names(frame.dtypes.index) - column_types = map(self._lookup_sql_type, frame.dtypes) - - columns = [(col_name, col_sqltype, col_name in keys) - for col_name, col_sqltype in zip(safe_columns, column_types)] - - columns = [Column(name, typ, primary_key=pk) - for name, typ, pk in columns] - - return Table(table_name, self.meta, *columns) - - def _lookup_sql_type(self, dtype): - from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date - - pytype = dtype.type - - if pytype is date: - return Date - if issubclass(pytype, np.datetime64) or pytype is datetime: - # Caution: np.datetime64 is also a subclass of np.number. - return DateTime - if issubclass(pytype, np.floating): - return Float - if issubclass(pytype, np.integer): - # TODO: Refine integer size. - return Integer - if issubclass(pytype, np.bool_): - return Boolean - return Text - - def _lookup_np_type(self, sqltype): - from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date - - if isinstance(sqltype, Float): - return float - if isinstance(sqltype, Integer): - # TODO: Refine integer size. 
- return int - if isinstance(sqltype, DateTime): - # Caution: np.datetime64 is also a subclass of np.number. - return datetime - if isinstance(sqltype, Date): - return date - if isinstance(sqltype, Boolean): - return bool - return object - - def _harmonize_columns(self, data_frame, sql_table, parse_dates=None): - """ Make a data_frame's column type align with an sql_table column types - Need to work around limited NA value support. - Floats are always fine, ints must always - be floats if there are Null values. - Booleans are hard because converting bool column with None replaces - all Nones with false. Therefore only convert bool if there are no NA - values. - Datetimes should already be converted - to np.datetime if supported, but here we also force conversion - if required - """ - for sql_col in sql_table.columns: - col_name = sql_col.name - try: - df_col = data_frame[col_name] - # the type the dataframe column should have - col_type = self._lookup_np_type(sql_col.type) - - if col_type is datetime or col_type is date: - if not issubclass(df_col.dtype.type, np.datetime64): - data_frame[col_name] = self._parse_date_col( - df_col, col_type) - - elif col_type is float: - # floats support NA, can always convert! - data_frame[col_name].astype(col_type, copy=False) - - elif len(df_col) == df_col.count(): - # No NA values, can convert ints and bools - if col_type is int or col_type is bool: - data_frame[col_name].astype(col_type, copy=False) - except KeyError: - pass # this column not in results - - data_frame = self._parse_date_columns(data_frame, parse_dates) - - return data_frame - # ---- SQL without SQLAlchemy --- # Flavour specific sql strings and handler class for access to DBs without @@ -618,6 +683,89 @@ def _harmonize_columns(self, data_frame, sql_table, parse_dates=None): } +class PandasSQLTableLegacy(PandasSQLTable): + """Patch the PandasSQLTable for legacy support. + Instead of a table variable just use the Create Table + statement""" + def sql_schema(self): + return str(self.table) + + def create(self): + self.pd_sql.execute(self.table) + + def insert_statement(self): + # Replace spaces in DataFrame column names with _. + safe_names = [_safe_col_name(n) for n in self.frame.dtypes.index] + flv = self.pd_sql.flavor + br_l = _SQL_SYMB[flv]['br_l'] # left val quote char + br_r = _SQL_SYMB[flv]['br_r'] # right val quote char + wld = _SQL_SYMB[flv]['wld'] # wildcard char + + if self.index is not None: + safe_names.insert(0, self.index) + + bracketed_names = [br_l + column + br_r for column in safe_names] + col_names = ','.join(bracketed_names) + wildcards = ','.join([wld] * len(safe_names)) + insert_statement = 'INSERT INTO %s (%s) VALUES (%s)' % ( + self.name, col_names, wildcards) + return insert_statement + + def insert(self): + ins = self.insert_statement() + cur = self.pd_sql.con.cursor() + for r in self.frame.iterrows(): + data = [self.maybe_asscalar(v) for v in r[1].values] + if self.index is not None: + data.insert(0, self.maybe_asscalar(r[0])) + print(type(data[2])) + print(type(r[0])) + cur.execute(ins, tuple(data)) + cur.close() + + def _create_table_statement(self): + "Return a CREATE TABLE statement to suit the contents of a DataFrame." + + # Replace spaces in DataFrame column names with _. 
+ safe_columns = [_safe_col_name(n) for n in self.frame.dtypes.index] + column_types = [self._sql_type_name(typ) for typ in self.frame.dtypes] + + if self.index is not None: + safe_columns.insert(0, self.index) + column_types.insert(0, self._sql_type_name(self.frame.index.dtype)) + flv = self.pd_sql.flavor + + br_l = _SQL_SYMB[flv]['br_l'] # left val quote char + br_r = _SQL_SYMB[flv]['br_r'] # right val quote char + + col_template = br_l + '%s' + br_r + ' %s' + + columns = ',\n '.join(col_template % + x for x in zip(safe_columns, column_types)) + template = """CREATE TABLE %(name)s ( + %(columns)s + );""" + create_statement = template % {'name': self.name, 'columns': columns} + return create_statement + + def _sql_type_name(self, dtype): + pytype = dtype.type + pytype_name = "text" + if issubclass(pytype, np.floating): + pytype_name = "float" + elif issubclass(pytype, np.integer): + pytype_name = "int" + elif issubclass(pytype, np.datetime64) or pytype is datetime: + # Caution: np.datetime64 is also a subclass of np.number. + pytype_name = "datetime" + elif pytype is datetime.date: + pytype_name = "date" + elif issubclass(pytype, np.bool_): + pytype_name = "bool" + + return _SQL_TYPES[pytype_name][self.pd_sql.flavor] + + class PandasSQLLegacy(PandasSQL): def __init__(self, con, flavor): @@ -659,10 +807,6 @@ def tquery(self, *args): return result def uquery(self, *args): - """ - Does the same thing as tquery, but instead of returning results, it - returns the number of rows affected. Good for update queries. - """ cur = self.execute(*args) return cur.rowcount @@ -674,12 +818,18 @@ def read_sql(self, sql, index_col=None, coerce_float=True, params=None, data = self._fetchall_as_list(cursor) cursor.close() - data_frame = self._frame_from_data_and_columns(data, columns, - index_col=index_col, - coerce_float=coerce_float) + data_frame = _frame_from_data_and_columns(data, columns, + index_col=index_col, + coerce_float=coerce_float) return self._parse_date_columns(data_frame, parse_dates=parse_dates) - def to_sql(self, frame, name, if_exists='fail'): + def _fetchall_as_list(self, cur): + result = cur.fetchall() + if not isinstance(result, list): + result = list(result) + return result + + def to_sql(self, frame, name, if_exists='fail', index=True): """ Write records stored in a DataFrame to a SQL database. @@ -693,53 +843,9 @@ def to_sql(self, frame, name, if_exists='fail'): replace: If table exists, drop it, recreate it, and insert data. append: If table exists, insert data. Create if does not exist. """ - if self.has_table(name): - if if_exists == 'fail': - raise ValueError("Table '%s' already exists." % name) - elif if_exists == 'replace': - self._drop_table(name) - self._create_table(frame, name) - elif if_exists == "append": - pass # should just add... - else: - self._create_table(frame, name) - - self._write(frame, name) - - def _fetchall_as_list(self, cur): - '''ensures result of fetchall is a list''' - result = cur.fetchall() - if not isinstance(result, list): - result = list(result) - return result - - def _write(self, frame, table_name): - # Replace spaces in DataFrame column names with _. 
- safe_names = self._safe_col_names(frame.columns) - - br_l = _SQL_SYMB[self.flavor]['br_l'] # left val quote char - br_r = _SQL_SYMB[self.flavor]['br_r'] # right val quote char - wld = _SQL_SYMB[self.flavor]['wld'] # wildcard char - - bracketed_names = [br_l + column + br_r for column in safe_names] - col_names = ','.join(bracketed_names) - wildcards = ','.join([wld] * len(safe_names)) - insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( - table_name, col_names, wildcards) - - # pandas types are badly handled if there is only 1 col (Issue #3628) - if len(frame.columns) != 1: - data = [tuple(x) for x in frame.values] - else: - data = [tuple(x) for x in frame.values.tolist()] - - cur = self.con.cursor() - cur.executemany(insert_query, data) - cur.close() - - def _create_table(self, frame, name, keys=None): - create_sql = self._create_sql_schema(frame, name, keys) - self.execute(create_sql) + table = PandasSQLTableLegacy( + name, self, frame=frame, index=index, if_exists=if_exists) + table.insert() def has_table(self, name): flavor_map = { @@ -747,58 +853,16 @@ def has_table(self, name): "WHERE type='table' AND name='%s';") % name, 'mysql': "SHOW TABLES LIKE '%s'" % name} query = flavor_map.get(self.flavor) - if query is None: - raise NotImplementedError + return len(self.tquery(query)) > 0 - def _drop_table(self, name): - # Previously this worried about connection tp cursor then closing... + def get_table(self, table_name): + return None # not supported in Legacy mode + + def drop_table(self, name): drop_sql = "DROP TABLE %s" % name self.execute(drop_sql) - def _create_sql_schema(self, frame, table_name, keys=None): - "Return a CREATE TABLE statement to suit the contents of a DataFrame." - - lookup_type = lambda dtype: self._get_sqltype(dtype.type) - # Replace spaces in DataFrame column names with _. - safe_columns = self._safe_col_names(frame.dtypes.index) - - column_types = lzip(safe_columns, map(lookup_type, frame.dtypes)) - - br_l = _SQL_SYMB[self.flavor]['br_l'] # left val quote char - br_r = _SQL_SYMB[self.flavor]['br_r'] # right val quote char - col_template = br_l + '%s' + br_r + ' %s' - columns = ',\n '.join(col_template % x for x in column_types) - - keystr = '' - if keys is not None: - if isinstance(keys, compat.string_types): - keys = (keys,) - keystr = ', PRIMARY KEY (%s)' % ','.join(keys) - template = """CREATE TABLE %(name)s ( - %(columns)s - %(keystr)s - );""" - create_statement = template % {'name': table_name, 'columns': columns, - 'keystr': keystr} - return create_statement - - def _get_sqltype(self, pytype): - pytype_name = "text" - if issubclass(pytype, np.floating): - pytype_name = "float" - elif issubclass(pytype, np.integer): - pytype_name = "int" - elif issubclass(pytype, np.datetime64) or pytype is datetime: - # Caution: np.datetime64 is also a subclass of np.number. 
- pytype_name = "datetime" - elif pytype is datetime.date: - pytype_name = "date" - elif issubclass(pytype, np.bool_): - pytype_name = "bool" - - return _SQL_TYPES[pytype_name][self.flavor] - # legacy names, with depreciation warnings and copied docs def get_schema(frame, name, con, flavor='sqlite'): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 024f1dbab8c4f..ee4f38cd6e9bf 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -115,11 +115,16 @@ def _check_iris_loaded_frame(self, iris_frame): tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def _load_test1_data(self): - test1_csv_file = os.path.join(tm.get_data_path(), 'test1.csv') + columns = ['index', 'A', 'B', 'C', 'D'] + data = [( + '2000-01-03 00:00:00', 0.980268513777, 3.68573087906, -0.364216805298, -1.15973806169), + ('2000-01-04 00:00:00', 1.04791624281, - + 0.0412318367011, -0.16181208307, 0.212549316967), + ('2000-01-05 00:00:00', 0.498580885705, + 0.731167677815, -0.537677223318, 1.34627041952), + ('2000-01-06 00:00:00', 1.12020151869, 1.56762092543, 0.00364077397681, 0.67525259227)] - with open(test1_csv_file, 'rU') as test1_csv: - dr = csv.DictReader(test1_csv) - self.test_frame1 = DataFrame(list(dr)) + self.test_frame1 = DataFrame(data, columns=columns) def _load_raw_sql(self): self.drop_table('types_test_data') @@ -209,8 +214,10 @@ def _roundtrip(self): self.pandasSQL.to_sql(self.test_frame1, 'test_frame_roundtrip') result = self.pandasSQL.read_sql('SELECT * FROM test_frame_roundtrip') - # HACK! - result.index = self.test_frame1.index + result.set_index('pandas_index', inplace=True) + #result.index.astype(int) + + result.index.name = None tm.assert_frame_equal(result, self.test_frame1) @@ -324,7 +331,9 @@ def test_roundtrip(self): # HACK! result.index = self.test_frame1.index - + result.set_index('pandas_index', inplace=True) + result.index.astype(int) + result.index.name = None tm.assert_frame_equal(result, self.test_frame1) def test_execute_sql(self): @@ -414,7 +423,7 @@ def test_create_table(self): {'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) pandasSQL = sql.PandasSQLAlchemy(temp_conn) - pandasSQL._create_table(temp_frame, 'temp_frame') + pandasSQL.to_sql(temp_frame, 'temp_frame') self.assertTrue( temp_conn.has_table('temp_frame'), 'Table not written to DB') @@ -426,12 +435,12 @@ def test_drop_table(self): {'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) pandasSQL = sql.PandasSQLAlchemy(temp_conn) - pandasSQL._create_table(temp_frame, 'temp_frame') + pandasSQL.to_sql(temp_frame, 'temp_frame') self.assertTrue( temp_conn.has_table('temp_frame'), 'Table not written to DB') - pandasSQL._drop_table('temp_frame') + pandasSQL.drop_table('temp_frame') self.assertFalse( temp_conn.has_table('temp_frame'), 'Table not deleted from DB') @@ -476,7 +485,8 @@ def test_default_type_convertion(self): def test_default_date_load(self): df = sql.read_table("types_test_data", self.conn) - # IMPORTANT - sqlite has no native date type, so shouldn't parse, but MySQL SHOULD be converted. + # IMPORTANT - sqlite has no native date type, so shouldn't parse, but + # MySQL SHOULD be converted. 
self.assertFalse( issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") @@ -564,12 +574,12 @@ def test_create_and_drop_table(self): temp_frame = DataFrame( {'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]}) - self.pandasSQL._create_table(temp_frame, 'drop_test_frame') + self.pandasSQL.to_sql(temp_frame, 'drop_test_frame') self.assertTrue(self.pandasSQL.has_table( 'drop_test_frame'), 'Table not written to DB') - self.pandasSQL._drop_table('drop_test_frame') + self.pandasSQL.drop_table('drop_test_frame') self.assertFalse(self.pandasSQL.has_table( 'drop_test_frame'), 'Table not deleted from DB') @@ -660,6 +670,7 @@ def tearDown(self): def test_default_date_load(self): df = sql.read_table("types_test_data", self.conn) - # IMPORTANT - sqlite has no native date type, so shouldn't parse, but MySQL SHOULD be converted. + # IMPORTANT - sqlite has no native date type, so shouldn't parse, + # but MySQL SHOULD be converted. self.assertTrue( issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") From 32b493a9537f0255bae633fbf569ff26ed97c8f6 Mon Sep 17 00:00:00 2001 From: Jonathan Chambers Date: Thu, 6 Feb 2014 17:03:20 +0000 Subject: [PATCH 06/16] ENH #4163 Fix bug in index + parse date interaction, added test case for problem --- pandas/io/sql.py | 159 ++++++++++++++++++++---------------- pandas/io/tests/test_sql.py | 41 ++++++++-- 2 files changed, 120 insertions(+), 80 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index f4ec78c31237a..ce315375dffff 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -23,7 +23,7 @@ class DatabaseError(IOError): #------------------------------------------------------------------------------ -# Helper execution functions +# Helper functions def _convert_params(sql, params): """convert sql and params args to DBAPI2.0 compliant format""" @@ -33,6 +33,47 @@ def _convert_params(sql, params): return args +def _safe_col_name(col_name): + #TODO: probably want to forbid database reserved names, such as "database" + return col_name.strip().replace(' ', '_') + + +def _handle_date_column(col, format=None): + if isinstance(format, dict): + return to_datetime(col, **format) + else: + if format in ['D', 's', 'ms', 'us', 'ns']: + return to_datetime(col, coerce=True, unit=format) + elif issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer): + # parse dates as timestamp + format = 's' if format is None else format + return to_datetime(col, coerce=True, unit=format) + else: + return to_datetime(col, coerce=True, format=format) + + +def _parse_date_columns(data_frame, parse_dates): + """ Force non-datetime columns to be read as such. + Supports both string formatted and integer timestamp columns + """ + # handle non-list entries for parse_dates gracefully + if parse_dates is True or parse_dates is None or parse_dates is False: + parse_dates = [] + + if not hasattr(parse_dates, '__iter__'): + parse_dates = [parse_dates] + + for col_name in parse_dates: + df_col = data_frame[col_name] + try: + fmt = parse_dates[col_name] + except TypeError: + fmt = None + data_frame[col_name] = _handle_date_column(df_col, format=fmt) + + return data_frame + + def execute(sql, con, cur=None, params=None, flavor='sqlite'): """ Execute the given SQL query using the provided connection object. 
@@ -44,7 +85,7 @@ def execute(sql, con, cur=None, params=None, flavor='sqlite'): con: SQLAlchemy engine or DBAPI2 connection (legacy mode) Using SQLAlchemy makes it possible to use any DB supported by that library. - If a DBAPI2 object is given, a supported SQL flavor must also be provided + If a DBAPI2 object, a supported SQL flavor must also be provided cur: depreciated, cursor is obtained from connection params: list or tuple, optional List of parameters to pass to execute method. @@ -283,9 +324,11 @@ def pandasSQL_builder(con, flavor=None, meta=None): return PandasSQLAlchemy(con, meta=meta) else: warnings.warn( - "Not an SQLAlchemy engine, attempting to use as legacy DBAPI connection") + """Not an SQLAlchemy engine, + attempting to use as legacy DBAPI connection""") if flavor is None: - raise ValueError("""PandasSQL must be created with an SQLAlchemy engine + raise ValueError( + """PandasSQL must be created with an SQLAlchemy engine or a DBAPI2 connection and SQL flavour""") else: return PandasSQLLegacy(con, flavor) @@ -298,36 +341,16 @@ def pandasSQL_builder(con, flavor=None, meta=None): return PandasSQLLegacy(con, flavor) -def _safe_col_name(col_name): - return col_name.strip().replace(' ', '_') - - -def _parse_date_column(col, format=None): - if isinstance(format, dict): - return to_datetime(col, **format) - else: - if format in ['D', 's', 'ms', 'us', 'ns']: - return to_datetime(col, coerce=True, unit=format) - elif issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer): - # parse dates as timestamp - format = 's' if format is None else format - return to_datetime(col, coerce=True, unit=format) - else: - return to_datetime(col, coerce=True, format=format) - - -def _frame_from_data_and_columns(data, columns, index_col=None, - coerce_float=True): - df = DataFrame.from_records( - data, columns=columns, coerce_float=coerce_float) - if index_col is not None: - df.set_index(index_col, inplace=True) - return df - - class PandasSQLTable(PandasObject): - - def __init__(self, name, pandas_sql_engine, frame=None, index=True, if_exists='fail', prefix='pandas'): + """ For mapping Pandas tables to SQL tables. + Uses fact that table is reflected by SQLAlchemy to + do better type convertions. + Also holds various flags needed to avoid having to + pass them between functions all the time. 
+ """ + # TODO: support for multiIndex + def __init__(self, name, pandas_sql_engine, frame=None, index=True, + if_exists='fail', prefix='pandas'): self.name = name self.pd_sql = pandas_sql_engine self.prefix = prefix @@ -400,13 +423,15 @@ def read(self, coerce_float=True, parse_dates=None, columns=None): data = result.fetchall() column_names = result.keys() - self.frame = _frame_from_data_and_columns(data, column_names, - index_col=self.index, - coerce_float=coerce_float) + self.frame = DataFrame.from_records( + data, columns=column_names, coerce_float=coerce_float) self._harmonize_columns(parse_dates=parse_dates) - # Assume that if the index was in prefix_index format, we gave it a name + if self.index is not None: + self.frame.set_index(self.index, inplace=True) + + # Assume if the index in prefix_index format, we gave it a name # and should return it nameless if self.index == self.prefix + '_index': self.frame.index.name = None @@ -442,13 +467,14 @@ def _create_table_statement(self): return Table(self.name, self.pd_sql.meta, *columns) def _harmonize_columns(self, parse_dates=None): - """ Make a data_frame's column type align with an sql_table column types + """ Make a data_frame's column type align with an sql_table + column types Need to work around limited NA value support. Floats are always fine, ints must always be floats if there are Null values. Booleans are hard because converting bool column with None replaces - all Nones with false. Therefore only convert bool if there are no NA - values. + all Nones with false. Therefore only convert bool if there are no + NA values. Datetimes should already be converted to np.datetime if supported, but here we also force conversion if required @@ -469,7 +495,7 @@ def _harmonize_columns(self, parse_dates=None): if col_type is datetime or col_type is date: if not issubclass(df_col.dtype.type, np.datetime64): - self.frame[col_name] = _parse_date_column(df_col) + self.frame[col_name] = _handle_date_column(df_col) elif col_type is float: # floats support NA, can always convert! @@ -486,7 +512,7 @@ def _harmonize_columns(self, parse_dates=None): fmt = parse_dates[col_name] except TypeError: fmt = None - self.frame[col_name] = _parse_date_column( + self.frame[col_name] = _handle_date_column( df_col, format=fmt) except KeyError: @@ -543,27 +569,6 @@ def to_sql(self, *args, **kwargs): raise ValueError( "PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor") - def _parse_date_columns(self, data_frame, parse_dates): - """ Force non-datetime columns to be read as such. 
- Supports both string formatted and integer timestamp columns - """ - # handle non-list entries for parse_dates gracefully - if parse_dates is True or parse_dates is None or parse_dates is False: - parse_dates = [] - - if not hasattr(parse_dates, '__iter__'): - parse_dates = [parse_dates] - - for col_name in parse_dates: - df_col = data_frame[col_name] - try: - fmt = parse_dates[col_name] - except TypeError: - fmt = None - data_frame[col_name] = _parse_date_column(df_col, format=fmt) - - return data_frame - class PandasSQLAlchemy(PandasSQL): @@ -593,17 +598,23 @@ def uquery(self, *args, **kwargs): result = self.execute(*args, **kwargs) return result.rowcount - def read_sql(self, sql, index_col=None, coerce_float=True, parse_dates=None, params=None): + def read_sql(self, sql, index_col=None, coerce_float=True, + parse_dates=None, params=None): args = _convert_params(sql, params) + result = self.execute(*args) data = result.fetchall() columns = result.keys() - data_frame = _frame_from_data_and_columns(data, columns, - index_col=index_col, - coerce_float=coerce_float) + data_frame = DataFrame.from_records( + data, columns=columns, coerce_float=coerce_float) + + _parse_date_columns(data_frame, parse_dates) + + if index_col is not None: + data_frame.set_index(index_col, inplace=True) - return self._parse_date_columns(data_frame, parse_dates) + return data_frame def to_sql(self, frame, name, if_exists='fail', index=True): table = PandasSQLTable( @@ -818,10 +829,14 @@ def read_sql(self, sql, index_col=None, coerce_float=True, params=None, data = self._fetchall_as_list(cursor) cursor.close() - data_frame = _frame_from_data_and_columns(data, columns, - index_col=index_col, - coerce_float=coerce_float) - return self._parse_date_columns(data_frame, parse_dates=parse_dates) + data_frame = DataFrame.from_records( + data, columns=columns, coerce_float=coerce_float) + + _parse_date_columns(data_frame, parse_dates) + + if index_col is not None: + data_frame.set_index(index_col, inplace=True) + return data_frame def _fetchall_as_list(self, cur): result = cur.fetchall() diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index ee4f38cd6e9bf..c11d64302d955 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -215,7 +215,7 @@ def _roundtrip(self): result = self.pandasSQL.read_sql('SELECT * FROM test_frame_roundtrip') result.set_index('pandas_index', inplace=True) - #result.index.astype(int) + # result.index.astype(int) result.index.name = None @@ -327,7 +327,9 @@ def test_roundtrip(self): sql.to_sql(self.test_frame1, 'test_frame_roundtrip', con=self.conn, flavor='sqlite') result = sql.read_sql( - 'SELECT * FROM test_frame_roundtrip', con=self.conn, flavor='sqlite') + 'SELECT * FROM test_frame_roundtrip', + con=self.conn, + flavor='sqlite') # HACK! 
result.index = self.test_frame1.index @@ -355,28 +357,51 @@ def test_date_parsing(self): df = sql.read_sql( "SELECT * FROM types_test_data", self.conn, flavor='sqlite') self.assertFalse( - issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + issubclass(df.DateCol.dtype.type, np.datetime64), + "DateCol loaded with incorrect type") df = sql.read_sql("SELECT * FROM types_test_data", self.conn, flavor='sqlite', parse_dates=['DateCol']) self.assertTrue( - issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + issubclass(df.DateCol.dtype.type, np.datetime64), + "DateCol loaded with incorrect type") df = sql.read_sql("SELECT * FROM types_test_data", self.conn, - flavor='sqlite', parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) + flavor='sqlite', + parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) self.assertTrue( - issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + issubclass(df.DateCol.dtype.type, np.datetime64), + "DateCol loaded with incorrect type") df = sql.read_sql("SELECT * FROM types_test_data", - self.conn, flavor='sqlite', parse_dates=['IntDateCol']) + self.conn, flavor='sqlite', + parse_dates=['IntDateCol']) + self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), "IntDateCol loaded with incorrect type") df = sql.read_sql("SELECT * FROM types_test_data", - self.conn, flavor='sqlite', parse_dates={'IntDateCol': 's'}) + self.conn, flavor='sqlite', + parse_dates={'IntDateCol': 's'}) + self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), "IntDateCol loaded with incorrect type") + def test_date_and_index(self): + """ Test case where same column appears in parse_date and index_col""" + + df = sql.read_sql("SELECT * FROM types_test_data", + self.conn, flavor='sqlite', + parse_dates=['DateCol', 'IntDateCol'], + index_col='DateCol') + self.assertTrue( + issubclass(df.index.dtype.type, np.datetime64), + "DateCol loaded with incorrect type") + + self.assertTrue( + issubclass(df.IntDateCol.dtype.type, np.datetime64), + "IntDateCol loaded with incorrect type") + class TestSQLAlchemy(PandasSQLTest): From 0ea6483ef0c312be5b2c2d2d494fffe5bf925ea0 Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Thu, 11 Jul 2013 10:01:37 -0400 Subject: [PATCH 07/16] ENH #4163 Use SQLAlchemy for DB abstraction TST Import sqlalchemy on Travis. DOC add docstrings to read sql ENH read_sql connects via Connection, Engine, file path, or :memory: string CLN Separate legacy code into new file, and fallback so that all old tests pass. TST to use sqlachemy syntax in tests CLN sql into classes, legacy passes FIX few engine vs con calls CLN pep8 cleanup add postgres support for pandas.io.sql.get_schema WIP: cleaup of sql io module - imported correct SQLALCHEMY type, delete redundant PandasSQLWithCon TODO: renamed _engine_read_table, need to think of a better name. TODO: clean up get_conneciton function ENH: cleanup of SQL io TODO: check that legacy mode works TODO: run tests correctly enabled coerce_float option Cleanup and bug-fixing mainly on legacy mode sql. IMPORTANT - changed legacy to require connection rather than cursor. This is still not yet finalized. TODO: tests and doc Added Test coverage for basic functionality using in-memory SQLite database Simplified API by automatically distinguishing between engine and connection. 
Added warnings --- pandas/io/sql_legacy.py | 332 +++++++++++++++++++ pandas/io/tests/data/iris.csv | 151 +++++++++ pandas/io/tests/test_sql_legacy.py | 497 +++++++++++++++++++++++++++++ 3 files changed, 980 insertions(+) create mode 100644 pandas/io/sql_legacy.py create mode 100644 pandas/io/tests/data/iris.csv create mode 100644 pandas/io/tests/test_sql_legacy.py diff --git a/pandas/io/sql_legacy.py b/pandas/io/sql_legacy.py new file mode 100644 index 0000000000000..a8a5d968dd02d --- /dev/null +++ b/pandas/io/sql_legacy.py @@ -0,0 +1,332 @@ +""" +Collection of query wrappers / abstractions to both facilitate data +retrieval and to reduce dependency on DB-specific API. +""" +from datetime import datetime, date + +import numpy as np +import traceback + +from pandas.core.datetools import format as date_format +from pandas.core.api import DataFrame, isnull + +#------------------------------------------------------------------------------ +# Helper execution function + + +def execute(sql, con, retry=True, cur=None, params=None): + """ + Execute the given SQL query using the provided connection object. + + Parameters + ---------- + sql: string + Query to be executed + con: database connection instance + Database connection. Must implement PEP249 (Database API v2.0). + retry: bool + Not currently implemented + cur: database cursor, optional + Must implement PEP249 (Datbase API v2.0). If cursor is not provided, + one will be obtained from the database connection. + params: list or tuple, optional + List of parameters to pass to execute method. + + Returns + ------- + Cursor object + """ + try: + if cur is None: + cur = con.cursor() + + if params is None: + cur.execute(sql) + else: + cur.execute(sql, params) + return cur + except Exception: + try: + con.rollback() + except Exception: # pragma: no cover + pass + + print ('Error on sql %s' % sql) + raise + + +def _safe_fetch(cur): + try: + result = cur.fetchall() + if not isinstance(result, list): + result = list(result) + return result + except Exception, e: # pragma: no cover + excName = e.__class__.__name__ + if excName == 'OperationalError': + return [] + + +def tquery(sql, con=None, cur=None, retry=True): + """ + Returns list of tuples corresponding to each row in given sql + query. + + If only one column selected, then plain list is returned. + + Parameters + ---------- + sql: string + SQL query to be executed + con: SQLConnection or DB API 2.0-compliant connection + cur: DB API 2.0 cursor + + Provide a specific connection or a specific cursor if you are executing a + lot of sequential statements and want to commit outside. + """ + cur = execute(sql, con, cur=cur) + result = _safe_fetch(cur) + + if con is not None: + try: + cur.close() + con.commit() + except Exception as e: + excName = e.__class__.__name__ + if excName == 'OperationalError': # pragma: no cover + print ('Failed to commit, may need to restart interpreter') + else: + raise + + traceback.print_exc() + if retry: + return tquery(sql, con=con, retry=False) + + if result and len(result[0]) == 1: + # python 3 compat + result = list(list(zip(*result))[0]) + elif result is None: # pragma: no cover + result = [] + + return result + + +def uquery(sql, con=None, cur=None, retry=True, params=None): + """ + Does the same thing as tquery, but instead of returning results, it + returns the number of rows affected. Good for update queries. 
+ """ + cur = execute(sql, con, cur=cur, retry=retry, params=params) + + result = cur.rowcount + try: + con.commit() + except Exception as e: + excName = e.__class__.__name__ + if excName != 'OperationalError': + raise + + traceback.print_exc() + if retry: + print ('Looks like your connection failed, reconnecting...') + return uquery(sql, con, retry=False) + return result + + +def read_frame(sql, con, index_col=None, coerce_float=True, params=None): + """ + Returns a DataFrame corresponding to the result set of the query + string. + + Optionally provide an index_col parameter to use one of the + columns as the index. Otherwise will be 0 to len(results) - 1. + + Parameters + ---------- + sql: string + SQL query to be executed + con: DB connection object, optional + index_col: string, optional + column name to use for the returned DataFrame object. + coerce_float : boolean, default True + Attempt to convert values to non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets + params: list or tuple, optional + List of parameters to pass to execute method. + """ + cur = execute(sql, con, params=params) + rows = _safe_fetch(cur) + columns = [col_desc[0] for col_desc in cur.description] + + cur.close() + con.commit() + + result = DataFrame.from_records(rows, columns=columns, + coerce_float=coerce_float) + + if index_col is not None: + result = result.set_index(index_col) + + return result + +frame_query = read_frame +read_sql = read_frame + + +def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame: DataFrame + name: name of SQL table + con: an open SQL database connection object + flavor: {'sqlite', 'mysql', 'oracle'}, default 'sqlite' + if_exists: {'fail', 'replace', 'append'}, default 'fail' + fail: If table exists, do nothing. + replace: If table exists, drop it, recreate it, and insert data. + append: If table exists, insert data. Create if does not exist. + """ + + if 'append' in kwargs: + import warnings + warnings.warn("append is deprecated, use if_exists instead", + FutureWarning) + if kwargs['append']: + if_exists='append' + else: + if_exists='fail' + exists = table_exists(name, con, flavor) + if if_exists == 'fail' and exists: + raise ValueError, "Table '%s' already exists." % name + + #create or drop-recreate if necessary + create = None + if exists and if_exists == 'replace': + create = "DROP TABLE %s" % name + elif not exists: + create = get_schema(frame, name, flavor) + + if create is not None: + cur = con.cursor() + cur.execute(create) + cur.close() + + cur = con.cursor() + # Replace spaces in DataFrame column names with _. 
+ safe_names = [s.replace(' ', '_').strip() for s in frame.columns] + flavor_picker = {'sqlite' : _write_sqlite, + 'mysql' : _write_mysql} + + func = flavor_picker.get(flavor, None) + if func is None: + raise NotImplementedError + func(frame, name, safe_names, cur) + cur.close() + con.commit() + + +def _write_sqlite(frame, table, names, cur): + bracketed_names = ['[' + column + ']' for column in names] + col_names = ','.join(bracketed_names) + wildcards = ','.join(['?'] * len(names)) + insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( + table, col_names, wildcards) + # pandas types are badly handled if there is only 1 column ( Issue #3628 ) + if not len(frame.columns )==1 : + data = [tuple(x) for x in frame.values] + else : + data = [tuple(x) for x in frame.values.tolist()] + cur.executemany(insert_query, data) + + +def _write_mysql(frame, table, names, cur): + bracketed_names = ['`' + column + '`' for column in names] + col_names = ','.join(bracketed_names) + wildcards = ','.join([r'%s'] * len(names)) + insert_query = "INSERT INTO %s (%s) VALUES (%s)" % ( + table, col_names, wildcards) + data = [tuple(x) for x in frame.values] + cur.executemany(insert_query, data) + + +def table_exists(name, con, flavor): + flavor_map = { + 'sqlite': ("SELECT name FROM sqlite_master " + "WHERE type='table' AND name='%s';") % name, + 'mysql' : "SHOW TABLES LIKE '%s'" % name} + query = flavor_map.get(flavor, None) + if query is None: + raise NotImplementedError + return len(tquery(query, con)) > 0 + + +def get_sqltype(pytype, flavor): + sqltype = {'mysql': 'VARCHAR (63)', + 'sqlite': 'TEXT'} + + if issubclass(pytype, np.floating): + sqltype['mysql'] = 'FLOAT' + sqltype['sqlite'] = 'REAL' + + if issubclass(pytype, np.integer): + #TODO: Refine integer size. + sqltype['mysql'] = 'BIGINT' + sqltype['sqlite'] = 'INTEGER' + + if issubclass(pytype, np.datetime64) or pytype is datetime: + # Caution: np.datetime64 is also a subclass of np.number. + sqltype['mysql'] = 'DATETIME' + sqltype['sqlite'] = 'TIMESTAMP' + + if pytype is datetime.date: + sqltype['mysql'] = 'DATE' + sqltype['sqlite'] = 'TIMESTAMP' + + if issubclass(pytype, np.bool_): + sqltype['sqlite'] = 'INTEGER' + + return sqltype[flavor] + + +def get_schema(frame, name, flavor, keys=None): + "Return a CREATE TABLE statement to suit the contents of a DataFrame." + lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) + # Replace spaces in DataFrame column names with _. + safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index] + column_types = zip(safe_columns, map(lookup_type, frame.dtypes)) + if flavor == 'sqlite': + columns = ',\n '.join('[%s] %s' % x for x in column_types) + else: + columns = ',\n '.join('`%s` %s' % x for x in column_types) + + keystr = '' + if keys is not None: + if isinstance(keys, basestring): + keys = (keys,) + keystr = ', PRIMARY KEY (%s)' % ','.join(keys) + template = """CREATE TABLE %(name)s ( + %(columns)s + %(keystr)s + );""" + create_statement = template % {'name': name, 'columns': columns, + 'keystr': keystr} + return create_statement + + +def sequence2dict(seq): + """Helper function for cx_Oracle. + + For each element in the sequence, creates a dictionary item equal + to the element and keyed by the position of the item in the list. 
+ >>> sequence2dict(("Matt", 1)) + {'1': 'Matt', '2': 1} + + Source: + http://www.gingerandjohn.com/archives/2004/02/26/cx_oracle-executemany-example/ + """ + d = {} + for k,v in zip(range(1, 1 + len(seq)), seq): + d[str(k)] = v + return d diff --git a/pandas/io/tests/data/iris.csv b/pandas/io/tests/data/iris.csv new file mode 100644 index 0000000000000..c19b9c3688515 --- /dev/null +++ b/pandas/io/tests/data/iris.csv @@ -0,0 +1,151 @@ +SepalLength,SepalWidth,PetalLength,PetalWidth,Name +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor 
+5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica \ No newline at end of file diff --git a/pandas/io/tests/test_sql_legacy.py b/pandas/io/tests/test_sql_legacy.py new file mode 100644 index 0000000000000..3c6e992097d30 --- /dev/null +++ b/pandas/io/tests/test_sql_legacy.py @@ -0,0 +1,497 @@ +from __future__ import with_statement +from pandas.compat import StringIO +import unittest +import sqlite3 +import sys + +import warnings + +import nose + +import numpy as np + +from pandas.core.datetools import format as date_format +from pandas.core.api import DataFrame, isnull +from pandas.compat import StringIO, range, lrange +import pandas.compat as compat + +import pandas.io.sql as sql +from pandas.io.sql import DatabaseError +import pandas.util.testing as tm +from pandas import Series, Index, DataFrame +from datetime import datetime + +_formatters = { + datetime: lambda dt: "'%s'" % date_format(dt), + str: lambda x: "'%s'" % x, + np.str_: lambda x: "'%s'" % x, + compat.text_type: lambda x: "'%s'" % x, + compat.binary_type: lambda x: "'%s'" % x, + float: lambda x: "%.8f" % x, + int: lambda x: "%s" % x, + type(None): lambda x: "NULL", + np.float64: lambda x: "%.10f" % x, + bool: lambda x: "'%s'" % x, +} + +def format_query(sql, *args): + """ + + """ + processed_args = [] + for arg in args: + if isinstance(arg, float) and isnull(arg): + arg = None + + formatter = _formatters[type(arg)] + processed_args.append(formatter(arg)) + + return sql % tuple(processed_args) + +def _skip_if_no_MySQLdb(): + try: + import MySQLdb + except ImportError: + raise nose.SkipTest('MySQLdb not installed, skipping') + +class TestSQLite(unittest.TestCase): + + def setUp(self): + self.db = sqlite3.connect(':memory:') + + def test_basic(self): + frame = tm.makeTimeDataFrame() + self._check_roundtrip(frame) + + def test_write_row_by_row(self): + frame = tm.makeTimeDataFrame() + frame.ix[0, 0] = np.nan + create_sql = sql.get_schema(frame, 
'test', 'sqlite') + cur = self.db.cursor() + cur.execute(create_sql) + + cur = self.db.cursor() + + ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" + for idx, row in frame.iterrows(): + fmt_sql = format_query(ins, *row) + sql.tquery(fmt_sql, cur=cur) + + self.db.commit() + + result = sql.read_frame("select * from test", con=self.db) + result.index = frame.index + tm.assert_frame_equal(result, frame) + + def test_execute(self): + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, 'test', 'sqlite') + cur = self.db.cursor() + cur.execute(create_sql) + ins = "INSERT INTO test VALUES (?, ?, ?, ?)" + + row = frame.ix[0] + sql.execute(ins, self.db, params=tuple(row)) + self.db.commit() + + result = sql.read_frame("select * from test", self.db) + result.index = frame.index[:1] + tm.assert_frame_equal(result, frame[:1]) + + def test_schema(self): + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, 'test', 'sqlite') + lines = create_sql.splitlines() + for l in lines: + tokens = l.split(' ') + if len(tokens) == 2 and tokens[0] == 'A': + self.assert_(tokens[1] == 'DATETIME') + + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, 'test', 'sqlite', keys=['A', 'B'],) + lines = create_sql.splitlines() + self.assert_('PRIMARY KEY (A,B)' in create_sql) + cur = self.db.cursor() + cur.execute(create_sql) + + def test_execute_fail(self): + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a, b) + ); + """ + cur = self.db.cursor() + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) + sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db) + + try: + sys.stdout = StringIO() + self.assertRaises(Exception, sql.execute, + 'INSERT INTO test VALUES("foo", "bar", 7)', + self.db) + finally: + sys.stdout = sys.__stdout__ + + def test_execute_closed_connection(self): + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a, b) + ); + """ + cur = self.db.cursor() + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) + self.db.close() + try: + sys.stdout = StringIO() + self.assertRaises(Exception, sql.tquery, "select * from test", + con=self.db) + finally: + sys.stdout = sys.__stdout__ + + def test_na_roundtrip(self): + pass + + def _check_roundtrip(self, frame): + sql.write_frame(frame, name='test_table', con=self.db) + result = sql.read_frame("select * from test_table", self.db) + + # HACK! Change this once indexes are handled properly. 
+ result.index = frame.index + + expected = frame + tm.assert_frame_equal(result, expected) + + frame['txt'] = ['a'] * len(frame) + frame2 = frame.copy() + frame2['Idx'] = Index(lrange(len(frame2))) + 10 + sql.write_frame(frame2, name='test_table2', con=self.db) + result = sql.read_frame("select * from test_table2", self.db, + index_col='Idx') + expected = frame.copy() + expected.index = Index(lrange(len(frame2))) + 10 + expected.index.name = 'Idx' + print(expected.index.names) + print(result.index.names) + tm.assert_frame_equal(expected, result) + + def test_tquery(self): + frame = tm.makeTimeDataFrame() + sql.write_frame(frame, name='test_table', con=self.db) + result = sql.tquery("select A from test_table", self.db) + expected = frame.A + result = Series(result, frame.index) + tm.assert_series_equal(result, expected) + + try: + sys.stdout = StringIO() + self.assertRaises(DatabaseError, sql.tquery, + 'select * from blah', con=self.db) + + self.assertRaises(DatabaseError, sql.tquery, + 'select * from blah', con=self.db, retry=True) + finally: + sys.stdout = sys.__stdout__ + + def test_uquery(self): + frame = tm.makeTimeDataFrame() + sql.write_frame(frame, name='test_table', con=self.db) + stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)' + self.assertEqual(sql.uquery(stmt, con=self.db), 1) + + try: + sys.stdout = StringIO() + + self.assertRaises(DatabaseError, sql.tquery, + 'insert into blah values (1)', con=self.db) + + self.assertRaises(DatabaseError, sql.tquery, + 'insert into blah values (1)', con=self.db, + retry=True) + finally: + sys.stdout = sys.__stdout__ + + def test_keyword_as_column_names(self): + ''' + ''' + df = DataFrame({'From':np.ones(5)}) + sql.write_frame(df, con = self.db, name = 'testkeywords') + + def test_onecolumn_of_integer(self): + ''' + GH 3628 + a column_of_integers dataframe should transfer well to sql + ''' + mono_df=DataFrame([1 , 2], columns=['c0']) + sql.write_frame(mono_df, con = self.db, name = 'mono_df') + # computing the sum via sql + con_x=self.db + the_sum=sum([my_c0[0] for my_c0 in con_x.execute("select * from mono_df")]) + # it should not fail, and gives 3 ( Issue #3628 ) + self.assertEqual(the_sum , 3) + + result = sql.read_frame("select * from mono_df",con_x) + tm.assert_frame_equal(result,mono_df) + + +class TestMySQL(unittest.TestCase): + + def setUp(self): + _skip_if_no_MySQLdb() + import MySQLdb + try: + # Try Travis defaults. + # No real user should allow root access with a blank password. + self.db = MySQLdb.connect(host='localhost', user='root', passwd='', + db='pandas_nosetest') + except: + pass + else: + return + try: + self.db = MySQLdb.connect(read_default_group='pandas') + except MySQLdb.ProgrammingError as e: + raise nose.SkipTest( + "Create a group of connection parameters under the heading " + "[pandas] in your system's mysql default file, " + "typically located at ~/.my.cnf or /etc/.my.cnf. ") + except MySQLdb.Error as e: + raise nose.SkipTest( + "Cannot connect to database. " + "Create a group of connection parameters under the heading " + "[pandas] in your system's mysql default file, " + "typically located at ~/.my.cnf or /etc/.my.cnf. 
") + + def test_basic(self): + _skip_if_no_MySQLdb() + frame = tm.makeTimeDataFrame() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "For more robust support.*") + self._check_roundtrip(frame) + + def test_write_row_by_row(self): + _skip_if_no_MySQLdb() + frame = tm.makeTimeDataFrame() + frame.ix[0, 0] = np.nan + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = sql.get_schema(frame, 'test', 'mysql') + cur = self.db.cursor() + cur.execute(drop_sql) + cur.execute(create_sql) + ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" + for idx, row in frame.iterrows(): + fmt_sql = format_query(ins, *row) + sql.tquery(fmt_sql, cur=cur) + + self.db.commit() + + result = sql.read_frame("select * from test", con=self.db) + result.index = frame.index + tm.assert_frame_equal(result, frame) + + def test_execute(self): + _skip_if_no_MySQLdb() + frame = tm.makeTimeDataFrame() + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = sql.get_schema(frame, 'test', 'mysql') + cur = self.db.cursor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Unknown table.*") + cur.execute(drop_sql) + cur.execute(create_sql) + ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" + + row = frame.ix[0] + sql.execute(ins, self.db, params=tuple(row)) + self.db.commit() + + result = sql.read_frame("select * from test", self.db) + result.index = frame.index[:1] + tm.assert_frame_equal(result, frame[:1]) + + def test_schema(self): + _skip_if_no_MySQLdb() + frame = tm.makeTimeDataFrame() + create_sql = sql.get_schema(frame, 'test', 'mysql') + lines = create_sql.splitlines() + for l in lines: + tokens = l.split(' ') + if len(tokens) == 2 and tokens[0] == 'A': + self.assert_(tokens[1] == 'DATETIME') + + frame = tm.makeTimeDataFrame() + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = sql.get_schema(frame, 'test', 'mysql', keys=['A', 'B'],) + lines = create_sql.splitlines() + self.assert_('PRIMARY KEY (A,B)' in create_sql) + cur = self.db.cursor() + cur.execute(drop_sql) + cur.execute(create_sql) + + def test_execute_fail(self): + _skip_if_no_MySQLdb() + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a(5), b(5)) + ); + """ + cur = self.db.cursor() + cur.execute(drop_sql) + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) + sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db) + + try: + sys.stdout = StringIO() + self.assertRaises(Exception, sql.execute, + 'INSERT INTO test VALUES("foo", "bar", 7)', + self.db) + finally: + sys.stdout = sys.__stdout__ + + def test_execute_closed_connection(self): + _skip_if_no_MySQLdb() + drop_sql = "DROP TABLE IF EXISTS test" + create_sql = """ + CREATE TABLE test + ( + a TEXT, + b TEXT, + c REAL, + PRIMARY KEY (a(5), b(5)) + ); + """ + cur = self.db.cursor() + cur.execute(drop_sql) + cur.execute(create_sql) + + sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) + self.db.close() + try: + sys.stdout = StringIO() + self.assertRaises(Exception, sql.tquery, "select * from test", + con=self.db) + finally: + sys.stdout = sys.__stdout__ + + def test_na_roundtrip(self): + _skip_if_no_MySQLdb() + pass + + def _check_roundtrip(self, frame): + _skip_if_no_MySQLdb() + drop_sql = "DROP TABLE IF EXISTS test_table" + cur = self.db.cursor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Unknown table.*") + cur.execute(drop_sql) + sql.write_frame(frame, name='test_table', con=self.db, 
flavor='mysql')
+        result = sql.read_frame("select * from test_table", self.db)
+
+        # HACK! Change this once indexes are handled properly.
+        result.index = frame.index
+        result.index.name = frame.index.name
+
+        expected = frame
+        tm.assert_frame_equal(result, expected)
+
+        frame['txt'] = ['a'] * len(frame)
+        frame2 = frame.copy()
+        index = Index(lrange(len(frame2))) + 10
+        frame2['Idx'] = index
+        drop_sql = "DROP TABLE IF EXISTS test_table2"
+        cur = self.db.cursor()
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", "Unknown table.*")
+            cur.execute(drop_sql)
+        sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql')
+        result = sql.read_frame("select * from test_table2", self.db,
+                                index_col='Idx')
+        expected = frame.copy()
+
+        # HACK! Change this once indexes are handled properly.
+        expected.index = index
+        expected.index.names = result.index.names
+        tm.assert_frame_equal(expected, result)
+
+    def test_tquery(self):
+        try:
+            import MySQLdb
+        except ImportError:
+            raise nose.SkipTest
+        frame = tm.makeTimeDataFrame()
+        drop_sql = "DROP TABLE IF EXISTS test_table"
+        cur = self.db.cursor()
+        cur.execute(drop_sql)
+        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
+        result = sql.tquery("select A from test_table", self.db)
+        expected = frame.A
+        result = Series(result, frame.index)
+        tm.assert_series_equal(result, expected)
+
+        try:
+            sys.stdout = StringIO()
+            self.assertRaises(DatabaseError, sql.tquery,
+                              'select * from blah', con=self.db)
+
+            self.assertRaises(DatabaseError, sql.tquery,
+                              'select * from blah', con=self.db, retry=True)
+        finally:
+            sys.stdout = sys.__stdout__
+
+    def test_uquery(self):
+        try:
+            import MySQLdb
+        except ImportError:
+            raise nose.SkipTest
+        frame = tm.makeTimeDataFrame()
+        drop_sql = "DROP TABLE IF EXISTS test_table"
+        cur = self.db.cursor()
+        cur.execute(drop_sql)
+        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')
+        stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)'
+        self.assertEqual(sql.uquery(stmt, con=self.db), 1)
+
+        try:
+            sys.stdout = StringIO()
+
+            self.assertRaises(DatabaseError, sql.tquery,
+                              'insert into blah values (1)', con=self.db)
+
+            self.assertRaises(DatabaseError, sql.tquery,
+                              'insert into blah values (1)', con=self.db,
+                              retry=True)
+        finally:
+            sys.stdout = sys.__stdout__
+
+    def test_keyword_as_column_names(self):
+        '''
+        '''
+        _skip_if_no_MySQLdb()
+        df = DataFrame({'From':np.ones(5)})
+        sql.write_frame(df, name='testkeywords', con=self.db,
+                        if_exists='replace', flavor='mysql')
+
+if __name__ == '__main__':
+    # unittest.main()
+    # nose.runmodule(argv=[__file__,'-vvs','-x', '--pdb-failure'],
+    #                exit=False)
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+                   exit=False)

From 8ec0c334ce7646b1089a5916e583d79b757b5853 Mon Sep 17 00:00:00 2001
From: Jonathan Chambers
Date: Tue, 14 Jan 2014 22:11:09 +0000
Subject: [PATCH 08/16] ENH #4163 Added tests and documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Initial draft of doc updates

minor doc updates

Added tests and reduced code repetition. Updated Docs. Added test coverage for legacy names

Documentation updates, more tests

Added deprecation warnings for legacy names. Updated docs and test doc build

ENH #4163 - finalized tests and docs, ready for wider use...

TST added sqlalchemy to TravisCI build dep for py 2.7 and 3.3

TST Import sqlalchemy on Travis.
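For illustration, a minimal sketch of the connection handling this series
describes, assuming the read_sql signature introduced here (the iris table
comes from the test fixtures; the flavor keyword is only needed on the
legacy DBAPI2 path):

    import sqlite3
    from sqlalchemy import create_engine
    import pandas.io.sql as sql

    # Preferred path: hand any SQLAlchemy engine straight to read_sql;
    # the database specifics are handled by SQLAlchemy itself.
    engine = create_engine('sqlite:///:memory:')
    df = sql.read_sql("SELECT * FROM iris", engine)

    # Legacy mode: a plain DBAPI2 connection still works, but the SQL
    # flavor must then be named explicitly.
    conn = sqlite3.connect(':memory:')
    df = sql.read_sql("SELECT * FROM iris", conn, flavor='sqlite')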
DOC add docstrings to read sql

ENH read_sql connects via Connection, Engine, file path, or :memory: string

CLN Separate legacy code into new file, and fallback so that all old tests pass.

ENH #4163 added version added comment

ENH #4163 added deprecation warning for tquery and uquery

ENH #4163 Documentation and tests
---
 ci/requirements-2.7.txt            |   1 +
 ci/requirements-2.7_LOCALE.txt     |   1 +
 ci/requirements-3.3.txt            |   1 +
 pandas/io/sql_legacy.py            | 332 -------------------
 pandas/io/tests/test_sql_legacy.py | 497 -----------------------------
 5 files changed, 3 insertions(+), 829 deletions(-)
 delete mode 100644 pandas/io/sql_legacy.py
 delete mode 100644 pandas/io/tests/test_sql_legacy.py

diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt
index c7cf69bc92927..f9ccc54fbbcb6 100644
--- a/ci/requirements-2.7.txt
+++ b/ci/requirements-2.7.txt
@@ -19,3 +19,4 @@ scipy==0.10.0
 beautifulsoup4==4.2.1
 statsmodels==0.5.0
 bigquery==2.0.17
+sqlalchemy==0.8.1
diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt
index 763af87f7394c..e45c27141c907 100644
--- a/ci/requirements-2.7_LOCALE.txt
+++ b/ci/requirements-2.7_LOCALE.txt
@@ -15,3 +15,4 @@ scipy==0.10.0
 beautifulsoup4==4.2.1
 statsmodels==0.5.0
 bigquery==2.0.17
+sqlalchemy==0.8.1
diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt
index 480fde477d88b..73009b572c4c2 100644
--- a/ci/requirements-3.3.txt
+++ b/ci/requirements-3.3.txt
@@ -14,3 +14,4 @@ lxml==3.2.1
 scipy==0.12.0
 beautifulsoup4==4.2.1
 statsmodels==0.4.3
+sqlalchemy==0.9.1
diff --git a/pandas/io/sql_legacy.py b/pandas/io/sql_legacy.py
deleted file mode 100644
index a8a5d968dd02d..0000000000000
--- a/pandas/io/sql_legacy.py
+++ /dev/null
@@ -1,332 +0,0 @@
-"""
-Collection of query wrappers / abstractions to both facilitate data
-retrieval and to reduce dependency on DB-specific API.
-"""
-from datetime import datetime, date
-
-import numpy as np
-import traceback
-
-from pandas.core.datetools import format as date_format
-from pandas.core.api import DataFrame, isnull
-
-#------------------------------------------------------------------------------
-# Helper execution function
-
-
-def execute(sql, con, retry=True, cur=None, params=None):
-    """
-    Execute the given SQL query using the provided connection object.
-
-    Parameters
-    ----------
-    sql: string
-        Query to be executed
-    con: database connection instance
-        Database connection. Must implement PEP249 (Database API v2.0).
-    retry: bool
-        Not currently implemented
-    cur: database cursor, optional
-        Must implement PEP249 (Datbase API v2.0). If cursor is not provided,
-        one will be obtained from the database connection.
-    params: list or tuple, optional
-        List of parameters to pass to execute method.
-
-    Returns
-    -------
-    Cursor object
-    """
-    try:
-        if cur is None:
-            cur = con.cursor()
-
-        if params is None:
-            cur.execute(sql)
-        else:
-            cur.execute(sql, params)
-        return cur
-    except Exception:
-        try:
-            con.rollback()
-        except Exception:  # pragma: no cover
-            pass
-
-        print ('Error on sql %s' % sql)
-        raise
-
-
-def _safe_fetch(cur):
-    try:
-        result = cur.fetchall()
-        if not isinstance(result, list):
-            result = list(result)
-        return result
-    except Exception, e:  # pragma: no cover
-        excName = e.__class__.__name__
-        if excName == 'OperationalError':
-            return []
-
-
-def tquery(sql, con=None, cur=None, retry=True):
-    """
-    Returns list of tuples corresponding to each row in given sql
-    query.
-
-    If only one column selected, then plain list is returned.
- - Parameters - ---------- - sql: string - SQL query to be executed - con: SQLConnection or DB API 2.0-compliant connection - cur: DB API 2.0 cursor - - Provide a specific connection or a specific cursor if you are executing a - lot of sequential statements and want to commit outside. - """ - cur = execute(sql, con, cur=cur) - result = _safe_fetch(cur) - - if con is not None: - try: - cur.close() - con.commit() - except Exception as e: - excName = e.__class__.__name__ - if excName == 'OperationalError': # pragma: no cover - print ('Failed to commit, may need to restart interpreter') - else: - raise - - traceback.print_exc() - if retry: - return tquery(sql, con=con, retry=False) - - if result and len(result[0]) == 1: - # python 3 compat - result = list(list(zip(*result))[0]) - elif result is None: # pragma: no cover - result = [] - - return result - - -def uquery(sql, con=None, cur=None, retry=True, params=None): - """ - Does the same thing as tquery, but instead of returning results, it - returns the number of rows affected. Good for update queries. - """ - cur = execute(sql, con, cur=cur, retry=retry, params=params) - - result = cur.rowcount - try: - con.commit() - except Exception as e: - excName = e.__class__.__name__ - if excName != 'OperationalError': - raise - - traceback.print_exc() - if retry: - print ('Looks like your connection failed, reconnecting...') - return uquery(sql, con, retry=False) - return result - - -def read_frame(sql, con, index_col=None, coerce_float=True, params=None): - """ - Returns a DataFrame corresponding to the result set of the query - string. - - Optionally provide an index_col parameter to use one of the - columns as the index. Otherwise will be 0 to len(results) - 1. - - Parameters - ---------- - sql: string - SQL query to be executed - con: DB connection object, optional - index_col: string, optional - column name to use for the returned DataFrame object. - coerce_float : boolean, default True - Attempt to convert values to non-string, non-numeric objects (like - decimal.Decimal) to floating point, useful for SQL result sets - params: list or tuple, optional - List of parameters to pass to execute method. - """ - cur = execute(sql, con, params=params) - rows = _safe_fetch(cur) - columns = [col_desc[0] for col_desc in cur.description] - - cur.close() - con.commit() - - result = DataFrame.from_records(rows, columns=columns, - coerce_float=coerce_float) - - if index_col is not None: - result = result.set_index(index_col) - - return result - -frame_query = read_frame -read_sql = read_frame - - -def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): - """ - Write records stored in a DataFrame to a SQL database. - - Parameters - ---------- - frame: DataFrame - name: name of SQL table - con: an open SQL database connection object - flavor: {'sqlite', 'mysql', 'oracle'}, default 'sqlite' - if_exists: {'fail', 'replace', 'append'}, default 'fail' - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. - """ - - if 'append' in kwargs: - import warnings - warnings.warn("append is deprecated, use if_exists instead", - FutureWarning) - if kwargs['append']: - if_exists='append' - else: - if_exists='fail' - exists = table_exists(name, con, flavor) - if if_exists == 'fail' and exists: - raise ValueError, "Table '%s' already exists." 
% name - - #create or drop-recreate if necessary - create = None - if exists and if_exists == 'replace': - create = "DROP TABLE %s" % name - elif not exists: - create = get_schema(frame, name, flavor) - - if create is not None: - cur = con.cursor() - cur.execute(create) - cur.close() - - cur = con.cursor() - # Replace spaces in DataFrame column names with _. - safe_names = [s.replace(' ', '_').strip() for s in frame.columns] - flavor_picker = {'sqlite' : _write_sqlite, - 'mysql' : _write_mysql} - - func = flavor_picker.get(flavor, None) - if func is None: - raise NotImplementedError - func(frame, name, safe_names, cur) - cur.close() - con.commit() - - -def _write_sqlite(frame, table, names, cur): - bracketed_names = ['[' + column + ']' for column in names] - col_names = ','.join(bracketed_names) - wildcards = ','.join(['?'] * len(names)) - insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( - table, col_names, wildcards) - # pandas types are badly handled if there is only 1 column ( Issue #3628 ) - if not len(frame.columns )==1 : - data = [tuple(x) for x in frame.values] - else : - data = [tuple(x) for x in frame.values.tolist()] - cur.executemany(insert_query, data) - - -def _write_mysql(frame, table, names, cur): - bracketed_names = ['`' + column + '`' for column in names] - col_names = ','.join(bracketed_names) - wildcards = ','.join([r'%s'] * len(names)) - insert_query = "INSERT INTO %s (%s) VALUES (%s)" % ( - table, col_names, wildcards) - data = [tuple(x) for x in frame.values] - cur.executemany(insert_query, data) - - -def table_exists(name, con, flavor): - flavor_map = { - 'sqlite': ("SELECT name FROM sqlite_master " - "WHERE type='table' AND name='%s';") % name, - 'mysql' : "SHOW TABLES LIKE '%s'" % name} - query = flavor_map.get(flavor, None) - if query is None: - raise NotImplementedError - return len(tquery(query, con)) > 0 - - -def get_sqltype(pytype, flavor): - sqltype = {'mysql': 'VARCHAR (63)', - 'sqlite': 'TEXT'} - - if issubclass(pytype, np.floating): - sqltype['mysql'] = 'FLOAT' - sqltype['sqlite'] = 'REAL' - - if issubclass(pytype, np.integer): - #TODO: Refine integer size. - sqltype['mysql'] = 'BIGINT' - sqltype['sqlite'] = 'INTEGER' - - if issubclass(pytype, np.datetime64) or pytype is datetime: - # Caution: np.datetime64 is also a subclass of np.number. - sqltype['mysql'] = 'DATETIME' - sqltype['sqlite'] = 'TIMESTAMP' - - if pytype is datetime.date: - sqltype['mysql'] = 'DATE' - sqltype['sqlite'] = 'TIMESTAMP' - - if issubclass(pytype, np.bool_): - sqltype['sqlite'] = 'INTEGER' - - return sqltype[flavor] - - -def get_schema(frame, name, flavor, keys=None): - "Return a CREATE TABLE statement to suit the contents of a DataFrame." - lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) - # Replace spaces in DataFrame column names with _. - safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index] - column_types = zip(safe_columns, map(lookup_type, frame.dtypes)) - if flavor == 'sqlite': - columns = ',\n '.join('[%s] %s' % x for x in column_types) - else: - columns = ',\n '.join('`%s` %s' % x for x in column_types) - - keystr = '' - if keys is not None: - if isinstance(keys, basestring): - keys = (keys,) - keystr = ', PRIMARY KEY (%s)' % ','.join(keys) - template = """CREATE TABLE %(name)s ( - %(columns)s - %(keystr)s - );""" - create_statement = template % {'name': name, 'columns': columns, - 'keystr': keystr} - return create_statement - - -def sequence2dict(seq): - """Helper function for cx_Oracle. 
- - For each element in the sequence, creates a dictionary item equal - to the element and keyed by the position of the item in the list. - >>> sequence2dict(("Matt", 1)) - {'1': 'Matt', '2': 1} - - Source: - http://www.gingerandjohn.com/archives/2004/02/26/cx_oracle-executemany-example/ - """ - d = {} - for k,v in zip(range(1, 1 + len(seq)), seq): - d[str(k)] = v - return d diff --git a/pandas/io/tests/test_sql_legacy.py b/pandas/io/tests/test_sql_legacy.py deleted file mode 100644 index 3c6e992097d30..0000000000000 --- a/pandas/io/tests/test_sql_legacy.py +++ /dev/null @@ -1,497 +0,0 @@ -from __future__ import with_statement -from pandas.compat import StringIO -import unittest -import sqlite3 -import sys - -import warnings - -import nose - -import numpy as np - -from pandas.core.datetools import format as date_format -from pandas.core.api import DataFrame, isnull -from pandas.compat import StringIO, range, lrange -import pandas.compat as compat - -import pandas.io.sql as sql -from pandas.io.sql import DatabaseError -import pandas.util.testing as tm -from pandas import Series, Index, DataFrame -from datetime import datetime - -_formatters = { - datetime: lambda dt: "'%s'" % date_format(dt), - str: lambda x: "'%s'" % x, - np.str_: lambda x: "'%s'" % x, - compat.text_type: lambda x: "'%s'" % x, - compat.binary_type: lambda x: "'%s'" % x, - float: lambda x: "%.8f" % x, - int: lambda x: "%s" % x, - type(None): lambda x: "NULL", - np.float64: lambda x: "%.10f" % x, - bool: lambda x: "'%s'" % x, -} - -def format_query(sql, *args): - """ - - """ - processed_args = [] - for arg in args: - if isinstance(arg, float) and isnull(arg): - arg = None - - formatter = _formatters[type(arg)] - processed_args.append(formatter(arg)) - - return sql % tuple(processed_args) - -def _skip_if_no_MySQLdb(): - try: - import MySQLdb - except ImportError: - raise nose.SkipTest('MySQLdb not installed, skipping') - -class TestSQLite(unittest.TestCase): - - def setUp(self): - self.db = sqlite3.connect(':memory:') - - def test_basic(self): - frame = tm.makeTimeDataFrame() - self._check_roundtrip(frame) - - def test_write_row_by_row(self): - frame = tm.makeTimeDataFrame() - frame.ix[0, 0] = np.nan - create_sql = sql.get_schema(frame, 'test', 'sqlite') - cur = self.db.cursor() - cur.execute(create_sql) - - cur = self.db.cursor() - - ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" - for idx, row in frame.iterrows(): - fmt_sql = format_query(ins, *row) - sql.tquery(fmt_sql, cur=cur) - - self.db.commit() - - result = sql.read_frame("select * from test", con=self.db) - result.index = frame.index - tm.assert_frame_equal(result, frame) - - def test_execute(self): - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'sqlite') - cur = self.db.cursor() - cur.execute(create_sql) - ins = "INSERT INTO test VALUES (?, ?, ?, ?)" - - row = frame.ix[0] - sql.execute(ins, self.db, params=tuple(row)) - self.db.commit() - - result = sql.read_frame("select * from test", self.db) - result.index = frame.index[:1] - tm.assert_frame_equal(result, frame[:1]) - - def test_schema(self): - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'sqlite') - lines = create_sql.splitlines() - for l in lines: - tokens = l.split(' ') - if len(tokens) == 2 and tokens[0] == 'A': - self.assert_(tokens[1] == 'DATETIME') - - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'sqlite', keys=['A', 'B'],) - lines = create_sql.splitlines() - self.assert_('PRIMARY KEY (A,B)' in create_sql) - cur 
= self.db.cursor() - cur.execute(create_sql) - - def test_execute_fail(self): - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a, b) - ); - """ - cur = self.db.cursor() - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db) - - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_execute_closed_connection(self): - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a, b) - ); - """ - cur = self.db.cursor() - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - self.db.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.tquery, "select * from test", - con=self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_na_roundtrip(self): - pass - - def _check_roundtrip(self, frame): - sql.write_frame(frame, name='test_table', con=self.db) - result = sql.read_frame("select * from test_table", self.db) - - # HACK! Change this once indexes are handled properly. - result.index = frame.index - - expected = frame - tm.assert_frame_equal(result, expected) - - frame['txt'] = ['a'] * len(frame) - frame2 = frame.copy() - frame2['Idx'] = Index(lrange(len(frame2))) + 10 - sql.write_frame(frame2, name='test_table2', con=self.db) - result = sql.read_frame("select * from test_table2", self.db, - index_col='Idx') - expected = frame.copy() - expected.index = Index(lrange(len(frame2))) + 10 - expected.index.name = 'Idx' - print(expected.index.names) - print(result.index.names) - tm.assert_frame_equal(expected, result) - - def test_tquery(self): - frame = tm.makeTimeDataFrame() - sql.write_frame(frame, name='test_table', con=self.db) - result = sql.tquery("select A from test_table", self.db) - expected = frame.A - result = Series(result, frame.index) - tm.assert_series_equal(result, expected) - - try: - sys.stdout = StringIO() - self.assertRaises(DatabaseError, sql.tquery, - 'select * from blah', con=self.db) - - self.assertRaises(DatabaseError, sql.tquery, - 'select * from blah', con=self.db, retry=True) - finally: - sys.stdout = sys.__stdout__ - - def test_uquery(self): - frame = tm.makeTimeDataFrame() - sql.write_frame(frame, name='test_table', con=self.db) - stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)' - self.assertEqual(sql.uquery(stmt, con=self.db), 1) - - try: - sys.stdout = StringIO() - - self.assertRaises(DatabaseError, sql.tquery, - 'insert into blah values (1)', con=self.db) - - self.assertRaises(DatabaseError, sql.tquery, - 'insert into blah values (1)', con=self.db, - retry=True) - finally: - sys.stdout = sys.__stdout__ - - def test_keyword_as_column_names(self): - ''' - ''' - df = DataFrame({'From':np.ones(5)}) - sql.write_frame(df, con = self.db, name = 'testkeywords') - - def test_onecolumn_of_integer(self): - ''' - GH 3628 - a column_of_integers dataframe should transfer well to sql - ''' - mono_df=DataFrame([1 , 2], columns=['c0']) - sql.write_frame(mono_df, con = self.db, name = 'mono_df') - # computing the sum via sql - con_x=self.db - the_sum=sum([my_c0[0] for my_c0 in con_x.execute("select * from mono_df")]) - # it should not fail, and gives 3 ( Issue #3628 ) - self.assertEqual(the_sum , 3) - - result = sql.read_frame("select * from mono_df",con_x) - 
tm.assert_frame_equal(result,mono_df) - - -class TestMySQL(unittest.TestCase): - - def setUp(self): - _skip_if_no_MySQLdb() - import MySQLdb - try: - # Try Travis defaults. - # No real user should allow root access with a blank password. - self.db = MySQLdb.connect(host='localhost', user='root', passwd='', - db='pandas_nosetest') - except: - pass - else: - return - try: - self.db = MySQLdb.connect(read_default_group='pandas') - except MySQLdb.ProgrammingError as e: - raise nose.SkipTest( - "Create a group of connection parameters under the heading " - "[pandas] in your system's mysql default file, " - "typically located at ~/.my.cnf or /etc/.my.cnf. ") - except MySQLdb.Error as e: - raise nose.SkipTest( - "Cannot connect to database. " - "Create a group of connection parameters under the heading " - "[pandas] in your system's mysql default file, " - "typically located at ~/.my.cnf or /etc/.my.cnf. ") - - def test_basic(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "For more robust support.*") - self._check_roundtrip(frame) - - def test_write_row_by_row(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - frame.ix[0, 0] = np.nan - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = sql.get_schema(frame, 'test', 'mysql') - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" - for idx, row in frame.iterrows(): - fmt_sql = format_query(ins, *row) - sql.tquery(fmt_sql, cur=cur) - - self.db.commit() - - result = sql.read_frame("select * from test", con=self.db) - result.index = frame.index - tm.assert_frame_equal(result, frame) - - def test_execute(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = sql.get_schema(frame, 'test', 'mysql') - cur = self.db.cursor() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unknown table.*") - cur.execute(drop_sql) - cur.execute(create_sql) - ins = "INSERT INTO test VALUES (%s, %s, %s, %s)" - - row = frame.ix[0] - sql.execute(ins, self.db, params=tuple(row)) - self.db.commit() - - result = sql.read_frame("select * from test", self.db) - result.index = frame.index[:1] - tm.assert_frame_equal(result, frame[:1]) - - def test_schema(self): - _skip_if_no_MySQLdb() - frame = tm.makeTimeDataFrame() - create_sql = sql.get_schema(frame, 'test', 'mysql') - lines = create_sql.splitlines() - for l in lines: - tokens = l.split(' ') - if len(tokens) == 2 and tokens[0] == 'A': - self.assert_(tokens[1] == 'DATETIME') - - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = sql.get_schema(frame, 'test', 'mysql', keys=['A', 'B'],) - lines = create_sql.splitlines() - self.assert_('PRIMARY KEY (A,B)' in create_sql) - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - - def test_execute_fail(self): - _skip_if_no_MySQLdb() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a(5), b(5)) - ); - """ - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.db) - - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.db) - finally: - sys.stdout = sys.__stdout__ - - 
def test_execute_closed_connection(self): - _skip_if_no_MySQLdb() - drop_sql = "DROP TABLE IF EXISTS test" - create_sql = """ - CREATE TABLE test - ( - a TEXT, - b TEXT, - c REAL, - PRIMARY KEY (a(5), b(5)) - ); - """ - cur = self.db.cursor() - cur.execute(drop_sql) - cur.execute(create_sql) - - sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db) - self.db.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.tquery, "select * from test", - con=self.db) - finally: - sys.stdout = sys.__stdout__ - - def test_na_roundtrip(self): - _skip_if_no_MySQLdb() - pass - - def _check_roundtrip(self, frame): - _skip_if_no_MySQLdb() - drop_sql = "DROP TABLE IF EXISTS test_table" - cur = self.db.cursor() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unknown table.*") - cur.execute(drop_sql) - sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql') - result = sql.read_frame("select * from test_table", self.db) - - # HACK! Change this once indexes are handled properly. - result.index = frame.index - result.index.name = frame.index.name - - expected = frame - tm.assert_frame_equal(result, expected) - - frame['txt'] = ['a'] * len(frame) - frame2 = frame.copy() - index = Index(lrange(len(frame2))) + 10 - frame2['Idx'] = index - drop_sql = "DROP TABLE IF EXISTS test_table2" - cur = self.db.cursor() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unknown table.*") - cur.execute(drop_sql) - sql.write_frame(frame2, name='test_table2', con=self.db, flavor='mysql') - result = sql.read_frame("select * from test_table2", self.db, - index_col='Idx') - expected = frame.copy() - - # HACK! Change this once indexes are handled properly. - expected.index = index - expected.index.names = result.index.names - tm.assert_frame_equal(expected, result) - - def test_tquery(self): - try: - import MySQLdb - except ImportError: - raise nose.SkipTest - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test_table" - cur = self.db.cursor() - cur.execute(drop_sql) - sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql') - result = sql.tquery("select A from test_table", self.db) - expected = frame.A - result = Series(result, frame.index) - tm.assert_series_equal(result, expected) - - try: - sys.stdout = StringIO() - self.assertRaises(DatabaseError, sql.tquery, - 'select * from blah', con=self.db) - - self.assertRaises(DatabaseError, sql.tquery, - 'select * from blah', con=self.db, retry=True) - finally: - sys.stdout = sys.__stdout__ - - def test_uquery(self): - try: - import MySQLdb - except ImportError: - raise nose.SkipTest - frame = tm.makeTimeDataFrame() - drop_sql = "DROP TABLE IF EXISTS test_table" - cur = self.db.cursor() - cur.execute(drop_sql) - sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql') - stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)' - self.assertEqual(sql.uquery(stmt, con=self.db), 1) - - try: - sys.stdout = StringIO() - - self.assertRaises(DatabaseError, sql.tquery, - 'insert into blah values (1)', con=self.db) - - self.assertRaises(DatabaseError, sql.tquery, - 'insert into blah values (1)', con=self.db, - retry=True) - finally: - sys.stdout = sys.__stdout__ - - def test_keyword_as_column_names(self): - ''' - ''' - _skip_if_no_MySQLdb() - df = DataFrame({'From':np.ones(5)}) - sql.write_frame(df, name='testkeywords', con=self.db, - if_exists='replace', flavor='mysql') - -if __name__ == '__main__': - # unittest.main() - # 
nose.runmodule(argv=[__file__,'-vvs','-x', '--pdb-failure'],
-    #                exit=False)
-    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
-                   exit=False)

From 6416e654fcf84d096348118dfa4be4b9dbaa7e9d Mon Sep 17 00:00:00 2001
From: Jonathan Chambers
Date: Mon, 20 Jan 2014 21:43:23 +0000
Subject: [PATCH 09/16] ENH #4163 Added more robust type coercion, datetime parsing, and parse date options. Updated optional dependencies

Added columns optional arg to read_table, removed failing legacy tests. Added columns to doc

ENH #4163 Fixed class renaming, expanded docs

ENH #4163 Fixed tests in legacy mode
---
 README.md              | 1 +
 doc/source/install.rst | 1 +
 2 files changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 3b5f69912823b..ec7b8b07f3e89 100644
--- a/README.md
+++ b/README.md
@@ -106,6 +106,7 @@ pip install pandas
 - [Cython](http://www.cython.org): Only necessary to build development version. Version 0.17.1 or higher.
 - [SciPy](http://www.scipy.org): miscellaneous statistical functions
 - [PyTables](http://www.pytables.org): necessary for HDF5-based storage
+- [SQLAlchemy](http://www.sqlalchemy.org): for SQL database support. Version 0.8.1 or higher recommended.
 - [matplotlib](http://matplotlib.sourceforge.net/): for plotting
 - [statsmodels](http://statsmodels.sourceforge.net/)
   - Needed for parts of `pandas.stats`
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 631973934cc3b..f67bdc10a457f 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -95,6 +95,7 @@ Optional Dependencies
     version. Version 0.17.1 or higher.
   * `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
   * `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage
+  * `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended.
   * `matplotlib <http://matplotlib.sourceforge.net/>`__: for plotting
   * `statsmodels <http://statsmodels.sourceforge.net/>`__
     * Needed for parts of :mod:`pandas.stats`

From 9a1972a6ce1718ebf2e4adb0cade74507602f6a5 Mon Sep 17 00:00:00 2001
From: Jonathan Chambers
Date: Thu, 6 Feb 2014 17:31:06 +0000
Subject: [PATCH 10/16] ENH #4163 Fixed missing basestring import for py3.3 compat
---
 pandas/io/sql.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index ce315375dffff..c2516de0913cf 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -5,7 +5,7 @@
 from __future__ import print_function
 from datetime import datetime, date
 import warnings
-from pandas.compat import range, lzip, map, zip, raise_with_traceback
+from pandas.compat import range, lzip, map, zip, raise_with_traceback, basestring
 import numpy as np

From 527fe5657fce4045240c80665d640102c79890a6 Mon Sep 17 00:00:00 2001
From: Jonathan Chambers
Date: Thu, 6 Feb 2014 18:39:20 +0000
Subject: [PATCH 11/16] ENH #4163 Fixed missing string_types import for py3.3 compat
---
 pandas/io/sql.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index c2516de0913cf..e705a3b20585a 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -5,7 +5,7 @@
 from __future__ import print_function
 from datetime import datetime, date
 import warnings
-from pandas.compat import range, lzip, map, zip, raise_with_traceback, basestring
+from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
 import numpy as np

@@ -444,7 +444,7 @@ def _index_name(self, index):
             return _safe_col_name(self.frame.index.name)
         else:
             return self.prefix + '_index'
-    elif isinstance(index, basestring):
+    elif isinstance(index, string_types):
         return index
     else:
         return None

From
b6e8ad5dd2608e38f512c1e7ded2c0f3d2a4c196 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 9 Feb 2014 23:44:52 +0100 Subject: [PATCH 12/16] TEST: add basic postgresql tests TEST: add postgresql to travis --- .travis.yml | 1 + ci/requirements-2.7.txt | 2 ++ ci/requirements-3.3.txt | 2 ++ pandas/io/tests/test_sql.py | 70 +++++++++++++++++++++++++++++++++++-- 4 files changed, 72 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 48199c57d8b49..3e930d306c1bc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -73,6 +73,7 @@ install: before_script: - mysql -e 'create database pandas_nosetest;' + - psql -c 'create database pandas_nosetest;' -U postgres script: - echo "script" diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt index 477ba83ddf141..b2d4ab500f08b 100644 --- a/ci/requirements-2.7.txt +++ b/ci/requirements-2.7.txt @@ -19,3 +19,5 @@ beautifulsoup4==4.2.1 statsmodels==0.5.0 bigquery==2.0.17 sqlalchemy==0.8.1 +pymysql==0.6.1 +psycopg2==2.5.2 diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt index 73009b572c4c2..7ac8f6f313b19 100644 --- a/ci/requirements-3.3.txt +++ b/ci/requirements-3.3.txt @@ -15,3 +15,5 @@ scipy==0.12.0 beautifulsoup4==4.2.1 statsmodels==0.4.3 sqlalchemy==0.9.1 +pymysql==0.6.1 +psycopg2==2.5.2 \ No newline at end of file diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 4785c4aa8b79d..c45202f80f2ca 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -36,11 +36,19 @@ `PetalLength` DOUBLE, `PetalWidth` DOUBLE, `Name` VARCHAR(200) + )""", + 'postgresql': """CREATE TABLE iris ( + "SepalLength" DOUBLE PRECISION, + "SepalWidth" DOUBLE PRECISION, + "PetalLength" DOUBLE PRECISION, + "PetalWidth" DOUBLE PRECISION, + "Name" VARCHAR(200) )""" }, 'insert_iris': { 'sqlite': """INSERT INTO iris VALUES(?, ?, ?, ?, ?)""", - 'mysql': """INSERT INTO iris VALUES(%s, %s, %s, %s, "%s");""" + 'mysql': """INSERT INTO iris VALUES(%s, %s, %s, %s, "%s");""", + 'postgresql': """INSERT INTO iris VALUES(%s, %s, %s, %s, %s);""" }, 'create_test_types': { 'sqlite': """CREATE TABLE types_test_data ( @@ -62,6 +70,16 @@ `BoolCol` BOOLEAN, `IntColWithNull` INTEGER, `BoolColWithNull` BOOLEAN + )""", + 'postgresql': """CREATE TABLE types_test_data ( + "TextCol" TEXT, + "DateCol" TIMESTAMP, + "IntDateCol" INTEGER, + "FloatCol" DOUBLE PRECISION, + "IntCol" INTEGER, + "BoolCol" BOOLEAN, + "IntColWithNull" INTEGER, + "BoolColWithNull" BOOLEAN )""" }, 'insert_test_types': { @@ -72,6 +90,10 @@ 'mysql': """ INSERT INTO types_test_data VALUES("%s", %s, %s, %s, %s, %s, %s, %s) + """, + 'postgresql': """ + INSERT INTO types_test_data + VALUES(%s, %s, %s, %s, %s, %s, %s, %s) """ } } @@ -504,8 +526,8 @@ def test_default_type_convertion(self): self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), "IntColWithNull loaded with incorrect type") # Non-native Bool column with NA values stays as float - self.assertTrue( - issubclass(df.BoolColWithNull.dtype.type, np.floating), "BoolCol loaded with incorrect type") + self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), + "BoolColWithNull loaded with incorrect type") def test_default_date_load(self): df = sql.read_table("types_test_data", self.conn) @@ -699,6 +721,48 @@ def test_default_date_load(self): self.assertTrue( issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") + +class TestPostgreSQLAlchemy(TestSQLAlchemy): + flavor = 'postgresql' + + def connect(self): + return sqlalchemy.create_engine( 
+            'postgresql+{driver}://postgres@localhost/pandas_nosetest'.format(driver=self.driver))
+
+    def setUp(self):
+        if not SQLALCHEMY_INSTALLED:
+            raise nose.SkipTest('SQLAlchemy not installed')
+
+        try:
+            import psycopg2
+            self.driver = 'psycopg2'
+
+        except ImportError:
+            raise nose.SkipTest
+
+        self.conn = self.connect()
+        self.pandasSQL = sql.PandasSQLAlchemy(self.conn)
+
+        self._load_iris_data()
+        self._load_raw_sql()
+
+        self._load_test1_data()
+
+    def tearDown(self):
+        c = self.conn.execute(
+            "SELECT table_name FROM information_schema.tables"
+            " WHERE table_schema = 'public'")
+        for table in c.fetchall():
+            self.conn.execute("DROP TABLE %s" % table[0])
+
+    def test_default_date_load(self):
+        df = sql.read_table("types_test_data", self.conn)
+
+        # IMPORTANT - sqlite has no native date type, so shouldn't parse,
+        # but PostgreSQL SHOULD be converted.
+        self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
+                        "DateCol loaded with incorrect type")
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)

From 8c1f6dd9c463b7a0ff4251bd2a9fa612f41e0811 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Mon, 10 Feb 2014 18:08:04 +0100
Subject: [PATCH 13/16] TEST io.sql: sqlite tests to separate class

One base class for tests with sqlalchemy backend. So test classes for
mysql and postgresql don't have to override tests that are different
for sqlite.
---
 pandas/io/tests/test_sql.py | 116 ++++++++++++++++++++----------------
 1 file changed, 65 insertions(+), 51 deletions(-)

diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index c45202f80f2ca..2be086cddf7c4 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -425,29 +425,12 @@ def test_date_and_index(self):

-class TestSQLAlchemy(PandasSQLTest):
-
-    '''
-    Test the sqlalchemy backend against an in-memory sqlite database.
+class _TestSQLAlchemy(PandasSQLTest):
+    """
+    Base class for testing the sqlalchemy backend. Subclasses for specific
+    database types are created below.
    Assume that sqlalchemy takes care of the DB specifics
-    '''
-    flavor = 'sqlite'
-
-    def connect(self):
-        return sqlalchemy.create_engine('sqlite:///:memory:')
-
-    def setUp(self):
-        # Skip this test if SQLAlchemy not available
-        if not SQLALCHEMY_INSTALLED:
-            raise nose.SkipTest('SQLAlchemy not installed')
-
-        self.conn = self.connect()
-        self.pandasSQL = sql.PandasSQLAlchemy(self.conn)
-
-        self._load_iris_data()
-        self._load_raw_sql()
-
-        self._load_test1_data()
+    """

     def test_read_sql(self):
         self._read_sql_iris()
@@ -513,20 +496,20 @@ def test_read_table_absent(self):
             ValueError, sql.read_table, "this_doesnt_exist", con=self.conn)

     def test_default_type_convertion(self):
-        """ Test default type conversion"""
         df = sql.read_table("types_test_data", self.conn)
-        self.assertTrue(
-            issubclass(df.FloatCol.dtype.type, np.floating), "FloatCol loaded with incorrect type")
-        self.assertTrue(
-            issubclass(df.IntCol.dtype.type, np.integer), "IntCol loaded with incorrect type")
-        self.assertTrue(
-            issubclass(df.BoolCol.dtype.type, np.integer), "BoolCol loaded with incorrect type")
+
+        self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
+                        "FloatCol loaded with incorrect type")
+        self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
+                        "IntCol loaded with incorrect type")
+        self.assertTrue(issubclass(df.BoolCol.dtype.type, np.bool_),
+                        "BoolCol loaded with incorrect type")

         # Int column with NA values stays as float
         self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
                         "IntColWithNull loaded with incorrect type")
-        # Non-native Bool column with NA values stays as float
-        self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating),
+        # Bool column with NA values becomes object
+        self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.object),
                         "BoolColWithNull loaded with incorrect type")

     def test_default_date_load(self):
@@ -534,11 +517,10 @@ def test_default_date_load(self):

         # IMPORTANT - sqlite has no native date type, so shouldn't parse, but
         # MySQL SHOULD be converted.
-        self.assertFalse(
+        self.assertTrue(
             issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type")

     def test_date_parsing(self):
-        """ Test date parsing """
         # No Parsing
         df = sql.read_table("types_test_data", self.conn)

@@ -573,6 +555,54 @@ def test_date_parsing(self):
             "IntDateCol loaded with incorrect type")


+class TestSQLAlchemy(_TestSQLAlchemy):
+    """
+    Test the sqlalchemy backend against an in-memory sqlite database.
+ """ + flavor = 'sqlite' + + def connect(self): + return sqlalchemy.create_engine('sqlite:///:memory:') + + def setUp(self): + # Skip this test if SQLAlchemy not available + if not SQLALCHEMY_INSTALLED: + raise nose.SkipTest('SQLAlchemy not installed') + + self.conn = self.connect() + self.pandasSQL = sql.PandasSQLAlchemy(self.conn) + + self._load_iris_data() + self._load_raw_sql() + + self._load_test1_data() + + def test_default_type_convertion(self): + df = sql.read_table("types_test_data", self.conn) + + self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating), + "FloatCol loaded with incorrect type") + self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer), + "IntCol loaded with incorrect type") + # sqlite has no boolean type, so integer type is returned + self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer), + "BoolCol loaded with incorrect type") + + # Int column with NA values stays as float + self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), + "IntColWithNull loaded with incorrect type") + # Non-native Bool column with NA values stays as float + self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), + "BoolColWithNull loaded with incorrect type") + + def test_default_date_load(self): + df = sql.read_table("types_test_data", self.conn) + + # IMPORTANT - sqlite has no native date type, so shouldn't parse, but + self.assertFalse(issubclass(df.DateCol.dtype.type, np.datetime64), + "DateCol loaded with incorrect type") + + # --- Test SQLITE fallback class TestSQLite(PandasSQLTest): @@ -682,7 +712,7 @@ def tearDown(self): self.conn.close() -class TestMySQLAlchemy(TestSQLAlchemy): +class TestMySQLAlchemy(_TestSQLAlchemy): flavor = 'mysql' def connect(self): @@ -713,16 +743,8 @@ def tearDown(self): for table in c.fetchall(): self.conn.execute('DROP TABLE %s' % table[0]) - def test_default_date_load(self): - df = sql.read_table("types_test_data", self.conn) - - # IMPORTANT - sqlite has no native date type, so shouldn't parse, - # but MySQL SHOULD be converted. - self.assertTrue( - issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") - -class TestPostgreSQLAlchemy(TestSQLAlchemy): +class TestPostgreSQLAlchemy(_TestSQLAlchemy): flavor = 'postgresql' def connect(self): @@ -754,14 +776,6 @@ def tearDown(self): " WHERE table_schema = 'public'") for table in c.fetchall(): self.conn.execute("DROP TABLE %s" % table[0]) - - def test_default_date_load(self): - df = sql.read_table("types_test_data", self.conn) - - # IMPORTANT - sqlite has no native date type, so shouldn't parse, - # but PostgreSQL SHOULD be converted. - self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From 38abce7de65ebc734f8db3afc2e65a5b7b53dfb0 Mon Sep 17 00:00:00 2001 From: Jonathan Chambers Date: Tue, 11 Feb 2014 15:22:29 +0000 Subject: [PATCH 14/16] Added interval type --- pandas/io/sql.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 989f6983b28d3..59f1f739b0f13 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -3,7 +3,7 @@ retrieval and to reduce dependency on DB-specific API. 
""" from __future__ import print_function -from datetime import datetime, date +from datetime import datetime, date, timedelta import warnings from pandas.compat import lzip, map, zip, raise_with_traceback, string_types import numpy as np @@ -538,7 +538,7 @@ def _harmonize_columns(self, parse_dates=None): pass # this column not in results def _sqlalchemy_type(self, dtype): - from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date + from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date, Interval pytype = dtype.type @@ -547,6 +547,9 @@ def _sqlalchemy_type(self, dtype): if issubclass(pytype, np.datetime64) or pytype is datetime: # Caution: np.datetime64 is also a subclass of np.number. return DateTime + if issubclass(pytype, np.timedelta64) or pytype is timedelta: + # Caution: np.datetime64 is also a subclass of np.number. + return Interval if issubclass(pytype, np.floating): return Float if issubclass(pytype, np.integer): From b77ab49bc995887e0f2ecaa4825bc7b09d28c02d Mon Sep 17 00:00:00 2001 From: Jonathan Chambers Date: Tue, 11 Feb 2014 15:48:44 +0000 Subject: [PATCH 15/16] Minor name change --- pandas/io/tests/test_sql.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 2be086cddf7c4..110e4bc85b88d 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -426,6 +426,7 @@ def test_date_and_index(self): class _TestSQLAlchemy(PandasSQLTest): + """ Base class for testing the sqlalchemy backend. Subclasses for specific database types are created below. @@ -509,7 +510,7 @@ def test_default_type_convertion(self): self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), "IntColWithNull loaded with incorrect type") # Bool column with NA values becomes object - self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.object), + self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.object), "BoolColWithNull loaded with incorrect type") def test_default_date_load(self): @@ -555,7 +556,8 @@ def test_date_parsing(self): "IntDateCol loaded with incorrect type") -class TestSQLAlchemy(_TestSQLAlchemy): +class TestSQLiteAlchemy(_TestSQLAlchemy): + """ Test the sqlalchemy backend against an in-memory sqlite database. 
""" @@ -592,14 +594,14 @@ def test_default_type_convertion(self): self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), "IntColWithNull loaded with incorrect type") # Non-native Bool column with NA values stays as float - self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), + self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), "BoolColWithNull loaded with incorrect type") def test_default_date_load(self): df = sql.read_table("types_test_data", self.conn) # IMPORTANT - sqlite has no native date type, so shouldn't parse, but - self.assertFalse(issubclass(df.DateCol.dtype.type, np.datetime64), + self.assertFalse(issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") @@ -746,30 +748,30 @@ def tearDown(self): class TestPostgreSQLAlchemy(_TestSQLAlchemy): flavor = 'postgresql' - + def connect(self): return sqlalchemy.create_engine( 'postgresql+{driver}://postgres@localhost/pandas_nosetest'.format(driver=self.driver)) - + def setUp(self): if not SQLALCHEMY_INSTALLED: raise nose.SkipTest('SQLAlchemy not installed') - + try: import psycopg2 self.driver = 'psycopg2' - + except ImportError: raise nose.SkipTest - + self.conn = self.connect() self.pandasSQL = sql.PandasSQLAlchemy(self.conn) - + self._load_iris_data() self._load_raw_sql() - + self._load_test1_data() - + def tearDown(self): c = self.conn.execute( "SELECT table_name FROM information_schema.tables" From 78cbc0e1f765d83971f56bc3d542d203de146329 Mon Sep 17 00:00:00 2001 From: Jonathan Chambers Date: Thu, 20 Feb 2014 13:34:39 +0000 Subject: [PATCH 16/16] ENH: performance improvements on write - tradoff higher memory use for faster writes. --- pandas/io/sql.py | 73 ++++++++++++++++++++++++------------- pandas/io/tests/test_sql.py | 2 +- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 59f1f739b0f13..eaa664839dd60 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2,16 +2,17 @@ Collection of query wrappers / abstractions to both facilitate data retrieval and to reduce dependency on DB-specific API. """ -from __future__ import print_function +from __future__ import print_function, division from datetime import datetime, date, timedelta import warnings from pandas.compat import lzip, map, zip, raise_with_traceback, string_types import numpy as np - +import pandas.core.common as com from pandas.core.api import DataFrame from pandas.core.base import PandasObject from pandas.tseries.tools import to_datetime +#from pandas.tseries.index import DateTimeIndex class SQLAlchemyRequired(ImportError): @@ -360,7 +361,7 @@ def pandasSQL_builder(con, flavor=None, meta=None): class PandasSQLTable(PandasObject): - """ + """ For mapping Pandas tables to SQL tables. Uses fact that table is reflected by SQLAlchemy to do better type convertions. 
@@ -419,13 +420,21 @@ def maybe_asscalar(self, i):

     def insert(self):
         ins = self.insert_statement()
-
-        for t in self.frame.iterrows():
-            data = dict((k, self.maybe_asscalar(v))
-                        for k, v in t[1].iteritems())
-            if self.index is not None:
+        data_list = []
+        # Branch on the index once, instead of checking it for every row.
+        if self.index is not None:
+            for t in self.frame.iterrows():
+                data = dict((k, self.maybe_asscalar(v))
+                            for k, v in t[1].iteritems())
                 data[self.index] = self.maybe_asscalar(t[0])
+                data_list.append(data)
+        else:
+            for t in self.frame.iterrows():
+                data = dict((k, self.maybe_asscalar(v))
+                            for k, v in t[1].iteritems())
+                data_list.append(data)
+        # A single execute with a list of rows becomes one executemany call.
+        self.pd_sql.execute(ins, data_list)

     def read(self, coerce_float=True, parse_dates=None, columns=None):

@@ -480,7 +489,7 @@ def _create_table_statement(self):
         if self.index is not None:
             columns.insert(0, Column(self.index,
                                      self._sqlalchemy_type(
-                                         self.frame.index.dtype),
+                                         self.frame.index),
                                      index=True))

         return Table(self.name, self.pd_sql.meta, *columns)
@@ -537,25 +546,33 @@ def _harmonize_columns(self, parse_dates=None):
         except KeyError:
             pass  # this column not in results

-    def _sqlalchemy_type(self, dtype):
+    def _sqlalchemy_type(self, arr_or_dtype):
         from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date, Interval

-        pytype = dtype.type
+        if isinstance(arr_or_dtype, np.dtype):
+            tipo = arr_or_dtype
+        elif isinstance(arr_or_dtype, type):
+            tipo = np.dtype(arr_or_dtype)
+        else:
+            tipo = arr_or_dtype.dtype

-        if pytype is date:
+        if arr_or_dtype is date:
             return Date
-        if issubclass(pytype, np.datetime64) or pytype is datetime:
-            # Caution: np.datetime64 is also a subclass of np.number.
-            return DateTime
+        if com.is_datetime64_dtype(arr_or_dtype):
+            try:
+                tz = arr_or_dtype.tzinfo
+                return DateTime(timezone=True)
+            except:
+                print('no tzinfo')
+                return DateTime
-        if issubclass(pytype, np.timedelta64) or pytype is timedelta:
-            # Caution: np.timedelta64 is also a subclass of np.number.
+        if com.is_timedelta64_dtype(arr_or_dtype):
             return Interval
-        if issubclass(pytype, np.floating):
+        if com.is_float_dtype(arr_or_dtype):
             return Float
-        if issubclass(pytype, np.integer):
+        if com.is_integer_dtype(arr_or_dtype):
             # TODO: Refine integer size.
             return Integer
-        if issubclass(pytype, np.bool_):
+        if issubclass(tipo, np.bool_):
             return Boolean

         return Text
@@ -641,14 +658,18 @@ def to_sql(self, frame, name, if_exists='fail', index=True):
             name, self, frame=frame, index=index, if_exists=if_exists)
         table.insert()

+    @property
+    def tables(self):
+        return self.meta.tables
+
     def has_table(self, name):
-        return self.engine.has_table(name)
+        # Membership test on the reflected metadata; indexing meta.tables
+        # directly would raise KeyError for a missing table.
+        if name in self.meta.tables:
+            return True
+        else:
+            return False

     def get_table(self, table_name):
-        if self.engine.has_table(table_name):
-            return self.meta.tables[table_name]
-        else:
-            return None
+        return self.meta.tables.get(table_name)

     def read_table(self, table_name, index_col=None, coerce_float=True,
                    parse_dates=None, columns=None):
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index 110e4bc85b88d..c623ad43ee56c 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+from __future__ import print_function, division
 import unittest
 import sqlite3
 import csv