Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH #6416 improve performance on SQL insert #6417

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
1259dca
ENH #4163 Use SQLAlchemy for DB abstraction
danielballan Jul 11, 2013
60590f2
ENH #4163 Added tests and documentation
Jan 14, 2014
ac6bf42
ENH #4163 Added more robust type coercion, datetime parsing, and pars…
Jan 20, 2014
6314e6f
ENH #4163 Tweaks to docs, avoid mutable default args, mysql tests
Jan 24, 2014
f156e81
ENH #4163 Introduce DataFrame Index support. Refactor to introduce Pa…
Jan 27, 2014
32b493a
ENH #4163 Fix bug in index + parse date interaction, added test case …
mangecoeur Feb 6, 2014
0ea6483
ENH #4163 Use SQLAlchemy for DB abstraction
danielballan Jul 11, 2013
8ec0c33
ENH #4163 Added tests and documentation
Jan 14, 2014
6416e65
ENH #4163 Added more robust type coercion, datetime parsing, and pars…
Jan 20, 2014
fbc14fa
Merge branch 'master' of https://github.com/mangecoeur/pandas
mangecoeur Feb 6, 2014
9a1972a
ENH #4163 Fixed missing basestring import for py3.3 compat
mangecoeur Feb 6, 2014
527fe56
ENH #4163 Fixed missing string_types import for py3.3 compat
mangecoeur Feb 6, 2014
b6e8ad5
TEST: add basic postgresql tests
jorisvandenbossche Feb 9, 2014
8c1f6dd
TEST io.sql: sqlite tests to separate class
jorisvandenbossche Feb 10, 2014
3df075a
Merge remote-tracking branch 'upstream/master'
mangecoeur Feb 11, 2014
38abce7
Added interval type
mangecoeur Feb 11, 2014
cbc8354
Merge remote-tracking branch 'joris/sql-postgresql-tests' into sql-en…
mangecoeur Feb 11, 2014
b77ab49
Minor name change
mangecoeur Feb 11, 2014
78cbc0e
ENH: performance improvements on write - tradeoff higher memory use fo…
mangecoeur Feb 20, 2014
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ install:

before_script:
- mysql -e 'create database pandas_nosetest;'
- psql -c 'create database pandas_nosetest;' -U postgres

script:
- echo "script"
Expand Down
1 change: 1 addition & 0 deletions ci/requirements-2.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ pytz==2013b
http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz
html5lib==1.0b2
bigquery==2.0.17
sqlalchemy==0.8.1
numexpr==1.4.2
sqlalchemy==0.8.1
2 changes: 2 additions & 0 deletions ci/requirements-2.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ beautifulsoup4==4.2.1
statsmodels==0.5.0
bigquery==2.0.17
sqlalchemy==0.8.1
pymysql==0.6.1
psycopg2==2.5.2
2 changes: 2 additions & 0 deletions ci/requirements-3.3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ scipy==0.12.0
beautifulsoup4==4.2.1
statsmodels==0.4.3
sqlalchemy==0.9.1
pymysql==0.6.1
psycopg2==2.5.2
76 changes: 50 additions & 26 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
Collection of query wrappers / abstractions to both facilitate data
retrieval and to reduce dependency on DB-specific API.
"""
from __future__ import print_function
from datetime import datetime, date
from __future__ import print_function, division
from datetime import datetime, date, timedelta
import warnings
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
import numpy as np


import pandas.core.common as com
from pandas.core.api import DataFrame
from pandas.core.base import PandasObject
from pandas.tseries.tools import to_datetime
#from pandas.tseries.index import DateTimeIndex


class SQLAlchemyRequired(ImportError):
Expand Down Expand Up @@ -360,7 +361,7 @@ def pandasSQL_builder(con, flavor=None, meta=None):


class PandasSQLTable(PandasObject):
"""
"""
For mapping Pandas tables to SQL tables.
Uses fact that table is reflected by SQLAlchemy to
do better type conversions.
Expand Down Expand Up @@ -419,13 +420,21 @@ def maybe_asscalar(self, i):

def insert(self):
ins = self.insert_statement()

for t in self.frame.iterrows():
data = dict((k, self.maybe_asscalar(v))
for k, v in t[1].iteritems())
if self.index is not None:
data_list = []
# to avoid if check for every row
if self.index is not None:
for t in self.frame.iterrows():
data = dict((k, self.maybe_asscalar(v))
for k, v in t[1].iteritems())
data[self.index] = self.maybe_asscalar(t[0])
self.pd_sql.execute(ins, **data)
data_list.append(data)
else:
for t in self.frame.iterrows():
data = dict((k, self.maybe_asscalar(v))
for k, v in t[1].iteritems())
data_list.append(data)
#self.pd_sql.execute(ins, **data)
self.pd_sql.execute(ins, data_list)

def read(self, coerce_float=True, parse_dates=None, columns=None):

Expand Down Expand Up @@ -480,7 +489,7 @@ def _create_table_statement(self):
if self.index is not None:
columns.insert(0, Column(self.index,
self._sqlalchemy_type(
self.frame.index.dtype),
self.frame.index),
index=True))

return Table(self.name, self.pd_sql.meta, *columns)
Expand Down Expand Up @@ -537,22 +546,33 @@ def _harmonize_columns(self, parse_dates=None):
except KeyError:
pass # this column not in results

def _sqlalchemy_type(self, dtype):
from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date
def _sqlalchemy_type(self, arr_or_dtype):
from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date, Interval

pytype = dtype.type
if isinstance(arr_or_dtype, np.dtype):
tipo = arr_or_dtype
elif isinstance(arr_or_dtype, type):
tipo = np.dtype(arr_or_dtype)
else:
tipo = arr_or_dtype.dtype

if pytype is date:
if arr_or_dtype is date:
return Date
if issubclass(pytype, np.datetime64) or pytype is datetime:
# Caution: np.datetime64 is also a subclass of np.number.
return DateTime
if issubclass(pytype, np.floating):
if com.is_datetime64_dtype(arr_or_dtype):
try:
tz = arr_or_dtype.tzinfo
return DateTime(timezone=True)
except:
print('no tzinfo')
return DateTime
if com.is_timedelta64_dtype(arr_or_dtype):
return Interval
if com.is_float_dtype(arr_or_dtype):
return Float
if issubclass(pytype, np.integer):
if com.is_integer_dtype(arr_or_dtype):
# TODO: Refine integer size.
return Integer
if issubclass(pytype, np.bool_):
if issubclass(tipo, np.bool_):
return Boolean
return Text

Expand Down Expand Up @@ -638,14 +658,18 @@ def to_sql(self, frame, name, if_exists='fail', index=True):
name, self, frame=frame, index=index, if_exists=if_exists)
table.insert()

@property
def tables(self):
return self.meta.tables

def has_table(self, name):
return self.engine.has_table(name)
if self.meta.tables[name] is not None:
return True
else:
return False

def get_table(self, table_name):
if self.engine.has_table(table_name):
return self.meta.tables[table_name]
else:
return None
return self.meta.tables.get(table_name)

def read_table(self, table_name, index_col=None, coerce_float=True,
parse_dates=None, columns=None):
Expand Down
Loading