Skip to content

Commit

Permalink
ENH pandas-dev#6416: performance improvements on write - tradeoff high…
Browse files Browse the repository at this point in the history
…er memory use for faster writes.
  • Loading branch information
mangecoeur authored and gouthambs committed Mar 12, 2014
1 parent 066bd98 commit 6dbe501
Showing 1 changed file with 49 additions and 26 deletions.
75 changes: 49 additions & 26 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
Collection of query wrappers / abstractions to both facilitate data
retrieval and to reduce dependency on DB-specific API.
"""
from __future__ import print_function
from datetime import datetime, date
from __future__ import print_function, division
from datetime import datetime, date, timedelta
import warnings
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
import numpy as np


import pandas.core.common as com
from pandas.core.api import DataFrame
from pandas.core.base import PandasObject
from pandas.tseries.tools import to_datetime
Expand Down Expand Up @@ -360,7 +360,7 @@ def pandasSQL_builder(con, flavor=None, meta=None):


class PandasSQLTable(PandasObject):
"""
"""
For mapping Pandas tables to SQL tables.
Uses fact that table is reflected by SQLAlchemy to
do better type conversions.
Expand Down Expand Up @@ -419,13 +419,21 @@ def maybe_asscalar(self, i):

def insert(self):
    """
    Write every row of ``self.frame`` using a single ``execute`` call.

    Each row is converted to a dict mapping column name to the value
    returned by ``self.maybe_asscalar``.  When ``self.index`` is not
    None, the row's index label is stored under that key as well.  All
    row dicts are collected in a list and handed to
    ``self.pd_sql.execute`` together with the insert statement, trading
    higher memory use for fewer execute calls.

    Nothing is executed when the frame yields no rows.
    """
    ins = self.insert_statement()
    asscalar = self.maybe_asscalar

    def as_record(row):
        # One parameter dict per row: column name -> converted value.
        return dict((k, asscalar(v)) for k, v in row.iteritems())

    # Branch once here rather than testing ``self.index`` for every row.
    if self.index is not None:
        index_name = self.index
        data_list = []
        for label, row in self.frame.iterrows():
            record = as_record(row)
            record[index_name] = asscalar(label)
            data_list.append(record)
    else:
        data_list = [as_record(row) for _, row in self.frame.iterrows()]

    if not data_list:
        # NOTE(review): an empty parameter list is presumably not a
        # meaningful bulk insert for the backend; skip the call -- confirm
        # against pd_sql.execute.
        return

    self.pd_sql.execute(ins, data_list)

def read(self, coerce_float=True, parse_dates=None, columns=None):

Expand Down Expand Up @@ -480,7 +488,7 @@ def _create_table_statement(self):
if self.index is not None:
columns.insert(0, Column(self.index,
self._sqlalchemy_type(
self.frame.index.dtype),
self.frame.index),
index=True))

return Table(self.name, self.pd_sql.meta, *columns)
Expand Down Expand Up @@ -537,22 +545,33 @@ def _harmonize_columns(self, parse_dates=None):
except KeyError:
pass # this column not in results

def _sqlalchemy_type(self, arr_or_dtype):
    """
    Choose the SQLAlchemy column type for a dtype or array-like.

    Parameters
    ----------
    arr_or_dtype : numpy.dtype, type, or object exposing ``.dtype``
        A ``numpy.dtype`` is used as-is, a Python/NumPy type is passed
        through ``np.dtype``, and anything else contributes its
        ``.dtype`` attribute.

    Returns
    -------
    ``Date`` when ``arr_or_dtype`` is ``datetime.date`` itself; otherwise
    ``DateTime`` (or a ``DateTime(timezone=True)`` instance when the
    input carries a non-None ``tzinfo``), ``Interval``, ``Float``,
    ``Integer`` or ``Boolean`` according to the dtype, and ``Text`` as
    the fallback.
    """
    from sqlalchemy.types import (Integer, Float, Text, Boolean, DateTime,
                                  Date, Interval)

    if arr_or_dtype is date:
        return Date

    # Normalise to a numpy dtype so every check below sees the same kind
    # of object, whatever the caller passed in.
    if isinstance(arr_or_dtype, np.dtype):
        tipo = arr_or_dtype
    elif isinstance(arr_or_dtype, type):
        tipo = np.dtype(arr_or_dtype)
    else:
        tipo = arr_or_dtype.dtype

    if com.is_datetime64_dtype(tipo):
        # Only flag the column as timezone-aware when a tzinfo is really
        # present; a missing attribute or a None value means naive.
        if getattr(arr_or_dtype, 'tzinfo', None) is not None:
            return DateTime(timezone=True)
        return DateTime
    if com.is_timedelta64_dtype(tipo):
        return Interval
    if com.is_float_dtype(tipo):
        return Float
    if com.is_integer_dtype(tipo):
        # TODO: Refine integer size.
        return Integer
    # ``tipo`` is a dtype instance, so test its scalar type; an
    # isinstance() check against np.bool_ on the dtype itself never matches.
    if issubclass(tipo.type, np.bool_):
        return Boolean
    return Text

Expand Down Expand Up @@ -638,14 +657,18 @@ def to_sql(self, frame, name, if_exists='fail', index=True):
name, self, frame=frame, index=index, if_exists=if_exists)
table.insert()

@property
def tables(self):
    """Return the ``tables`` mapping held by ``self.meta``."""
    return self.meta.tables

def has_table(self, name):
    """
    Report whether ``name`` is a key of ``self.meta.tables``.

    Only the metadata mapping is consulted, not the engine.
    NOTE(review): a table created after the metadata was populated would
    presumably not be seen here -- confirm against how ``self.meta`` is
    refreshed.

    Returns
    -------
    bool
    """
    return name in self.meta.tables

def get_table(self, table_name):
    """
    Look up ``table_name`` in ``self.meta.tables``.

    Returns
    -------
    The entry stored under ``table_name``, or None when there is no such
    key.  Only the metadata mapping is consulted, not the engine.
    """
    return self.meta.tables.get(table_name)

def read_table(self, table_name, index_col=None, coerce_float=True,
parse_dates=None, columns=None):
Expand Down

0 comments on commit 6dbe501

Please sign in to comment.