Skip to content

Commit

Permalink
MDP-395 Backported commit 52f98541e2d from the ahl.mongo
Browse files: browse the repository at this point in the history
  • Loading branch information
Artjoms Iskovs committed Jul 14, 2015
1 parent 8069caa commit 0a2b6ca
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 28 deletions.
37 changes: 24 additions & 13 deletions arctic/tickstore/tickstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def read(self, symbol, date_range=None, columns=None, include_images=False, _tar
for b in self._collection.find(query, projection=projection).sort([(START, pymongo.ASCENDING)],):
data = self._read_bucket(b, column_set, column_dtypes,
multiple_symbols or (columns is not None and 'SYMBOL' in columns),
include_images)
include_images, columns)
for k, v in data.iteritems():
try:
rtn[k].append(v)
Expand Down Expand Up @@ -325,24 +325,35 @@ def _set_or_promote_dtype(self, column_dtypes, c, dtype):
dtype = np.dtype('f8')
column_dtypes[c] = np.promote_types(column_dtypes.get(c, dtype), dtype)

def _prepend_image(self, document, im):
def _prepend_image(self, document, im, rtn_length, column_dtypes, column_set, columns):
image = im[IMAGE]
first_dt = im['t']
if not first_dt.tzinfo:
first_dt = first_dt.replace(tzinfo=mktz('UTC'))
document[INDEX] = np.insert(document[INDEX], 0, np.uint64(datetime_to_ms(first_dt)))
for field in document:
if field == INDEX or document[field] is None:
for field in image:
if field == INDEX:
continue
if field in image:
val = image[field]
else:
logger.debug("Field %s is missing from image!", field)
val = np.nan
if columns and field not in columns:
continue
if field not in document or document[field] is None:
col_dtype = np.dtype(str if isinstance(image[field], basestring) else 'f8')
document[field] = self._empty(rtn_length, dtype=col_dtype)
column_dtypes[field] = col_dtype
column_set.add(field)
val = image[field]
document[field] = np.insert(document[field], 0, document[field].dtype.type(val))
# Now insert rows for fields in document that are not in the image
for field in set(document).difference(set(image)):
if field == INDEX:
continue
logger.debug("Field %s is missing from image!", field)
if document[field] is not None:
val = np.nan
document[field] = np.insert(document[field], 0, document[field].dtype.type(val))
return document

def _read_bucket(self, doc, columns, column_dtypes, include_symbol, include_images):
def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_images, columns):
rtn = {}
if doc[VERSION] != 3:
raise ArcticException("Unhandled document version: %s" % doc[VERSION])
Expand All @@ -351,8 +362,8 @@ def _read_bucket(self, doc, columns, column_dtypes, include_symbol, include_imag
rtn_length = len(rtn[INDEX])
if include_symbol:
rtn['SYMBOL'] = [doc[SYMBOL], ] * rtn_length
columns.update(doc[COLUMNS].keys())
for c in columns:
column_set.update(doc[COLUMNS].keys())
for c in column_set:
try:
coldata = doc[COLUMNS][c]
dtype = np.dtype(coldata[DTYPE])
Expand All @@ -366,7 +377,7 @@ def _read_bucket(self, doc, columns, column_dtypes, include_symbol, include_imag
rtn[c] = None

if include_images and doc.get(IMAGE_DOC, {}).get(IMAGE, {}):
rtn = self._prepend_image(rtn, doc[IMAGE_DOC])
rtn = self._prepend_image(rtn, doc[IMAGE_DOC], rtn_length, column_dtypes, column_set, columns)
return rtn

def _empty(self, length, dtype):
Expand Down
46 changes: 31 additions & 15 deletions tests/integration/tickstore/test_ts_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
import pytest
import pytz

from arctic import arctic as m
from arctic.date import DateRange, mktz, CLOSED_CLOSED, CLOSED_OPEN, OPEN_CLOSED, OPEN_OPEN
from arctic.exceptions import OverlappingDataException, NoDataFoundException
from arctic.exceptions import NoDataFoundException


def test_read(tickstore_lib):
Expand Down Expand Up @@ -356,11 +355,11 @@ def test_read_longs(tickstore_lib):
def test_read_with_image(tickstore_lib):
DUMMY_DATA = [
{'a': 1.,
'index': dt(2013, 6, 1, 12, 00, tzinfo=mktz('Europe/London'))
'index': dt(2013, 1, 1, 11, 00, tzinfo=mktz('Europe/London'))
},
{
'b': 4.,
'index': dt(2013, 6, 1, 13, 00, tzinfo=mktz('Europe/London'))
'index': dt(2013, 1, 1, 12, 00, tzinfo=mktz('Europe/London'))
},
]
# Add an image
Expand All @@ -371,21 +370,38 @@ def test_read_with_image(tickstore_lib):
{'a': 37.,
'c': 2.,
},
't': dt(2013, 6, 1, 11, 0)
't': dt(2013, 1, 1, 10, tzinfo=mktz('Europe/London'))
}
}
}
)

tickstore_lib.read('SYM', columns=None)
read = tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1), dt(2013, 6, 2)))
assert read['a'][0] == 1
dr = DateRange(dt(2013, 1, 1), dt(2013, 1, 2))
# tickstore_lib.read('SYM', columns=None)
df = tickstore_lib.read('SYM', columns=None, date_range=dr)
assert df['a'][0] == 1

# Read with the image as well
read = tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1), dt(2013, 6, 2)),
include_images=True)
assert read['a'][0] == 37
assert read['a'][1] == 1
assert np.isnan(read['b'][0])
assert read['b'][2] == 4
assert read.index[0] == dt(2013, 6, 1, 11)
df = tickstore_lib.read('SYM', columns=None, date_range=dr, include_images=True)
assert set(df.columns) == set(('a', 'b', 'c'))
assert_array_equal(df['a'].values, np.array([37, 1, np.nan]))
assert_array_equal(df['b'].values, np.array([np.nan, np.nan, 4]))
assert_array_equal(df['c'].values, np.array([2, np.nan, np.nan]))
assert df.index[0] == dt(2013, 1, 1, 10)
assert df.index[1] == dt(2013, 1, 1, 11)
assert df.index[2] == dt(2013, 1, 1, 12)

df = tickstore_lib.read('SYM', columns=('a', 'b'), date_range=dr, include_images=True)
assert set(df.columns) == set(('a', 'b'))
assert_array_equal(df['a'].values, np.array([37, 1, np.nan]))
assert_array_equal(df['b'].values, np.array([np.nan, np.nan, 4]))
assert df.index[0] == dt(2013, 1, 1, 10)
assert df.index[1] == dt(2013, 1, 1, 11)
assert df.index[2] == dt(2013, 1, 1, 12)

df = tickstore_lib.read('SYM', columns=['c'], date_range=dr, include_images=True)
assert set(df.columns) == set(['c'])
assert_array_equal(df['c'].values, np.array([2, np.nan, np.nan]))
assert df.index[0] == dt(2013, 1, 1, 10)
assert df.index[1] == dt(2013, 1, 1, 11)
assert df.index[2] == dt(2013, 1, 1, 12)

0 comments on commit 0a2b6ca

Please sign in to comment.