Add DataFeeder #6102
@@ -0,0 +1,95 @@
from __future__ import print_function
from framework import Variable
import core
import layers
import numpy
import six.moves as six


class DataToLoDTensorConverter(object):
    def __init__(self, place, lod_level, shape, batch_size_dim, dtype):
        self.place = place
        self.lod_level = lod_level
        self.shape = shape
        self.batch_size_dim = batch_size_dim
        if dtype == core.DataType.FP32:
            self.dtype = 'float32'
        elif dtype == core.DataType.INT64:
            self.dtype = 'int64'
        elif dtype == core.DataType.FP64:
            self.dtype = 'float64'
        elif dtype == core.DataType.INT32:
            self.dtype = 'int32'

        self.data = []
        self.lod = []

        for i in six.range(lod_level):
            self.lod.append([0])

    def feed(self, data):
        self._feed_impl_(data, self.lod, self.lod_level)

    def _feed_impl_(self, data, lod, lod_level):
        if lod_level == 0:
            self.data.append(data)
        else:
            cur_lod_len = len(data)
            lod[-1].append(lod[-1][-1] + cur_lod_len)
            for each_data in data:
                self._feed_impl_(each_data, lod[:-1], lod_level - 1)

    def done(self):
        arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape)
        t = core.LoDTensor()
        t.set(arr, self.place)
        if self.lod_level != 0:
            t.set_lod(self.lod)
        return t

Review comment on the if-condition in done(): self.lod_level > 0
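The recursion in _feed_impl_ only does two things per sequence: extend the running offset list and flatten the payload. The standalone sketch below (not part of this PR; flat_data, lod and toy_feed are illustrative names) mimics that bookkeeping for a single lod_level == 1 slot using plain Python lists, so the resulting offsets can be checked without building the C++ core:

flat_data = []          # plays the role of self.data
lod = [[0]]             # plays the role of self.lod when lod_level == 1

def toy_feed(sequence):
    # One call per sample: record the new end offset, then flatten the payload.
    lod[-1].append(lod[-1][-1] + len(sequence))
    flat_data.extend(sequence)

toy_feed([1, 0, 1])     # sample 0: a sequence of length 3
toy_feed([2])           # sample 1: a sequence of length 1

print(flat_data)        # [1, 0, 1, 2]
print(lod)              # [[0, 3, 4]] -> sequence boundaries at offsets 0, 3 and 4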
class DataFeeder(object):
    def __init__(self, feed_list, place):
        self.feed_dtypes = []
        self.feed_names = []
        self.feed_shapes = []
        self.feed_lod_level = []
        for each_var in feed_list:
            if not isinstance(each_var, Variable):
                raise TypeError("Feed list should contain a list of variable")
            self.feed_dtypes.append(each_var.dtype)
            self.feed_names.append(each_var.name)
            shape = each_var.shape
            batch_size_dim = -1
            for i, s in enumerate(shape):
                if s < 0:
                    batch_size_dim = i
                    break
            if batch_size_dim == -1:
                raise ValueError("Variable {0} must has a batch size dimension",
                                 each_var.name)
            self.feed_lod_level.append(each_var.lod_level)
            self.feed_shapes.append((batch_size_dim, shape))

        self.place = place

Review comment on the batch size detection loop: Isn't batch_size_dim always the first dimension?
Reply: No, batch_size_dim is not the first dimension if we use a static RNN.
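To make the static RNN remark above concrete: the detection loop simply picks the first negative entry of the declared shape, wherever it sits. A standalone check (illustrative only, not part of this PR):

shape = (10, -1, 32)        # e.g. a time-major layout where the batch dimension is axis 1
batch_size_dim = -1
for i, s in enumerate(shape):
    if s < 0:
        batch_size_dim = i
        break
print(batch_size_dim)       # 1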
    def feed(self, iterable):
        converter = []
        for lod_level, shape, dtype in six.zip(
                self.feed_lod_level, self.feed_shapes, self.feed_dtypes):
            batch_size_dim, shape = shape
            converter.append(
                DataToLoDTensorConverter(
                    place=self.place,
                    lod_level=lod_level,
                    shape=shape,
                    batch_size_dim=batch_size_dim,
                    dtype=dtype))

        for each_sample in iterable:
            for each_converter, each_slot in six.zip(converter, each_sample):
                each_converter.feed(each_slot)
        ret_dict = {}
        for each_name, each_converter in six.zip(self.feed_names, converter):
            ret_dict[each_name] = each_converter.done()
        return ret_dict

Review comment on the line batch_size_dim, shape = shape: This line takes batch_size_dim as the first dimension.
Reply: No. shape is a tuple at this point; feed_shapes stores (batch_size_dim, shape) pairs, so this just unpacks it.
Review comment: It seems that batch_size_dim is not used here.
Reply: Yes, batch_size_dim could be removed.
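For context, here is a minimal usage sketch of the new class. It assumes DataFeeder is re-exported at the fluid package level and that layers.data accepts name/shape/dtype arguments as in the rest of the codebase at this revision; the variable names and the two-sample minibatch are made up for illustration, not taken from this PR:

import paddle.v2.fluid as fluid

# Two feed slots: a dense float row and an int64 label.
# layers.data prepends a -1 batch dimension by default, which
# DataFeeder.__init__ above detects as batch_size_dim.
img = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())

# Each element of the iterable is one sample and holds one entry per
# variable in feed_list, in the same order.
minibatch = [
    ([0.0] * 784, [7]),
    ([1.0] * 784, [3]),
]
feed_dict = feeder.feed(minibatch)
# feed_dict maps variable names to LoDTensors, e.g. feed_dict['image'].

Under these assumptions feed() returns one LoDTensor per slot, keyed by variable name, which is the kind of dictionary an executor expects as its feed argument.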