Skip to content

Commit

Permalink
add bin2rec, minor change im2rec
Browse files Browse the repository at this point in the history
  • Loading branch information
antinucleon committed May 3, 2015
1 parent 904a6d8 commit 95e06fa
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 10 deletions.
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ ifneq ($(ADD_LDFLAGS), NONE)
endif

# specify tensor path
BIN = bin/cxxnet bin/im2rec
BIN = bin/cxxnet bin/im2rec tools/bin2rec
SLIB = wrapper/libcxxnetwrapper.so
OBJ = layer_cpu.o updater_cpu.o nnet_cpu.o main.o nnet_ps_server.o
OBJCXX11 = data.o
Expand Down Expand Up @@ -103,9 +103,10 @@ data.o: src/io/data.cpp src/io/*.hpp
main.o: src/cxxnet_main.cpp

wrapper/libcxxnetwrapper.so: wrapper/cxxnet_wrapper.cpp $(OBJ) $(OBJCXX11) $(CUDEP)
bin/cxxnet: src/local_main.cpp $(OBJ) $(OBJCXX11) $(DMLC_CORE)/libdmlc.a $(CUDEP)
bin/cxxnet: src/local_main.cpp $(OBJ) $(OBJCXX11) $(DMLC_CORE)/libdmlc.a $(CUDEP)
bin/cxxnet.ps: $(OBJ) $(OBJCXX11) $(CUDEP) $(DMLC_CORE)/libdmlc.a $(PS_LIB)
bin/im2rec: tools/im2rec.cc $(DMLC_CORE)/libdmlc.a
tools/bin2rec: tools/bin2rec.cc $(DMLC_CORE)/libdmlc.a

$(BIN) :
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS)
Expand Down
71 changes: 71 additions & 0 deletions tools/bin2rec.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*!
* Copyright (c) 2015 by Contributors
* \file bin2rec.cc
* \brief convert a packed image binary file (BinaryPage format) into image recordio format
* Image Record Format: zeropad[64bit] imid[64bit] img-binary-content
* The 64bit zero pad was reserved for future purposes
*
* Image List Format: unique-image-index label[s] path-to-image
* \sa dmlc/recordio.h
*/
#include <cctype>
#include <string>
#include <cstring>
#include <vector>
#include <dmlc/base.h>
#include <dmlc/io.h>
#include <dmlc/timer.h>
#include <dmlc/logging.h>
#include <dmlc/recordio.h>
#include <opencv2/opencv.hpp>
#include "../src/io/image_recordio.h"
#include "../src/utils/io.h"


/*!
 * \brief Entry point of bin2rec: repack a paged image binary file into RecordIO.
 *
 * Usage: bin2rec img_list bin_file rec_file [label_width=1]
 *
 * For every object found in the pages of bin_file, one entry is consumed from
 * img_list: an integer image id, then label_width float labels, then the rest
 * of the line (ignored). Only the first label is kept in the record header;
 * the remaining label_width - 1 values are parsed and discarded.
 * Each output record is the saved header followed by the raw object bytes.
 *
 * \param argc number of command line arguments
 * \param argv argv[1] = image list, argv[2] = input binary file,
 *             argv[3] = output record file, argv[4] = optional label width
 * \return 0 on success; exits with -1 when too few arguments are given
 */
int main(int argc, char **argv) {
  using namespace cxxnet::utils;
  using namespace dmlc;
  if (argc < 4) {
    printf("usage: bin2rec img_list bin_file rec_file [label_width=1]\n");
    exit(-1);
  }
  FILE *fplst = fopen(argv[1], "r");
  CHECK(fplst != NULL) << "cannot open image list " << argv[1];
  dmlc::Stream *fo = dmlc::Stream::Create(argv[3], "w");
  dmlc::RecordIOWriter writer(fo);
  cxxnet::ImageRecordIO rec;
  std::string blob;
  StdFile fi;
  fi.Open(argv[2], "rb");
  int label_width = 1;
  if (argc > 4) {
    label_width = atoi(argv[4]);
  }
  BinaryPage pg;
  size_t imcnt = 0;
  while (pg.Load(fi)) {
    for (int i = 0; i < pg.Size(); ++i) {
      // Parse the id through an unsigned long long temporary: "%lu" is only
      // correct where unsigned long is exactly the type of the id field,
      // which does not hold on every platform for a 64-bit id.
      unsigned long long image_id = 0;
      CHECK(fscanf(fplst, "%llu", &image_id) == 1);
      rec.header.image_id[0] = image_id;
      CHECK(fscanf(fplst, "%f", &rec.header.label) == 1);
      // Extra labels are consumed to stay aligned with the list format,
      // but they are not stored anywhere.
      for (int k = 1; k < label_width; ++k) {
        float tmp;
        CHECK(fscanf(fplst, "%f", &tmp) == 1);
      }
      // Skip the remainder of the line (the image path); nothing is assigned,
      // so a successful call returns 0.
      CHECK(fscanf(fplst, "%*[^\n]\n") == 0) << "ignore";
      // NOTE(review): presumably SaveHeader resets blob to just the header
      // bytes - confirm in image_recordio.h. The payload is appended after it.
      rec.SaveHeader(&blob);
      BinaryPage::Obj obj = pg[i];
      size_t bsize = blob.size();
      blob.resize(bsize + obj.sz);
      memcpy(BeginPtr(blob) + bsize, obj.dptr, obj.sz);
      writer.WriteRecord(BeginPtr(blob), blob.size());
      imcnt++;
    }
  }
  LOG(INFO) << "Total: " << imcnt << " images processed";
  delete fo;
  fclose(fplst);
  return 0;
}




22 changes: 16 additions & 6 deletions tools/im2rec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,36 @@
#include "../src/io/image_recordio.h"

int main(int argc, char *argv[]) {
if (argc < 4) {
fprintf(stderr, "Usage: <image.lst> <image_root_dir> <output_file> <new_img_size/-1(do nothing)]> [label_width=1]\n");
if (argc < 3) {
fprintf(stderr, "Usage: <image.lst> <image_root_dir> <output_file> [resize=new_size] [label_width=1]\n");
return 0;
}
int label_width = 1;
int new_size = atoi(argv[4]);
int new_size = -1;
for (int i = 0; i < argc; ++i) {
char key[64];
char val[64];
if (sscanf(argv[i], "%[^=]=%s", key, val) == 2) {
if (!strcmp(key, "resize")) new_size = atoi(val);
if (!strcmp(key, "label_width")) label_width = atoi(val);
}
}
if (new_size > 0) {
LOG(INFO) << "New Image Size: " << new_size << "x" << new_size;
} else {
LOG(INFO) << "Keep origin image size";
}
if (argc > 5) label_width = atoi(argv[5]);
using namespace dmlc;
const static size_t kBufferSize = 16 << 20UL;
const static size_t kBufferSize = 1 << 20UL;
std::string root = argv[2];
cxxnet::ImageRecordIO rec;
size_t imcnt = 0;
double tstart = dmlc::GetTime();
dmlc::Stream *flist = dmlc::Stream::Create(argv[1], "r");
dmlc::istream is(flist);
dmlc::Stream *fo = dmlc::Stream::Create(argv[3], "w");
LOG(INFO) << "Output: " << argv[3];
dmlc::RecordIOWriter writer(fo);
std::string fname, path, blob;
std::vector<unsigned char> decode_buf;
Expand All @@ -65,13 +74,13 @@ int main(int argc, char *argv[]) {
size_t imsize = 0;
while (true) {
decode_buf.resize(imsize + kBufferSize);
size_t nread = fi->Read(BeginPtr(decode_buf) + imsize, kBufferSize);
size_t nread = fi->Read(BeginPtr(decode_buf) + imsize, kBufferSize);
imsize += nread;
decode_buf.resize(imsize);
if (nread != kBufferSize) break;
}
delete fi;
if (new_size > 0) {
if (new_size > 0) {
cv::Mat img = cv::imdecode(decode_buf, CV_LOAD_IMAGE_COLOR);
CHECK(img.data != NULL) << "OpenCV decode fail:" << path;
cv::Mat res;
Expand Down Expand Up @@ -100,6 +109,7 @@ int main(int argc, char *argv[]) {
LOG(INFO) << imcnt << " images processed, " << GetTime() - tstart << " sec elapsed";
}
}
LOG(INFO) << "Total: " << imcnt << " images processed, " << GetTime() - tstart << " sec elapsed";
delete fo;
delete flist;
return 0;
Expand Down
4 changes: 2 additions & 2 deletions tools/imgbin-partition-maker.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
parser.add_argument('--shuffle', default='0', help="Shuffle the list or not")
parser.add_argument('--prefix', required=True, help="Prefix of output image lists and bins")
parser.add_argument('--out', required=True, help="Output folder for image bins and lists")
parser.add_argument('--new_size', required=True, help="New size of image (-1 for do nothing)")
parser.add_argument('--resize', required=True, help="New size of image (-1 for do nothing)")
parser.add_argument('--makefile', default="Gen.mk", help="name of generated Makefile")


args = parser.parse_args()
# im2bin path
IM2BIN = args.im2rec

new_size = args.new_size
new_size = "resize=" + args.new_size

fi = file(args.img_list)
lst = [line for line in fi]
Expand Down

0 comments on commit 95e06fa

Please sign in to comment.