diff --git a/Makefile b/Makefile index 4fbf5227..a4a5794d 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ ifneq ($(ADD_LDFLAGS), NONE) endif # specify tensor path -BIN = bin/cxxnet bin/im2rec +BIN = bin/cxxnet bin/im2rec bin/bin2rec SLIB = wrapper/libcxxnetwrapper.so OBJ = layer_cpu.o updater_cpu.o nnet_cpu.o main.o nnet_ps_server.o OBJCXX11 = data.o @@ -103,9 +103,10 @@ data.o: src/io/data.cpp src/io/*.hpp main.o: src/cxxnet_main.cpp wrapper/libcxxnetwrapper.so: wrapper/cxxnet_wrapper.cpp $(OBJ) $(OBJCXX11) $(CUDEP) -bin/cxxnet: src/local_main.cpp $(OBJ) $(OBJCXX11) $(DMLC_CORE)/libdmlc.a $(CUDEP) +bin/cxxnet: src/local_main.cpp $(OBJ) $(OBJCXX11) $(DMLC_CORE)/libdmlc.a $(CUDEP) bin/cxxnet.ps: $(OBJ) $(OBJCXX11) $(CUDEP) $(DMLC_CORE)/libdmlc.a $(PS_LIB) bin/im2rec: tools/im2rec.cc $(DMLC_CORE)/libdmlc.a +bin/bin2rec: tools/bin2rec.cc $(DMLC_CORE)/libdmlc.a $(BIN) : $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) diff --git a/tools/bin2rec.cc b/tools/bin2rec.cc new file mode 100644 index 00000000..4eacaf32 --- /dev/null +++ b/tools/bin2rec.cc @@ -0,0 +1,71 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file bin2rec.cc + * \brief convert a binary page file (bin_file) and its image list into image recordio format + * Image Record Format: zeropad[64bit] imid[64bit] img-binary-content + * The 64bit zero pad was reserved for future purposes + * + * Image List Format: unique-image-index label[s] path-to-image + * \sa dmlc/recordio.h + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../src/io/image_recordio.h" +#include "../src/utils/io.h" + + +int main(int argc, char **argv) { + using namespace cxxnet::utils; + using namespace dmlc; + if (argc < 4) { + printf("usage: bin2rec img_list bin_file rec_file [label_width=1]\n"); + exit(-1); + } + FILE *fplst = fopen(argv[1], "r"); + CHECK(fplst != NULL); + dmlc::Stream *fo = dmlc::Stream::Create(argv[3], "w"); + dmlc::RecordIOWriter writer(fo); + cxxnet::ImageRecordIO rec; + std::string blob, fname; + StdFile fi; + fi.Open(argv[2], "rb"); + int label_width = 1; + if (argc > 4) { + label_width = atoi(argv[4]); + } + BinaryPage pg; + size_t imcnt = 0; + while (pg.Load(fi)) { + for (int i = 0; i < pg.Size(); ++i) { + CHECK(fscanf(fplst, "%lu", &rec.header.image_id[0]) == 1); + CHECK(fscanf(fplst, "%f", &rec.header.label) == 1); + for (int k = 1; k < label_width; ++k) { + float tmp; + CHECK(fscanf(fplst, "%f", &tmp) == 1); + } + CHECK(fscanf(fplst, "%*[^\n]\n") == 0) << "ignore"; + rec.SaveHeader(&blob); + BinaryPage::Obj obj = pg[i]; + size_t bsize = blob.size(); + blob.resize(bsize + obj.sz); + memcpy(BeginPtr(blob) + bsize, obj.dptr, obj.sz); + writer.WriteRecord(BeginPtr(blob), blob.size()); + imcnt++; + } + } + LOG(INFO) << "Total: " << imcnt << " images processed"; + delete fo; + fclose(fplst); +} + + + + diff --git a/tools/im2rec.cc b/tools/im2rec.cc index beb514b5..1209f653 100644 --- a/tools/im2rec.cc +++ b/tools/im2rec.cc @@ -20,12 +20,20 @@ #include "../src/io/image_recordio.h" int main(int argc, char *argv[]) { - if (argc < 4) { - fprintf(stderr, 
"Usage: [label_width=1]\n"); + if (argc < 4) { /* argv[1..3] (list, root dir, output) are all read below */ + fprintf(stderr, "Usage: [resize=new_size] [label_width=1]\n"); return 0; } int label_width = 1; - int new_size = atoi(argv[4]); + int new_size = -1; + for (int i = 0; i < argc; ++i) { + char key[64]; + char val[64]; + if (sscanf(argv[i], "%63[^=]=%63s", key, val) == 2) { /* field widths keep long arguments inside key/val */ + if (!strcmp(key, "resize")) new_size = atoi(val); + if (!strcmp(key, "label_width")) label_width = atoi(val); + } + } if (new_size > 0) { LOG(INFO) << "New Image Size: " << new_size << "x" << new_size; } else { @@ -33,7 +41,6 @@ int main(int argc, char *argv[]) { } - if (argc > 5) label_width = atoi(argv[5]); using namespace dmlc; - const static size_t kBufferSize = 16 << 20UL; + const static size_t kBufferSize = 1 << 20UL; std::string root = argv[2]; cxxnet::ImageRecordIO rec; size_t imcnt = 0; @@ -41,6 +48,7 @@ int main(int argc, char *argv[]) { dmlc::Stream *flist = dmlc::Stream::Create(argv[1], "r"); dmlc::istream is(flist); dmlc::Stream *fo = dmlc::Stream::Create(argv[3], "w"); + LOG(INFO) << "Output: " << argv[3]; dmlc::RecordIOWriter writer(fo); std::string fname, path, blob; std::vector decode_buf; @@ -65,13 +73,13 @@ int main(int argc, char *argv[]) { size_t imsize = 0; while (true) { decode_buf.resize(imsize + kBufferSize); - size_t nread = fi->Read(BeginPtr(decode_buf) + imsize, kBufferSize); + size_t nread = fi->Read(BeginPtr(decode_buf) + imsize, kBufferSize); imsize += nread; decode_buf.resize(imsize); if (nread != kBufferSize) break; } delete fi; - if (new_size > 0) { + if (new_size > 0) { cv::Mat img = cv::imdecode(decode_buf, CV_LOAD_IMAGE_COLOR); CHECK(img.data != NULL) << "OpenCV decode fail:" << path; cv::Mat res; @@ -100,6 +108,7 @@ int main(int argc, char *argv[]) { LOG(INFO) << imcnt << " images processed, " << GetTime() - tstart << " sec elapsed"; } } + LOG(INFO) << "Total: " << imcnt << " images processed, " << GetTime() - tstart << " sec elapsed"; delete fo; delete flist; return 0; diff --git a/tools/imgbin-partition-maker.py 
b/tools/imgbin-partition-maker.py index e03914dc..2141d725 100644 --- a/tools/imgbin-partition-maker.py +++ b/tools/imgbin-partition-maker.py @@ -14,7 +14,7 @@ parser.add_argument('--shuffle', default='0', help="Shuffle the list or not") parser.add_argument('--prefix', required=True, help="Prefix of output image lists and bins") parser.add_argument('--out', required=True, help="Output folder for image bins and lists") -parser.add_argument('--new_size', required=True, help="New size of image (-1 for do nothing)") +parser.add_argument('--resize', required=True, help="New size of image (-1 for do nothing)") parser.add_argument('--makefile', default="Gen.mk", help="name of generated Makefile") @@ -22,7 +22,7 @@ # im2bin path IM2BIN = args.im2rec -new_size = args.new_size +new_size = "resize=" + args.resize fi = file(args.img_list) lst = [line for line in fi]