ImageDisplayer on OpenCV

TorontoDeepLearning · Dec 20, 2014 · a1b8b30 · a1b8b30
1 parent 177a980
commit a1b8b30
Show file tree

Hide file tree

Showing 6 changed files with 100 additions and 82 deletions.
diff --git a/INSTALL b/INSTALL
@@ -21,15 +21,12 @@ Dependencies
 - OpenCV
   Download the source code https://github.com/Itseez/opencv/archive/3.0.0-alpha.zip
   To quick install -
-  $ cmake-gui, set CMAKE_BUILD_TYPE to Release
+  $ sudo apt-get install libgtk2.0-dev pkg-config # for opencv imshow
+  $ cmake-gui, set CMAKE_BUILD_TYPE to Release, Configure & Generate
   $ cd build directory
   $ make -j7
   $ sudo make install
 
-- libjpeg
-  This is often present on standard operating systems since it is used by a lot of programs.
-  It can be downloaded from http://libjpeg.sourceforge.net/
-
 - This code uses C++11. Some features require gcc >= 4.6.
 
 ---------------------------

diff --git a/Makefile b/Makefile
@@ -17,8 +17,8 @@ CUDA_LIB=$(CUDA_ROOT)/lib64
 CUDAMAT_DIR=$(CURDIR)/cudamat
 CXX = g++
 LIBFLAGS = -L$(LIB) -L$(CUDA_LIB) -L$(CUDAMAT_DIR)
-CPPFLAGS = -I$(INC) -I$(CUDA_INC) -I$(SRC) -Ideps
-LINKFLAGS = -lopencv_core -lopencv_imgcodecs -lopencv_imgproc -lopencv_videoio -lhdf5 -ljpeg -lX11 -lpthread -lprotobuf -lcublas -ldl -lgomp -lcudamat -lcudart -Wl,-rpath=$(CUDAMAT_DIR) -Wl,-rpath=$(LIB) -Wl,-rpath=$(CUDA_LIB)
+CPPFLAGS = -I$(INC) -I$(CUDA_INC) -I$(SRC)
+LINKFLAGS = -lopencv_core -lopencv_imgcodecs -lopencv_imgproc -lopencv_videoio -lopencv_highgui -lhdf5 -lpthread -lprotobuf -lcublas -ldl -lgomp -lcudamat -lcudart -Wl,-rpath=$(CUDAMAT_DIR) -Wl,-rpath=$(LIB) -Wl,-rpath=$(CUDA_LIB)
 CXXFLAGS = -O2 -std=c++0x -mtune=native -Wall -Wno-unused-result -Wno-sign-compare -fopenmp
 
 ifeq ($(USE_MPI), yes)

diff --git a/src/convnet.cc b/src/convnet.cc
@@ -211,6 +211,9 @@ void ConvNet::BuildNet() {
       if (image_size_y <= 0) image_size_y = model_.patch_size();
       if (image_size_x <= 0) image_size_x = model_.patch_size();
       if (image_size_t <= 0) image_size_t = 1;
+      image_size_y_ = image_size_y;
+      image_size_x_ = image_size_x;
+      image_size_t_ = image_size_t;
     } else {
       image_size_y = l->incoming_edge_[0]->GetNumModulesY();
       image_size_x = l->incoming_edge_[0]->GetNumModulesX();
@@ -491,7 +494,7 @@ void ConvNet::SetupDataset(const string& train_data_config_file,
   train_dataset_ = new DataHandler(model_.train_dataset());
   if (localizer_) {
     train_dataset_->SetFOV(fov_size_, fov_stride_, fov_pad1_, fov_pad2_,
-                           model_.patch_size(), num_fov_x_, num_fov_y_);
+                           image_size_x_, num_fov_x_, num_fov_y_); // TODO: image_size_y_?
   }
   SetBatchsize(train_dataset_->GetBatchSize());
   int dataset_size = train_dataset_->GetDataSetSize();
@@ -501,7 +504,7 @@ void ConvNet::SetupDataset(const string& train_data_config_file,
     val_dataset_ = new DataHandler(model_.valid_dataset());
     if (localizer_) {
       val_dataset_->SetFOV(fov_size_, fov_stride_, fov_pad1_, fov_pad2_,
-                           model_.patch_size(), num_fov_x_, num_fov_y_);
+                           image_size_x_, num_fov_x_, num_fov_y_); // TODO: image_size_y_?
     }
     dataset_size = val_dataset_->GetDataSetSize();
     val_dataset_->AllocateMemory();
@@ -812,11 +815,9 @@ void ConvNet::TimestampModel() {
 }
 
 void ConvNet::SetupLocalizationDisplay() {
-  int image_size = model_.patch_size();
-  localization_display_ = new ImageDisplayer(image_size, image_size, 3, false,
-                                          "localization");
+  localization_display_ = new ImageDisplayer(image_size_x_, image_size_y_, 3, false, "localization"); // args: width, height, num_colors, show_separate, title
   localization_display_->SetFOV(fov_size_, fov_stride_, fov_pad1_, fov_pad2_,
-                                image_size, num_fov_x_, num_fov_y_);
+                                image_size_x_, num_fov_x_, num_fov_y_); // TODO: SetFOV takes a single patch_size, so only the x extent is forwarded; should image_size_y_ be passed as well?
 }
 
 void ConvNet::DisplayLocalization() {

diff --git a/src/convnet.h b/src/convnet.h
@@ -168,8 +168,8 @@ class ConvNet {
   int max_iter_, batch_size_, current_iter_, lr_reduce_counter_;
   DataHandler *train_dataset_, *val_dataset_;
   string checkpoint_dir_, output_file_, model_name_;
-  ImageDisplayer displayer_;
   string model_filename_, timestamp_, log_file_, val_log_file_;
+  int image_size_x_, image_size_y_, image_size_t_;
 
   // Field of view.
   int fov_size_, fov_stride_, fov_pad1_, fov_pad2_;

diff --git a/src/util.cc b/src/util.cc
@@ -297,51 +297,69 @@ void AddVectors(vector<float>& a, vector<float>& b) {
 // ImageDisplayer
 //
 
-void DrawRectange(CImg<float>& img, int xmin, int ymin, int xmax, int ymax, const float* color, int thickness) {
-  for (int i = 0; i < thickness; i++) {
-    img.draw_rectangle(xmin-i, ymin-i, xmax+i, ymax+i, color, 1.0, ~0U);
-  }
-}
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui/highgui.hpp>
 
-ImageDisplayer::ImageDisplayer(int width, int height, int num_colors, bool show_separate, const string& title) :
-  width_(width), height_(height), num_colors_(num_colors),
-  show_separate_(show_separate), title_(title) {
-  disp_.set_title(title_.c_str());
+using namespace cv;
+
+inline void resizeOCV(Mat &img, unsigned int width, unsigned int height) { // Replaces img with a copy resized to width x height using INTER_LINEAR interpolation.
+  Mat out;
+  resize(img, out, Size(width, height), 0, 0, INTER_LINEAR);
+  img = out; // hand the resized buffer back through the in/out parameter
 }
 
-ImageDisplayer::ImageDisplayer() :
-  width_(0), height_(0), num_colors_(3), show_separate_(false), title_("") {
+static Mat image, image1, image2; // scratch buffers shared by all ImageDisplayer methods in this file; static gives these generic names internal linkage
+
+ImageDisplayer::ImageDisplayer(int width, int height, int num_colors, bool show_separate, const string& title) :
+  width_(width), // extent of the j (column) loop in CreateImage
+  height_(height), // extent of the i (row) loop in CreateImage
+  num_colors_(num_colors), // number of color planes read per sample in CreateImage
+  show_separate_(show_separate), // true: CreateImage lays each plane out as its own single-channel tile
+  title_(title) { // used as the window name by DisplayImage and DisplayWeights; num_fov_x_/num_fov_y_ are not initialized here (presumably SetFOV sets them - confirm)
 }
 
 void ImageDisplayer::DisplayImage(float* data, int num_images, int image_id) {
-  CImg<float> img;
-  CreateImage(data, num_images, image_id, img);
-  disp_.set_title(title_.c_str());
-  img.display(disp_);
+  CreateImage(data, num_images, image_id, image); // renders sample image_id into the file-scope Mat `image`
+  namedWindow(title_.c_str(), WINDOW_AUTOSIZE); // the window is identified by title_
+  imshow(title_.c_str(), image);
+  waitKey(1); // NOTE(review): presumably needed so HighGUI processes events and repaints the window - confirm the 1 ms wait is intended
 }
 
-void ImageDisplayer::CreateImage(const float* data, int num_images, int image_id, CImg<float>& img) {
+void ImageDisplayer::CreateImage(const float* data, int num_images, int image_id, Mat &image) { // Copies sample image_id out of the planar batch `data` into `image`, then min-max normalizes it to [0, 1].
   int num_colors_width = (int)sqrt(num_colors_);
   int num_colors_height = (num_colors_ + num_colors_width - 1) / num_colors_width;
-  int display_width = show_separate_ ? width_ * num_colors_width: width_;
-  int display_height = show_separate_ ? height_ * num_colors_height: height_;
-  int display_colors = show_separate_ ? 1 : num_colors_;
-
-  img.assign(display_width, display_height, 1, display_colors);
-  img.fill(0);
-  float val;
-  for (int k = 0; k < num_colors_; k++) {
-    for (int i = 0; i < height_; i++) {
-      for (int j = 0; j < width_; j++) {
-        val = data[image_id + num_images * (j + width_ * (i + k * height_))];
-        if (show_separate_) {
-          img(j + (k % num_colors_width) * width_, i + (k / num_colors_width) * height_, 0, 0) = val;
-        } else {
-          img(j, i, 0, k) = val;
-        }
+  int display_width = show_separate_ ? width_ * num_colors_width : width_;
+  int display_height = show_separate_ ? height_ * num_colors_height : height_;
+  int display_channels = show_separate_ ? 1 : num_colors_; // floats per pixel in the output Mat
+
+  image = Mat::zeros(display_height, display_width, CV_32FC(display_channels)); // Mat dimensions are (rows, cols); zero-fill keeps unused tiles out of the min/max used by normalize() below
+  for (int k=0; k<num_colors_; k++)
+  {
+    int off_color = 0; // channel slot inside an interleaved pixel; stays 0 for single-channel tiles
+    if (!show_separate_)
+    {
+      off_color = (3==num_colors_) ? 2-k : k; // 3-plane data is stored in reverse plane order (presumably RGB input -> OpenCV BGR); other counts keep plane order
+    }
+    for (int i=0; i<height_; ++i)
+    {
+      int off_width = 0;
+      int off_height = 0;
+      if (show_separate_)
+      {
+        off_width  = (k % num_colors_width) * width_;
+        off_height = (k / num_colors_width) * height_;
+      }
+      float *im = image.ptr<float>(i + off_height);
+
+      for (int j=0; j<width_; ++j)
+      {
+        float val = data[image_id + num_images * (j + width_ * (i + k * height_))];
+        im[display_channels*(j + off_width) + off_color] = val;
       }
     }
   }
+  normalize(image, image, 0, 1, NORM_MINMAX);
 }
 
 void ImageDisplayer::YUVToRGB(const float* yuv, float* rgb, int spacing) {
@@ -368,10 +386,8 @@ void ImageDisplayer::RGBToYUV(const float* rgb, float* yuv, int spacing) {
 
 void ImageDisplayer::DisplayWeights(float* data, int size, int num_filters, int display_size, bool yuv) {
   int num_filters_w = int(sqrt(num_filters));
-  int num_filters_h = num_filters / num_filters_w +  (((num_filters % num_filters_w) > 0) ? 1 : 0);
+  int num_filters_h = num_filters / num_filters_w + (((num_filters % num_filters_w) > 0) ? 1 : 0);
   int data_pos, row, col;
-  CImg<float> img(size * num_filters_w, size * num_filters_h, 1, 3);
-  img.fill(0);
   float norm = 0;
   if (yuv) YUVToRGB(data, data, num_filters * size * size);
   for (int f = 0; f < num_filters; f++) {
@@ -384,29 +400,38 @@ void ImageDisplayer::DisplayWeights(float* data, int size, int num_filters, int
       data[i * num_filters + f] /= norm;
     }
   }
+
+  image = Mat::zeros(size * num_filters_h, size * num_filters_w, CV_32FC3); // Mat dimensions are (rows, cols); zero-fill restores the old img.fill(0) so empty grid cells stay black
   for (int f = 0; f < num_filters; f++) {
     for (int k = 0; k < 3; k++) {
       for (int h = 0; h < size; h++) {
         for (int w = 0; w < size; w++) {
           data_pos = f + num_filters * (w + size * (h + size * k));
           col = w + size * (f % num_filters_w);
           row = h + size * (f / num_filters_w);
-          img(col, row, 0, k) = data[data_pos];
+
+          float *im = image.ptr<float>(row);
+          im[3*col+(2-k)] = data[data_pos]; // 3 interleaved floats per pixel; plane k is written to slot 2-k
         }
       }
     }
   }
-  const unsigned char color[] = {0, 0, 0};
-  img.resize(display_size, display_size);
+  normalize(image, image, 0, 1, NORM_MINMAX);
+
+  const Scalar color(0, 0, 0);
+  resizeOCV(image, display_size, display_size);
   for (int i = 0; i < num_filters_w; i++) {
-    int pos = (i * img.width()) / num_filters_w;
-    img.draw_line(pos, 0, pos, img.height(), color);
+    int pos = (i * image.cols) / num_filters_w; // Mat::cols already counts pixels (not floats), so the grid must span the full width
+    line(image, Point(pos, 0), Point(pos, image.rows), color);
   }
   for (int i = 0; i < num_filters_h; i++) {
-    int pos = (i * img.height()) / num_filters_h;
-    img.draw_line(0, pos, img.width(), pos, color);
+    int pos = (i * image.rows) / num_filters_h;
+    line(image, Point(0, pos), Point(image.cols, pos), color); // horizontal separator across the whole image width
   }
-  img.display(disp_);
+
+  namedWindow(title_.c_str(), WINDOW_AUTOSIZE);
+  imshow(title_.c_str(), image);
+  waitKey(1);
 }
 
 void ImageDisplayer::SetFOV(int size, int stride, int pad1, int pad2,
@@ -423,16 +448,15 @@ void ImageDisplayer::DisplayLocalization(float* data, float* preds, float* gt, i
   int image_id = 0;
 
   int num_fovs = num_fov_y_ * num_fov_x_;
-
-  CImg<float> img;
-  CreateImage(data, num_images, image_id, img);
+
+  CreateImage(data, num_images, image_id, image1);
   const int image_size = 250;
-  img.resize(image_size, image_size);
+  resizeOCV(image1, image_size, image_size);
 
-  CImg<float> img2 = CImg<float>(img);
+  image2 = image1.clone();
 
-  const float green[] = {0, 1, 0};
-  const float blue[] = {0, 0, 1};
+  const Scalar green(0, 255, 0);
+  const Scalar blue(0, 0, 255);
 
   float fov_x, fov_y;
   gt += image_id;
@@ -460,11 +484,14 @@ void ImageDisplayer::DisplayLocalization(float* data, float* preds, float* gt, i
     int xmax_preds2 = (int)((xmax_preds + fov_x) * image_size);
     int ymax_preds2 = (int)((ymax_preds + fov_y) * image_size);
 
-    DrawRectange(img, xmin_gt2, ymin_gt2, xmax_gt2, ymax_gt2, green, 3);
-    DrawRectange(img2, xmin_preds2, ymin_preds2, xmax_preds2, ymax_preds2, blue, 3);
+    rectangle(image1, Point(xmin_gt2, ymin_gt2), Point(xmax_gt2, ymax_gt2), green, 3);
+    rectangle(image2, Point(xmin_preds2, ymin_preds2), Point(xmax_preds2, ymax_preds2), blue, 3);
   }
 
-  CImgList<float> img_list(img, img2);
-  img_list.display(disp_);
-
+  namedWindow("Localization1", WINDOW_AUTOSIZE);
+  imshow("Localization1", image1);
+  namedWindow("Localization2", WINDOW_AUTOSIZE);
+  imshow("Localization2", image2);
+  waitKey(1);
 }
+
diff --git a/src/util.h b/src/util.h
@@ -12,9 +12,6 @@
 #include "mpi.h"
 #endif
 #include <string>
-#define cimg_use_jpeg
-#define cimg_use_lapack
-#include "CImg/CImg.h"
 #include <stdio.h>
 #include <google/protobuf/text_format.h>
 #include "convnet_config.pb.h"
@@ -32,7 +29,6 @@
 #define MPITAG_WEIGHTGRAD 11
 #define MPITAG_TRAINERROR 12
 
-using namespace cimg_library;
 using namespace std;
 
 template<class T> void ReadPbtxt(const string& pbtxt_file, T& model);
@@ -58,7 +54,6 @@ string GetTimeStamp();
 void TimestampModelFile(const string& src_file, const string& dest_file, const string& timestamp);
 
 bool ReadLines(const string& filename, vector<string>& lines);
-void DrawRectange(CImg<float>& img, int xmin, int ymin, int xmax, int ymax, const float* color, int thickness);
 
 // Outputs a string that describes the err_code.
 string GetStringError(int err_code);
@@ -70,25 +65,24 @@ void AddVectors(vector<float>& a, vector<float>& b);
 // ImageDisplayer
 //
 
+#include <opencv2/core/core.hpp>
+
 class ImageDisplayer {
- public:
-  ImageDisplayer();
+public:
   ImageDisplayer(int width, int height, int num_colors, bool show_separate, const string& name);
 
   void SetTitle(const string& title) {title_ = title;}
   void DisplayImage(float* data, int spacing, int image_id);
-  void CreateImage(const float* data, int num_images, int image_id, CImg<float>& img);
   void DisplayWeights(float* data, int size, int num_filters, int display_size, bool yuv = false);
   void DisplayLocalization(float* data, float* preds, float* gt, int num_images);
   void SetFOV(int size, int stride, int pad1, int pad2, int patch_size, int num_fov_x, int num_fov_y);
-
+
+private: // NOTE(review): YUVToRGB/RGBToYUV below were public before this change and now fall under private - confirm there are no external callers
+  void CreateImage(const float* data, int num_images, int image_id, cv::Mat &image); // fills `image` from one sample of the batch; called by DisplayImage and DisplayLocalization
 
   static void YUVToRGB(const float* yuv, float* rgb, int spacing);
   static void RGBToYUV(const float* rgb, float* yuv, int spacing);
 
- private:
-
-  CImgDisplay disp_;
   int width_, height_, num_colors_;
   bool show_separate_;
   string title_;
@@ -97,5 +91,4 @@ class ImageDisplayer {
   int num_fov_x_, num_fov_y_;
 };
 
-
 #endif