diff --git a/Colorization/README.md b/Colorization/README.md index d779f7b23..1cef3e85a 100644 --- a/Colorization/README.md +++ b/Colorization/README.md @@ -15,8 +15,9 @@ a video file: C++: Compilation examples: -g++ -ggdb `pkg-config --cflags --libs /usr/local/Cellar/opencv3/3.4.2/lib/pkgconfig/opencv.pc` colorizeImage.cpp -o colorizeImage.out -g++ -ggdb `pkg-config --cflags --libs /usr/local/Cellar/opencv3/3.4.2/lib/pkgconfig/opencv.pc` colorizeVideo.cpp -o colorizeVideo.out +g++ `pkg-config --cflags --libs opencv4` colorizeImage.cpp -o colorizeImage.out -lopencv_core -lopencv_dnn -lopencv_highgui -lopencv_imgcodecs -lopencv_imgproc -std=c++11 +g++ `pkg-config --cflags --libs opencv4` colorizeVideo.cpp -o colorizeVideo.out -lopencv_core -lopencv_dnn -lopencv_highgui -lopencv_imgcodecs -lopencv_imgproc -lopencv_videoio -std=c++11 + Commandline usage to colorize a single image: diff --git a/Colorization/colorizeImage.cpp b/Colorization/colorizeImage.cpp index bf92e74f8..499c076e7 100644 --- a/Colorization/colorizeImage.cpp +++ b/Colorization/colorizeImage.cpp @@ -43,35 +43,53 @@ static float hull_pts[] = { int main(int argc, char **argv) { - + string imageFileName; - // Take arguments from commmand line - if (argc < 2) + string device; + // Take arguments from command line + if (argc == 3) + { + device = argv[2]; + } + else if (argc == 2) + device = "cpu"; + else { cout << "Please input the greyscale image filename." << endl; cout << "Usage example: ./colorizeImage.out greyscaleImage.png" << endl; + cout << "If you want to use GPU device instead of CPU, add one more argument." << endl; + cout << "Usage example:./colorizeImage.out greyscaleImage.png gpu" << endl; return 1; } - + imageFileName = argv[1]; Mat img = imread(imageFileName); if (img.empty()) { cout << "Can't read image from file: " << imageFileName << endl; - return 2; + return 1; } - + cout << "Input image file: " << imageFileName << endl; + string protoFile = "./models/colorization_deploy_v2.prototxt"; string weightsFile = "./models/colorization_release_v2.caffemodel"; - //string weightsFile = "./models/colorization_release_v2_norebal.caffemodel"; - double t = (double) cv::getTickCount(); - - // fixed input size for the pretrained network + // fixed input size for the pre-trained network const int W_in = 224; const int H_in = 224; Net net = dnn::readNetFromCaffe(protoFile, weightsFile); - + if (device != "gpu") + { + cout << "Using CPU device" << endl; + net.setPreferableBackend(DNN_TARGET_CPU); + } + else + { + cout << "Using GPU device" << endl; + net.setPreferableBackend(DNN_BACKEND_CUDA); + net.setPreferableTarget(DNN_TARGET_CUDA); + } + // setup additional layers: int sz[] = {2, 313, 1, 1}; const Mat pts_in_hull(4, sz, CV_32F, hull_pts); @@ -79,7 +97,9 @@ int main(int argc, char **argv) class8_ab->blobs.push_back(pts_in_hull); Ptr conv8_313_rh = net.getLayer("conv8_313_rh"); conv8_313_rh->blobs.push_back(Mat(1, 313, CV_32F, Scalar(2.606))); - + + double t = (double) cv::getTickCount(); + // extract L channel and subtract mean Mat lab, L, input; img.convertTo(img, CV_32F, 1.0/255); @@ -87,19 +107,19 @@ int main(int argc, char **argv) extractChannel(lab, L, 0); resize(L, input, Size(W_in, H_in)); input -= 50; - + // run the L channel through the network Mat inputBlob = blobFromImage(input); net.setInput(inputBlob); Mat result = net.forward(); - + // retrieve the calculated a,b channels from the network output - Size siz(result.size[2], result.size[3]); - Mat a = Mat(siz, CV_32F, result.ptr(0,0)); - Mat b = Mat(siz, CV_32F, result.ptr(0,1)); + Size out_size(result.size[2], result.size[3]); + Mat a = Mat(out_size, CV_32F, result.ptr(0, 0)); + Mat b = Mat(out_size, CV_32F, result.ptr(0, 1)); resize(a, a, img.size()); resize(b, b, img.size()); - + // merge, and convert back to BGR Mat color, chn[] = {L, a, b}; merge(chn, 3, lab); @@ -107,12 +127,12 @@ int main(int argc, char **argv) t = ((double)cv::getTickCount() - t)/cv::getTickFrequency(); cout << "Time taken : " << t << " secs" << endl; - + string str = imageFileName; - str.replace(str.end()-4, str.end(), ""); - str = str+"_colorized.png"; - - color = color*255; + str.replace(str.end() - 4, str.end(), ""); + str = str + "_colorized.png"; + + color = color.mul(255); color.convertTo(color, CV_8U); imwrite(str, color); diff --git a/Colorization/colorizeImage.py b/Colorization/colorizeImage.py index f57ac47ff..d0d44d3e0 100644 --- a/Colorization/colorizeImage.py +++ b/Colorization/colorizeImage.py @@ -1,33 +1,36 @@ # This code is written by Sunita Nayak at BigVision LLC. It is based on the OpenCV project. # It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html -#### Usage example: python3 colorize.py --input greyscaleImage.png +# Usage example: python3 colorizeImage.py --input greyscaleImage.png import numpy as np import cv2 as cv import argparse import os.path +import time parser = argparse.ArgumentParser(description='Colorize GreyScale Image') parser.add_argument('--input', help='Path to image.') +parser.add_argument("--device", default="cpu", help="Device to inference on") args = parser.parse_args() -if args.input==None: +if args.input is None: print('Please give the input greyscale image name.') print('Usage example: python3 colorizeImage.py --input greyscaleImage.png') exit() -if os.path.isfile(args.input)==0: +if not os.path.isfile(args.input): print('Input file does not exist') exit() +print("Input image file: ", args.input) + # Read the input image frame = cv.imread(args.input) # Specify the paths for the 2 model files protoFile = "./models/colorization_deploy_v2.prototxt" weightsFile = "./models/colorization_release_v2.caffemodel" -#weightsFile = "./models/colorization_release_v2_norebal.caffemodel" # Load the cluster centers pts_in_hull = np.load('./pts_in_hull.npy') @@ -35,33 +38,46 @@ # Read the network into Memory net = cv.dnn.readNetFromCaffe(protoFile, weightsFile) +if args.device == "cpu": + net.setPreferableBackend(cv.dnn.DNN_TARGET_CPU) + print("Using CPU device") +elif args.device == "gpu": + net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA) + net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA) + print("Using GPU device") + # populate cluster centers as 1x1 convolution kernel pts_in_hull = pts_in_hull.transpose().reshape(2, 313, 1, 1) net.getLayer(net.getLayerId('class8_ab')).blobs = [pts_in_hull.astype(np.float32)] net.getLayer(net.getLayerId('conv8_313_rh')).blobs = [np.full([1, 313], 2.606, np.float32)] -#from opencv sample +# from opencv sample W_in = 224 H_in = 224 -img_rgb = (frame[:,:,[2, 1, 0]] * 1.0 / 255).astype(np.float32) +start = time.time() + +img_rgb = (frame[:, :, [2, 1, 0]] * 1.0 / 255).astype(np.float32) img_lab = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab) -img_l = img_lab[:,:,0] # pull out L channel +img_l = img_lab[:, :, 0] # pull out L channel # resize lightness channel to network input size -img_l_rs = cv.resize(img_l, (W_in, H_in)) # -img_l_rs -= 50 # subtract 50 for mean-centering +img_l_rs = cv.resize(img_l, (W_in, H_in)) +img_l_rs -= 50 # subtract 50 for mean-centering net.setInput(cv.dnn.blobFromImage(img_l_rs)) -ab_dec = net.forward()[0,:,:,:].transpose((1,2,0)) # this is our result +ab_dec = net.forward()[0, :, :, :].transpose((1, 2, 0)) # this is our result -(H_orig,W_orig) = img_rgb.shape[:2] # original image size +(H_orig, W_orig) = img_rgb.shape[:2] # original image size ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig)) -img_lab_out = np.concatenate((img_l[:,:,np.newaxis],ab_dec_us),axis=2) # concatenate with original image L +img_lab_out = np.concatenate((img_l[:, :, np.newaxis],ab_dec_us), axis=2) # concatenate with original image L img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1) -outputFile = args.input[:-4]+'_colorized.png' -cv.imwrite(outputFile, (img_bgr_out*255).astype(np.uint8)) -print('Colorized image saved as '+outputFile) +end = time.time() +print("Time taken : {:0.5f} secs".format(end - start)) + +outputFile = args.input[:-4] + '_colorized.png' +cv.imwrite(outputFile, (img_bgr_out * 255).astype(np.uint8)) +print('Colorized image saved as ' + outputFile) print('Done !!!') diff --git a/Colorization/colorizeVideo.cpp b/Colorization/colorizeVideo.cpp index 950f44bc2..2b64c5d7f 100644 --- a/Colorization/colorizeVideo.cpp +++ b/Colorization/colorizeVideo.cpp @@ -44,41 +44,63 @@ static float hull_pts[] = { int main(int argc, char **argv) { - + string videoFileName; - // Take arguments from commmand line - if (argc < 2) + string device; + + // Take arguments from command line + if (argc == 3) + { + device = argv[2]; + } + else if (argc == 2) + device = "cpu"; + else { cout << "Please input the greyscale video filename." << endl; cout << "Usage example: ./colorizeVideo.out greyscaleVideo.mp4" << endl; + cout << "If you want to use GPU device instead of CPU, add one more argument." << endl; + cout << "Usage example: ./colorizeVideo.out greyscaleVideo.mp4 gpu" << endl; return 1; } videoFileName = argv[1]; - + cv::VideoCapture cap(videoFileName); if (!cap.isOpened()) { cerr << "Unable to open video" << endl; return 1; } - + + cout << "Input video file: " << videoFileName << endl; + string protoFile = "./models/colorization_deploy_v2.prototxt"; string weightsFile = "./models/colorization_release_v2.caffemodel"; - //string weightsFile = "./models/colorization_release_v2_norebal.caffemodel"; Mat frame, frameCopy; int frameWidth = cap.get(CAP_PROP_FRAME_WIDTH); int frameHeight = cap.get(CAP_PROP_FRAME_HEIGHT); string str = videoFileName; - str.replace(str.end()-4, str.end(), ""); - string outVideoFileName = str+"_colorized.avi"; + str.replace(str.end() - 4, str.end(), ""); + string outVideoFileName = str + "_colorized.avi"; VideoWriter video(outVideoFileName, VideoWriter::fourcc('M','J','P','G'), 60, Size(frameWidth,frameHeight)); - // fixed input size for the pretrained network + // fixed input size for the pre-trained network const int W_in = 224; const int H_in = 224; Net net = dnn::readNetFromCaffe(protoFile, weightsFile); + if (device != "gpu") + { + cout << "Using CPU device" << endl; + net.setPreferableBackend(DNN_TARGET_CPU); + } + else + { + cout << "Using GPU device" << endl; + net.setPreferableBackend(DNN_BACKEND_CUDA); + net.setPreferableTarget(DNN_TARGET_CUDA); + } // setup additional layers: int sz[] = {2, 313, 1, 1}; @@ -88,6 +110,8 @@ int main(int argc, char **argv) Ptr conv8_313_rh = net.getLayer("conv8_313_rh"); conv8_313_rh->blobs.push_back(Mat(1, 313, CV_32F, Scalar(2.606))); + vector timer; + for(;;) { @@ -96,6 +120,8 @@ int main(int argc, char **argv) frameCopy = frame.clone(); + double t = (double) cv::getTickCount(); + // extract L channel and subtract mean Mat lab, L, input; frame.convertTo(frame, CV_32F, 1.0/255); @@ -110,10 +136,10 @@ int main(int argc, char **argv) Mat result = net.forward(); // retrieve the calculated a,b channels from the network output - Size siz(result.size[2], result.size[3]); - Mat a = Mat(siz, CV_32F, result.ptr(0,0)); - Mat b = Mat(siz, CV_32F, result.ptr(0,1)); - + Size out_size(result.size[2], result.size[3]); + Mat a = Mat(out_size, CV_32F, result.ptr(0, 0)); + Mat b = Mat(out_size, CV_32F, result.ptr(0, 1)); + resize(a, a, frame.size()); resize(b, b, frame.size()); @@ -122,11 +148,16 @@ int main(int argc, char **argv) merge(chn, 3, lab); cvtColor(lab, coloredFrame, COLOR_Lab2BGR); - coloredFrame = coloredFrame*255; + t = ((double)cv::getTickCount() - t)/cv::getTickFrequency(); + timer.push_back(t); + + coloredFrame = coloredFrame.mul(255); coloredFrame.convertTo(coloredFrame, CV_8U); video.write(coloredFrame); } + + cout << "Time taken : " << accumulate(timer.begin(), timer.end(), 0.0) << " secs" << endl; cout << "Colorized video saved as " << outVideoFileName << endl << "Done !!!" << endl; cap.release(); video.release(); diff --git a/Colorization/colorizeVideo.py b/Colorization/colorizeVideo.py index db87b966c..170076665 100644 --- a/Colorization/colorizeVideo.py +++ b/Colorization/colorizeVideo.py @@ -1,37 +1,40 @@ # This code is written by Sunita Nayak at BigVision LLC. It is based on the OpenCV project. # It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html -#### Usage example: python3 colorizeVideo.py --input greyscaleVideo.mp4 +# Usage example: python3 colorizeVideo.py --input greyscaleVideo.mp4 import numpy as np import cv2 as cv import argparse import os.path +import time parser = argparse.ArgumentParser(description='Colorize GreyScale Video') parser.add_argument('--input', help='Path to video file.') +parser.add_argument("--device", default="cpu", help="Device to inference on") args = parser.parse_args() -if args.input==None: +if args.input is None: print('Please give the input greyscale video file.') print('Usage example: python3 colorizeVideo.py --input greyscaleVideo.mp4') exit() -if os.path.isfile(args.input)==0: +if not os.path.isfile(args.input): print('Input file does not exist') exit() - + +print("Input video file: ", args.input) + # Read the input video cap = cv.VideoCapture(args.input) hasFrame, frame = cap.read() -outputFile = args.input[:-4]+'_colorized.avi' +outputFile = args.input[:-4] + '_colorized.avi' vid_writer = cv.VideoWriter(outputFile, cv.VideoWriter_fourcc('M','J','P','G'), 60, (frame.shape[1],frame.shape[0])) # Specify the paths for the 2 model files protoFile = "./models/colorization_deploy_v2.prototxt" weightsFile = "./models/colorization_release_v2.caffemodel" -#weightsFile = "./models/colorization_release_v2_norebal.caffemodel" # Load the cluster centers pts_in_hull = np.load('./pts_in_hull.npy') @@ -39,15 +42,25 @@ # Read the network into Memory net = cv.dnn.readNetFromCaffe(protoFile, weightsFile) +if args.device == "cpu": + net.setPreferableBackend(cv.dnn.DNN_TARGET_CPU) + print("Using CPU device") +elif args.device == "gpu": + net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA) + net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA) + print("Using GPU device") + # populate cluster centers as 1x1 convolution kernel pts_in_hull = pts_in_hull.transpose().reshape(2, 313, 1, 1) net.getLayer(net.getLayerId('class8_ab')).blobs = [pts_in_hull.astype(np.float32)] net.getLayer(net.getLayerId('conv8_313_rh')).blobs = [np.full([1, 313], 2.606, np.float32)] -#from opencv sample +# from opencv sample W_in = 224 H_in = 224 +timer = [] + while cv.waitKey(1): hasFrame, frame = cap.read() @@ -55,26 +68,32 @@ if not hasFrame: break - img_rgb = (frame[:,:,[2, 1, 0]] * 1.0 / 255).astype(np.float32) + start = time.time() + + img_rgb = (frame[:, :, [2, 1, 0]] * 1.0 / 255).astype(np.float32) img_lab = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab) - img_l = img_lab[:,:,0] # pull out L channel + img_l = img_lab[:, :, 0] # pull out L channel # resize lightness channel to network input size img_l_rs = cv.resize(img_l, (W_in, H_in)) - img_l_rs -= 50 # subtract 50 for mean-centering + img_l_rs -= 50 # subtract 50 for mean-centering net.setInput(cv.dnn.blobFromImage(img_l_rs)) - ab_dec = net.forward()[0,:,:,:].transpose((1,2,0)) # this is our result + ab_dec = net.forward()[0, :, :, :].transpose((1, 2, 0)) # this is our result - (H_orig,W_orig) = img_rgb.shape[:2] # original image size + (H_orig,W_orig) = img_rgb.shape[:2] # original image size ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig)) - img_lab_out = np.concatenate((img_l[:,:,np.newaxis],ab_dec_us),axis=2) # concatenate with original L channel + img_lab_out = np.concatenate((img_l[:, :, np.newaxis], ab_dec_us), axis=2) # concatenate with original L channel img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1) - vid_writer.write((img_bgr_out*255).astype(np.uint8)) + end = time.time() + timer.append(end - start) + + vid_writer.write((img_bgr_out * 255).astype(np.uint8)) vid_writer.release() -print('Colorized video saved as '+outputFile) +print("Time taken : {:0.5f} secs".format(sum(timer))) +print('Colorized video saved as ' + outputFile) print('Done !!!')