Personal study repo for UCSB Games 101
- Use OpenMP to parallelize Ray Generation
- Use -O3 to optimize the code
#include <omp.h>
// Distribute the samples of the current pixel across the OpenMP threads.
#pragma omp parallel for
for (int k = 0; k < spp; k++) {
    // Trace the ray outside the critical section so the expensive part runs concurrently.
    const auto radiance = scene.castRay(Ray(eye_pos, dir), 0) / spp;
    // Every iteration accumulates into the same framebuffer[m]; an unsynchronized
    // += from several threads is a data race and can lose samples.
    #pragma omp critical
    framebuffer[m] += radiance;
}
Add these lines to CMakeLists.txt
to enable OpenMP on macOS:
# Set the compiler to clang++ from LLVM
# NOTE(review): this is a machine-specific absolute path (it looks like a Homebrew
# LLVM install under /usr/local); adjust it to wherever LLVM lives on your machine,
# e.g. the output of `brew --prefix llvm` — TODO confirm for your setup.
set(CMAKE_CXX_COMPILER "/usr/local/opt/llvm/bin/clang++")
# Add the compile flag -fopenmp
# The existing CMAKE_CXX_FLAGS are kept; -fopenmp and -O3 are appended to them.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -O3")
My machine has 12 threads in total, and the runtimes are as follows:
SPP = 32 with runtime 18s (leftmost/upmost)
SPP = 128 with runtime 38s
SPP = 512 with runtime 156s (rightmost/downmost)
-
Also note that
`wo`
is pointing inwards, so we should use:
Vector3f wo = ray.direction;
Vector3f f_r = hit.m->eval(wo, ws, hit.normal);
float pdf_indir = hit.m->pdf(wo, wi, hit.normal);
Vector3f f_r = hit.m->eval(wo, wi, hit.normal);
-
If we want
`wo`
to point outwards, we should use:
Vector3f wo = -ray.direction;
Vector3f f_r = hit.m->eval(ws, wo, hit.normal);
float pdf_indir = hit.m->pdf(wi, wo, hit.normal);
Vector3f f_r = hit.m->eval(wi, wo, hit.normal);
- SAH-based BVH construction reference: https://pbr-book.org/4ed/Primitives_and_Intersection_Acceleration/Bounding_Volume_Hierarchies#TheSurfaceAreaHeuristic
- Convert Screen Space to World Space (suppose the camera is at (0, 0, 0) and the near plane is at -1)
- Möller Trumbore Algorithm
Vector3f e1 = v1 - v0;
Vector3f e2 = v2 - v0;
Vector3f s = orig - v0;
Vector3f s1 = crossProduct(dir, e2);
Vector3f s2 = crossProduct(s, e1);
float div = 1.0f / dotProduct(s1, e1);
tnear = dotProduct(s2, e2) * div;
u = dotProduct(s1, s) * div;
v = dotProduct(s2, dir) * div;
// barycentric coordinates must be in the [0, 1] range, sum must be 1 <=> point is inside the triangle
return tnear >= 0 && u >= 0 && v >= 0 && u + v <= 1;
- De Casteljau
/// Evaluate a Bézier curve at parameter t using de Casteljau's algorithm.
///
/// The first n entries of control_points form the control polygon. They are
/// overwritten in place with the intermediate interpolation results, so a caller
/// that still needs the original control points must pass a copy.
/// The reduction is done iteratively; the name is kept for existing callers.
///
/// @param control_points Control polygon; modified in place.
/// @param n              Number of leading control points to use. Values larger
///                       than control_points.size() are clamped to the size.
/// @param t              Curve parameter.
/// @return The curve point at t, or a default-constructed point when there are
///         no control points to evaluate (n < 1 or an empty vector).
cv::Point2f recursive_bezier(std::vector<cv::Point2f> &control_points, int n, float t) {
    // Never read past the end of the vector.
    const int available = static_cast<int>(control_points.size());
    if (n > available)
        n = available;
    // Nothing to evaluate; previously this case recursed without ever terminating.
    if (n < 1)
        return cv::Point2f();

    // Each pass replaces neighbouring pairs by their interpolation at t,
    // shrinking the active polygon by one point until a single point remains.
    for (int level = n; level > 1; --level) {
        for (int i = 0; i < level - 1; ++i)
            control_points[i] = (1 - t) * control_points[i] + t * control_points[i + 1];
    }
    return control_points[0];
}
/// Draw the Bézier curve defined by control_points into the green channel of window.
///
/// The curve is sampled at 1001 evenly spaced parameters in [0, 1], both end
/// points included. Samples that fall outside the window are skipped.
///
/// @param control_points Control polygon of the curve; an empty vector draws nothing.
/// @param window         Image accessed as cv::Vec3b pixels; channel 1 of every
///                       sampled pixel is set to 255.
void bezier(const std::vector<cv::Point2f> &control_points, cv::Mat &window) {
    if (control_points.empty())
        return;

    // Integer step counter: accumulating t += 0.001 in floating point can drift
    // past 1.0 and silently drop the final sample.
    constexpr int kSteps = 1000;
    std::vector<cv::Point2f> points;
    for (int step = 0; step <= kSteps; ++step) {
        const float t = static_cast<float>(static_cast<double>(step) / kSteps);
        // recursive_bezier overwrites its input, so start from a fresh copy each time
        // (assignment reuses the already allocated storage).
        points = control_points;
        const cv::Point2f point = recursive_bezier(points, static_cast<int>(points.size()), t);

        const int col = static_cast<int>(point.x);
        const int row = static_cast<int>(point.y);
        // Guard against control points that lead the curve outside the image.
        if (col < 0 || col >= window.cols || row < 0 || row >= window.rows)
            continue;
        window.at<cv::Vec3b>(row, col)[1] = 255;
    }
}
- Anti-aliasing
int x0 = point.x;
int y0 = point.y;
int x1 = x0 + 1; // should check if x1 is out of bound
int y1 = y0 + 1; // should check if y1 is out of bound
float dx = point.x - x0;
int left = (1 - dx) * 255;
int right = 255 - left;
float dy = point.y - y0;
int color00 = std::min(255.f, window.at<cv::Vec3b>(y0, x0)[1] + left * (1 - dy));
int color01 = std::min(255.f, window.at<cv::Vec3b>(y1, x0)[1] + left * dy);
int color10 = std::min(255.f, window.at<cv::Vec3b>(y0, x1)[1] + right * (1 - dy));
int color11 = std::min(255.f, window.at<cv::Vec3b>(y1, x1)[1] + right * dy);
window.at<cv::Vec3b>(y0, x0)[1] = color00;
window.at<cv::Vec3b>(y1, x0)[1] = color01;
window.at<cv::Vec3b>(y0, x1)[1] = color10;
window.at<cv::Vec3b>(y1, x1)[1] = color11;
- Change the initialization of depth buffer in
clear
function
std::fill(depth_buf.begin(), depth_buf.end(), -std::numeric_limits<float>::infinity());
getColorBilinear
/// Sample the texture at (u, v) with bilinear interpolation.
///
/// u and v are clamped to [0, 1]. v is flipped (1 - v) before being scaled to
/// image rows. The four surrounding texels are blended in floating point, so
/// negative channel differences are not lost to unsigned 8-bit saturation.
///
/// @param u Horizontal texture coordinate.
/// @param v Vertical texture coordinate.
/// @return Interpolated colour, with channels in the order stored in image_data.
Eigen::Vector3f getColorBilinear(float u, float v) {
    u = std::min(std::max(u, 0.0f), 1.0f);
    v = std::min(std::max(v, 0.0f), 1.0f);

    const float u_img = u * width;
    const float v_img = (1 - v) * height;

    // Valid indices are [0, width - 1] and [0, height - 1]; clamp both neighbours.
    const int u_left = std::min(static_cast<int>(u_img), width - 1);
    const int u_right = std::min(u_left + 1, width - 1);
    const int v_top = std::min(static_cast<int>(v_img), height - 1);
    const int v_bottom = std::min(v_top + 1, height - 1);

    // Fractional offsets measured from the left/top texel, both in [0, 1].
    const float u_ratio = std::min(u_img - u_left, 1.0f);
    const float v_ratio = std::min(v_img - v_top, 1.0f);

    // Fetch a texel and widen it to float before doing any arithmetic.
    auto texel = [&](int row, int col) {
        const cv::Vec3b c = image_data.at<cv::Vec3b>(row, col);
        return Eigen::Vector3f(c[0], c[1], c[2]);
    };

    const Eigen::Vector3f top_left = texel(v_top, u_left);
    const Eigen::Vector3f top_right = texel(v_top, u_right);
    const Eigen::Vector3f bottom_left = texel(v_bottom, u_left);
    const Eigen::Vector3f bottom_right = texel(v_bottom, u_right);

    // Interpolate horizontally on both rows, then vertically from top to bottom.
    const Eigen::Vector3f color_top = top_left + (top_right - top_left) * u_ratio;
    const Eigen::Vector3f color_bottom = bottom_left + (bottom_right - bottom_left) * u_ratio;
    const Eigen::Vector3f color = color_top + (color_bottom - color_top) * v_ratio;
    return color;
}
- We need to change the initialization of depth buffer in
clear
function
std::fill(depth_buf.begin(), depth_buf.end(), -std::numeric_limits<float>::infinity());
void rasterize_triangle_ssaa(const Triangle &t);
void rasterize_triangle_ssaa2(const Triangle &t);
- Method 2: for a pixel on an edge (cnt < 4), we clear its depth buffer on each pass to ensure that both colors are painted
- Reference: https://zhuanlan.zhihu.com/p/454001952
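Rotation by angle $\alpha$ around axis $\mathbf{n}$ (Rodrigues' rotation formula): $\mathbf{R}(\mathbf{n}, \alpha) = \cos\alpha\,\mathbf{I} + (1 - \cos\alpha)\,\mathbf{n}\mathbf{n}^{T} + \sin\alpha\,[\mathbf{n}]_{\times}$
- By default, any axis
$\mathbf{n}$ passes through the origin (0, 0, 0)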
/// Build the homogeneous rotation matrix for a rotation about an axis through
/// the origin, using Rodrigues' rotation formula:
///   R = cos(a) * I + (1 - cos(a)) * n * n^T + sin(a) * [n]_x
///
/// @param axis  Rotation axis. It is normalized here because the formula is only
///              a rotation for a unit axis. A zero-length axis yields the identity.
/// @param angle Rotation angle in degrees.
/// @return 4x4 matrix whose upper-left 3x3 block is the rotation; the rest is identity.
Eigen::Matrix4f get_rotation(Vector3f axis, float angle) {
    Eigen::Matrix4f rotate = Eigen::Matrix4f::Identity();

    // A zero axis defines no rotation; avoid dividing by zero below.
    const float length = axis.norm();
    if (length == 0.0f)
        return rotate;
    axis /= length;

    // Evaluate the trigonometric terms once.
    const float cos_a = static_cast<float>(cos(angle / 180 * MY_PI));
    const float sin_a = static_cast<float>(sin(angle / 180 * MY_PI));

    const Eigen::Matrix3f I = Eigen::Matrix3f::Identity();
    const Eigen::Matrix3f NNT = axis * axis.transpose();
    // Cross-product (skew-symmetric) matrix of the axis.
    Eigen::Matrix3f A_star;
    A_star << 0, -axis[2], axis[1],
        axis[2], 0, -axis[0],
        -axis[1], axis[0], 0;

    const Eigen::Matrix3f R = cos_a * I + (1 - cos_a) * NNT + sin_a * A_star;
    rotate.block(0, 0, 3, 3) = R;
    return rotate;
}