From 3e408cab9fa4398b1c8dd37e50d8df8940af6d87 Mon Sep 17 00:00:00 2001
From: Ye Kuang <yekuang.ky@gmail.com>
Date: Wed, 21 Oct 2020 10:25:53 +0900
Subject: [PATCH 1/2] [GUI] [mac] Support fast_gui on macOS

---
 python/taichi/lang/meta.py | 17 ++++++++++++++---
 python/taichi/misc/gui.py  |  3 ++-
 taichi/gui/cocoa.cpp       | 28 +++++++++++++++++-----------
 3 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/python/taichi/lang/meta.py b/python/taichi/lang/meta.py
index f28cbb5744ce1..1d9e46eae01ad 100644
--- a/python/taichi/lang/meta.py
+++ b/python/taichi/lang/meta.py
@@ -16,13 +16,24 @@ def tensor_to_ext_arr(tensor: ti.template(), arr: ti.ext_arr()):
 
 
 @ti.kernel
-def vector_to_fast_image(img: ti.template(), out: ti.ext_arr()):
+def vector_to_fast_image(img: ti.template(), small_endian: ti.template(),
+                         out: ti.ext_arr()):
     # FIXME: Why is ``for i, j in img:`` slower than:
     for i, j in ti.ndrange(*img.shape):
         u, v, w = min(255, max(0, int(img[i, img.shape[1] - 1 - j] * 255)))
+        idx = j * img.shape[0] + i
         # We use i32 for |out| since OpenGL and Metal doesn't support u8 types
-        # TODO: treat Cocoa and Big-endian machines, with XOR logic
-        out[j * img.shape[0] + i] = w + (v << 8) + (u << 16)
+        if ti.static(small_endian):
+            out[idx] = w + (v << 8) + (u << 16)
+        else:
+            # What's -16777216?
+            #
+            # On Mac, we need to set the alpha channel to 0xff. The Cocoa layer
+            # reads pixels as R, G, B, A bytes (ABGR in the i32), so we need to
+            # add 0xff000000, which is -16777216 in I32's legit range. (Albeit
+            # the clarity, adding 0xff000000 doesn't work.)
+            alpha = -16777216
+            out[idx] = (w << 16) + (v << 8) + u + alpha
 
 
 @ti.kernel
diff --git a/python/taichi/misc/gui.py b/python/taichi/misc/gui.py
index f12196145bb19..28524f0058520 100644
--- a/python/taichi/misc/gui.py
+++ b/python/taichi/misc/gui.py
@@ -164,7 +164,8 @@ def set_image(self, img):
                     "Only RGB images are supported in GUI.set_image when fast_gui=True"
 
             from taichi.lang.meta import vector_to_fast_image
-            vector_to_fast_image(img, self.img)
+            is_small_endian = ti.get_os_name() != 'osx'
+            vector_to_fast_image(img, is_small_endian, self.img)
             return
 
         if isinstance(img, ti.Expr):
diff --git a/taichi/gui/cocoa.cpp b/taichi/gui/cocoa.cpp
index f72e086bcb287..fdb458e514eb8 100644
--- a/taichi/gui/cocoa.cpp
+++ b/taichi/gui/cocoa.cpp
@@ -194,21 +194,27 @@ void updateLayer(id self, SEL _) {
   using namespace taichi;
   auto *gui = gui_from_id[self];
   auto width = gui->width, height = gui->height;
-  auto &img = gui->canvas->img;
-  auto &data = gui->img_data;
-  for (int j = 0; j < height; j++) {
-    for (int i = 0; i < width; i++) {
-      int index = 4 * (i + j * width);
-      auto pixel = img[i][height - j - 1];
-      data[index++] = uint8(clamp(int(pixel[0] * 255.0_f), 0, 255));
-      data[index++] = uint8(clamp(int(pixel[1] * 255.0_f), 0, 255));
-      data[index++] = uint8(clamp(int(pixel[2] * 255.0_f), 0, 255));
-      data[index++] = 255;  // alpha
+  uint8_t *data_ptr = nullptr;
+  if (gui->fast_gui) {
+    data_ptr = reinterpret_cast<uint8_t *>(gui->fast_buf);
+  } else {
+    auto &img = gui->canvas->img;
+    auto &data = gui->img_data;
+    data_ptr = data.data();
+    for (int j = 0; j < height; j++) {
+      for (int i = 0; i < width; i++) {
+        int index = 4 * (i + j * width);
+        auto pixel = img[i][height - j - 1];
+        data[index++] = uint8(clamp(int(pixel[0] * 255.0_f), 0, 255));
+        data[index++] = uint8(clamp(int(pixel[1] * 255.0_f), 0, 255));
+        data[index++] = uint8(clamp(int(pixel[2] * 255.0_f), 0, 255));
+        data[index++] = 255;  // alpha
+      }
     }
   }
 
   CGDataProviderRef provider = CGDataProviderCreateWithData(
-      nullptr, data.data(), gui->img_data_length, nullptr);
+      nullptr, data_ptr, gui->img_data_length, nullptr);
   CGColorSpaceRef colorspace = CGColorSpaceCreateDeviceRGB();
   CGImageRef image =
       CGImageCreate(width, height, 8, 32, width * 4, colorspace,

From 456b7efdc638a8cbe32ff2ac37ebb8f0c5385dd2 Mon Sep 17 00:00:00 2001
From: Ye Kuang <yekuang.ky@gmail.com>
Date: Thu, 22 Oct 2020 19:05:31 +0900
Subject: [PATCH 2/2] address comments

---
 python/taichi/lang/meta.py | 11 +++++------
 python/taichi/misc/gui.py  |  3 +--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/python/taichi/lang/meta.py b/python/taichi/lang/meta.py
index 1d9e46eae01ad..7ed3f48e60661 100644
--- a/python/taichi/lang/meta.py
+++ b/python/taichi/lang/meta.py
@@ -16,15 +16,14 @@ def tensor_to_ext_arr(tensor: ti.template(), arr: ti.ext_arr()):
 
 
 @ti.kernel
-def vector_to_fast_image(img: ti.template(), small_endian: ti.template(),
-                         out: ti.ext_arr()):
+def vector_to_fast_image(img: ti.template(), out: ti.ext_arr()):
     # FIXME: Why is ``for i, j in img:`` slower than:
     for i, j in ti.ndrange(*img.shape):
-        u, v, w = min(255, max(0, int(img[i, img.shape[1] - 1 - j] * 255)))
+        r, g, b = min(255, max(0, int(img[i, img.shape[1] - 1 - j] * 255)))
         idx = j * img.shape[0] + i
         # We use i32 for |out| since OpenGL and Metal doesn't support u8 types
-        if ti.static(small_endian):
-            out[idx] = w + (v << 8) + (u << 16)
+        if ti.static(ti.get_os_name() != 'osx'):
+            out[idx] = (r << 16) + (g << 8) + b
         else:
             # What's -16777216?
             #
@@ -33,7 +32,7 @@ def vector_to_fast_image(img: ti.template(), small_endian: ti.template(),
             # add 0xff000000, which is -16777216 in I32's legit range. (Albeit
             # the clarity, adding 0xff000000 doesn't work.)
             alpha = -16777216
-            out[idx] = (w << 16) + (v << 8) + u + alpha
+            out[idx] = (b << 16) + (g << 8) + r + alpha
 
 
 @ti.kernel
diff --git a/python/taichi/misc/gui.py b/python/taichi/misc/gui.py
index 28524f0058520..f12196145bb19 100644
--- a/python/taichi/misc/gui.py
+++ b/python/taichi/misc/gui.py
@@ -164,8 +164,7 @@ def set_image(self, img):
                     "Only RGB images are supported in GUI.set_image when fast_gui=True"
 
             from taichi.lang.meta import vector_to_fast_image
-            is_small_endian = ti.get_os_name() != 'osx'
-            vector_to_fast_image(img, is_small_endian, self.img)
+            vector_to_fast_image(img, self.img)
             return
 
         if isinstance(img, ti.Expr):