Merge pull request #2 from reyoung/mac_port

Mac port
reyoung · Sep 13, 2016 · 7dec631 · 7dec631
2 parents 4315a38 + 55a1a75
commit 7dec631
Show file tree

Hide file tree

Showing 20 changed files with 566 additions and 261 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 *.DS_Store
 build/
+*.user
diff --git a/cmake/util.cmake b/cmake/util.cmake
@@ -104,9 +104,10 @@ function(link_paddle_exe TARGET_NAME)
         ${PROTOBUF_LIBRARY}
         ${CMAKE_THREAD_LIBS_INIT}
         ${CBLAS_LIBS}
-        ${CMAKE_DL_LIBS}
         ${INTERAL_LIBS}
-        ${ZLIB_LIBRARIES})
+        ${ZLIB_LIBRARIES}
+        ${CMAKE_DL_LIBS}
+        )
 
     if(WITH_PYTHON)
         target_link_libraries(${TARGET_NAME}

diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -277,6 +277,7 @@ void NeuralNetwork::getState(MachineState& machineState) {
 }
 
 void NeuralNetwork::backward(const UpdateCallback& callback) {
+  gLayerStackTrace.pop("");  // tell layer trace is during backward.
   FOR_EACH_R(layer, layers_) {
     REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str());
     if ((*layer)->needGradient()) {

diff --git a/paddle/math/Allocator.h b/paddle/math/Allocator.h
@@ -49,7 +49,7 @@ class CpuAllocator : public Allocator {
    */
   virtual void* alloc(size_t size) {
       void* ptr;
-      posix_memalign(&ptr, 32ul, size);
+      CHECK_EQ(posix_memalign(&ptr, 32ul, size), 0);
       CHECK(ptr) << "Fail to allocate CPU memory: size=" << size;
       return ptr;
   }

diff --git a/paddle/math/tests/test_SIMDFunctions.cpp b/paddle/math/tests/test_SIMDFunctions.cpp
@@ -38,7 +38,7 @@ static std::mt19937 RandomEngine(time(0));
 inline static std::unique_ptr<float[]> NewVector(size_t len = VECTOR_LEN,
                                                  size_t align = ALIGN) {
   float* ptr;
-  posix_memalign((void**)&ptr, align, len * sizeof(float));
+  CHECK_EQ(posix_memalign((void**)&ptr, align, len * sizeof(float)), 0);
   return std::unique_ptr<float[]>(ptr);
 }
 

diff --git a/paddle/math/tests/test_perturbation.cpp b/paddle/math/tests/test_perturbation.cpp
@@ -249,9 +249,4 @@ TEST_F(PerturbationTest, scale_test) {
   }
 }
 
-int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
-
 #endif
diff --git a/paddle/parameter/tests/test_common.cpp b/paddle/parameter/tests/test_common.cpp
@@ -125,9 +125,11 @@ TEST_F(CommonTest, sgdUpdate) {
   const size_t alignHeader[] = {0, 2, 3, 5, 7, 8};
   for (auto& size : sizeVec_) {
     real *gradientBuffer, *valueBuffer, *momentumBuffer;
-    posix_memalign((void**)&gradientBuffer, 32, sizeof(real) * size);
-    posix_memalign((void**)&valueBuffer, 32, sizeof(real) * size);
-    posix_memalign((void**)&momentumBuffer, 32, sizeof(real) * size);
+    CHECK_EQ(posix_memalign((void**)&gradientBuffer, 32, sizeof(real) * size),
+        0);
+    CHECK_EQ(posix_memalign((void**)&valueBuffer, 32, sizeof(real) * size), 0);
+    CHECK_EQ(posix_memalign((void**)&momentumBuffer, 32, sizeof(real) * size),
+        0);
 
     for (size_t i = 0; i < size; i++) {
       gradientBuffer[i] = 1.0;

diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt
@@ -2,12 +2,18 @@
 
 file(GLOB UTIL_HEADERS . *.h)
 file(GLOB UTIL_SOURCES . *.cpp)
-
+if(APPLE)
+    file(GLOB UTIL_ARCH_SOURCES . arch/osx/*.cpp)
+else()
+    file(GLOB UTIL_ARCH_SOURCES . arch/linux/*.cpp)
+endif()
 add_library(paddle_utils STATIC
-        ${UTIL_SOURCES})
+        ${UTIL_SOURCES}
+        ${UTIL_ARCH_SOURCES})
 add_style_check_target(paddle_utils ${UTIL_HEADERS})
-add_style_check_target(paddle_utils ${UTIL_SOURCES})
+add_style_check_target(paddle_utils ${UTIL_SOURCES}
+    ${UTIL_ARCH_SOURCES})
 add_dependencies(paddle_utils gen_proto_cpp)
 if(WITH_TESTING)
     add_subdirectory(tests)
-endif()
+endif()
diff --git a/paddle/utils/CustomStackTrace.cpp b/paddle/utils/CustomStackTrace.cpp
@@ -14,9 +14,44 @@ limitations under the License. */
 
 
 #include "CustomStackTrace.h"
+#include "CommandLineParser.h"
+#include <iostream>
+
+P_DEFINE_bool(layer_stack_error_only_current_thread,
+    true,
+    "Dump current thread or whole process layer stack when signal error "
+    "occurred. true means only dump current thread layer stack");
 
 namespace paddle {
 
 CustomStackTrace<std::string> gLayerStackTrace;
 
+static std::mutex gLayerStackTraceMtx;
+void installLayerStackTracer() {
+  logging::installFailureWriter([](const char* data, int sz) {
+    std::lock_guard<std::mutex> guard(gLayerStackTraceMtx);
+    if (!gLayerStackTrace.empty()) {
+      // Compare ids directly (hashes can collide); default id = none printed.
+      std::thread::id curTid;
+      gLayerStackTrace.dump([&curTid](const std::thread::id& tid,
+                            bool* isForwarding,
+                            const std::string& layerName) {
+        if (curTid != tid) {  // first item of another thread: new header
+          if (curTid != std::thread::id()) {
+            std::cerr << std::endl;
+          }
+          curTid = tid;
+          std::cerr << "Thread [" << tid << "] ";
+          if (isForwarding) {  // null when no push/pop flag is registered
+            std::cerr << (*isForwarding ? "Forwarding ": "Backwarding ");
+          }
+        }
+        std::cerr << layerName << ", ";
+      }, FLAGS_layer_stack_error_only_current_thread);
+      std::cerr << std::endl;
+    }
+    std::cerr.write(data, sz);  // always forward the original failure text
+  });
+}
+
 }  // namespace paddle
diff --git a/paddle/utils/CustomStackTrace.h b/paddle/utils/CustomStackTrace.h
@@ -15,6 +15,9 @@ limitations under the License. */
 #pragma once
 
 #include <stack>
+#include <thread>
+#include <unordered_map>
+#include <functional>
 
 #include "ThreadLocal.h"
 
@@ -29,71 +32,160 @@ namespace paddle {
  * @code{.cpp}
  * 
  * paddle::CustomStackTrace<std::string> stack;
- * PASS_TEST=0;
  * for (auto& layer : layers){
  *   stack.push(layer->getName());
- *   layer->forward(passType);
+ *   layer->forward();
  * }
- * for (auto& layer : layers){
+ *
+ * stack.pop("");  // mark under pop stage.
+ *
+ * for (auto it = layers.rbegin(); it != layers.rend(); ++it){
+ *   auto& layer = *it;
  *   layer->backward(passType);
  *   stack.pop(layer->getName());
  * }
- * 
- * if(passType == PASS_TEST) {
- *   stack.clear();
- * }
- * else {
- *   stack.dump([](const std::string& layername){
- *     LOG(INFO) << "LayerName: " << layername;
- *   })
- * }
- * 
  *
  * @endcode
  */
 template <typename T>
 class CustomStackTrace{
 public:
   /**
-   * @brief Pop out an item from the top of the stack. For safety the item 
-   * will be poped should equal to ip.
+   * @brief Pop out an item from the top of the stack if item == top.
+   *        Else, just set status to popping.
    */
-  void pop(const T& ip) {
-    auto& p = *logstack_;
-    CHECK_EQ(ip, p.top());
-    p.pop();
+  void pop(const T& item) {
+    pushing() = false;  // any pop() call, matching or not, marks "popping"
+    auto& s = this->stack();
+    if (!s.empty() && item == s.top()) {  // top() on an empty stack is UB
+      s.pop();
+    }
+  }
+
   /**
-   * @brief Empty the stack by sequence from top to button.
-   * @param[in] callback A function deal with each item while dumping.
-   * It must have and only have a in parameter which is the stack item.
+   * @brief clear current thread stack.
    */
-  template <typename Callback>
-  void dump(Callback callback) {
-    auto& p = *logstack_;
-    while (!p.empty()) {
-      callback(p.top());
-      p.pop();
+  void clear() {
+    auto& s = stack();  // the calling thread's stack; others are untouched
+    while (!s.empty()) {  // std::stack has no clear(), so pop until empty
+      s.pop();
     }
   }
+
   /**
-   * @brief Only empty the stack.
+   * @brief Return true if every thread's stack is empty.
+   * @return true if empty
    */
-  void clear() {
-    dump([](const T& ip){});
+  bool empty() const {
+    std::lock_guard<std::mutex> guard(mtx_);  // guards the registry maps
+    // Bind by reference so no map entry is copied while the lock is held.
+    for (const auto& entry : stackBuffers_) {
+      if (!entry.second->empty()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+
+  /**
+   * @brief DumpCallback Type. It will be invoked many times by dump method.
+   *
+   * The first parameter is stack thread id.
+   * The second parameter tells whether the stack's last action was a push.
+   * The third parameter is the item in stack.
+   */
+  typedef std::function<void(const std::thread::id& /*threadId*/,
+                              bool* /*isPushing*/,
+                              const T& /*item*/)> DumpCallback;
+
+  /**
+   * Dump every thread's stack; each stack that is dumped is emptied.
+   */
+  void dump(const DumpCallback& callback, bool onlyCurrentThread = false) {
+    std::lock_guard<std::mutex> guard(mtx_);
+    const std::thread::id self = std::this_thread::get_id();  // hoisted
+    for (const auto& entry : stackBuffers_) {  // by reference: no pair copy
+      const std::thread::id& tid = entry.first;
+      if (onlyCurrentThread && tid != self) {
+        continue;
+      }
+      bool* isPush = nullptr;  // stays null when no flag is registered
+      const auto flagIt = pushingBuffers_.find(tid);
+      if (flagIt != pushingBuffers_.end()) {
+        isPush = flagIt->second;
+      }
+      std::stack<T>& s = *entry.second;
+      while (!s.empty()) {  // draining: a dumped stack ends up empty
+        callback(tid, isPush, s.top());
+        s.pop();
+      }
+    }
   }
+
   /**
-   * @brief Push item ip to the top of the stack.
+   * @brief Push item to current thread stack.
    */
-  void push(const T& ip) {
-    auto& p = *logstack_;
-    p.push(ip);
+  void push(const T& item) {
+    pushing() = true;  // remember that the last action was a push
+    std::stack<T>& current = this->stack();
+    current.push(item);
   }
 
 private:
-  ThreadLocalD<std::stack<T> > logstack_;
+  /**
+   * Get thread local attribute, and save them into a map (threadId => TYPE*)
+   *
+   * @tparam TYPE thread local attribute type.
+   * @param threadLocal Thread Local object.
+   * @param buffers a map from threadId to TYPE*
+   */
+  template <typename TYPE>
+  inline TYPE& getThreadLocal(
+      ThreadLocal<TYPE>& threadLocal,
+      std::unordered_map<std::thread::id, TYPE*>& buffers) {
+    // Lock-free fast path; get(false) presumably does not create -- confirm.
+    if (TYPE* existing = threadLocal.get(false)) {
+      return *existing;
+    }
+    std::lock_guard<std::mutex> guard(this->mtx_);
+    TYPE* created = threadLocal.get();
+    // Assign, not insert: an id that is already registered (e.g. a reused
+    // thread id) must be re-pointed at the instance just obtained.
+    buffers[std::this_thread::get_id()] = created;
+    return *created;
+  }
+
+  /**
+   * @brief Get thread local stack reference.
+   */
+  std::stack<T>& stack() {
+    auto& registry = this->stackBuffers_;  // threadId => stack pointer
+    return this->getThreadLocal(this->logStack_, registry);
+  }
+
+  /**
+   * @brief Get thread local pushing flag.
+   */
+  bool& pushing() {
+    auto& registry = this->pushingBuffers_;  // threadId => flag pointer
+    return this->getThreadLocal(this->isPushing_, registry);
+  }
+
+private:
+  mutable std::mutex mtx_;
+
+  std::unordered_map<std::thread::id, std::stack<T>* > stackBuffers_;
+  std::unordered_map<std::thread::id, bool* > pushingBuffers_;
+  ThreadLocal<bool> isPushing_;
+  ThreadLocal<std::stack<T> > logStack_;
 };
 
 extern CustomStackTrace<std::string> gLayerStackTrace;
 
+/**
+ * @brief Install a failure handler to print layer stack when error.
+ */
+extern void installLayerStackTracer();
+
 }  // namespace paddle