diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index fca52828957a2..eb1522a178d48 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -277,6 +277,7 @@ void NeuralNetwork::getState(MachineState& machineState) { } void NeuralNetwork::backward(const UpdateCallback& callback) { + gLayerStackTrace.pop(""); // tell layer trace is during backward. FOR_EACH_R(layer, layers_) { REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str()); if ((*layer)->needGradient()) { diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index e1762e8d360de..cef8772fc254f 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -114,27 +114,30 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { } else { create(tmpBiasGrad_, 1, channels_, &betaGrad); } -#if CUDNN_VERSION < 5000 + // because of the different api of cudnn v4 and v5. - if (weight_->getWGrad()) { - create(tmpWGrad_, 1, channels_, &gammaGrad); - } - if (biases_ && biases_->getWGrad()) { - create(tmpBiasGrad_, 1, channels_, &betaGrad); + if (hl_get_cudnn_lib_version() < 5000) { + if (weight_->getWGrad()) { + create(tmpWGrad_, 1, channels_, &gammaGrad); + } + if (biases_ && biases_->getWGrad()) { + create(tmpBiasGrad_, 1, channels_, &betaGrad); + } } -#endif + hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad, ioDesc_, inGrad, bnParamDesc_, gamma, gammaGrad, betaGrad, EPS, savedMean, savedInvVar); -#if CUDNN_VERSION < 5000 // because of the different api of cudnn v4 and v5. - if (weight_->getWGrad() && biases_->getWGrad()) { - weight_->getWGrad()->add(*tmpWGrad_); - biases_->getWGrad()->add(*tmpBiasGrad_); + if (hl_get_cudnn_lib_version() < 5000) { + if (weight_->getWGrad() && biases_->getWGrad()) { + weight_->getWGrad()->add(*tmpWGrad_); + biases_->getWGrad()->add(*tmpBiasGrad_); + } } -#endif + { REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); biases_->getParameterPtr()->incUpdate(callback); diff --git a/paddle/utils/CustomStackTrace.cpp b/paddle/utils/CustomStackTrace.cpp index 50d7f5402f586..232a478ecd93a 100644 --- a/paddle/utils/CustomStackTrace.cpp +++ b/paddle/utils/CustomStackTrace.cpp @@ -14,9 +14,44 @@ limitations under the License. */ #include "CustomStackTrace.h" +#include "CommandLineParser.h" +#include + +P_DEFINE_bool(layer_stack_error_only_current_thread, + true, + "Dump current thread or whole process layer stack when signal error " + "occurred. true means only dump current thread layer stack"); namespace paddle { CustomStackTrace gLayerStackTrace; +static std::mutex gLayerStackTraceMtx; +void installLayerStackTracer() { + logging::installFailureWriter([](const char* data, int sz) { + std::lock_guard guard(gLayerStackTraceMtx); + if (!gLayerStackTrace.empty()) { + size_t curTid = -1UL; + std::hash hasher; + gLayerStackTrace.dump([&curTid, &hasher](std::thread::id tid, + bool* isForwarding, + const std::string& layerName) { + if (curTid != hasher(tid)) { + if (curTid != -1UL) { + std::cerr << std::endl; + } + curTid = hasher(tid); + std::cerr << "Thread [" << tid << "] "; + if (isForwarding) { + std::cerr << (*isForwarding ? "Forwarding ": "Backwarding "); + } + } + std::cerr << layerName << ", "; + }, FLAGS_layer_stack_error_only_current_thread); + std::cerr << std::endl; + } + std::cerr.write(data, sz); + }); +} + } // namespace paddle diff --git a/paddle/utils/CustomStackTrace.h b/paddle/utils/CustomStackTrace.h index e1b2d2d8e5ee6..774c4db2b9be4 100644 --- a/paddle/utils/CustomStackTrace.h +++ b/paddle/utils/CustomStackTrace.h @@ -15,6 +15,9 @@ limitations under the License. */ #pragma once #include +#include +#include +#include #include "ThreadLocal.h" @@ -29,25 +32,18 @@ namespace paddle { * @code{.cpp} * * paddle::CustomStackTrace stack; - * PASS_TEST=0; * for (auto& layer : layers){ * stack.push(layer->getName()); - * layer->forward(passType); + * layer->forward(); * } - * for (auto& layer : layers){ + * + * stack.pop(""); // mark under pop stage. + * + * for (auto it = layers.rbegin(); it != layers.rend(); ++it){ + * auto& layer = *it; * layer->backward(passType); * stack.pop(layer->getName()); * } - * - * if(passType == PASS_TEST) { - * stack.clear(); - * } - * else { - * stack.dump([](const std::string& layername){ - * LOG(INFO) << "LayerName: " << layername; - * }) - * } - * * * @endcode */ @@ -55,45 +51,141 @@ template class CustomStackTrace{ public: /** - * @brief Pop out an item from the top of the stack. For safety the item - * will be poped should equal to ip. + * @brief Pop out an item from the top of the stack if item == top. + * Else, just set status to popping. */ - void pop(const T& ip) { - auto& p = *logstack_; - CHECK_EQ(ip, p.top()); - p.pop(); + void pop(const T& item) { + pushing() = false; + auto& s = this->stack(); + if (item == s.top()) { + s.pop(); + } } + /** - * @brief Empty the stack by sequence from top to button. - * @param[in] callback A function deal with each item while dumping. - * It must have and only have a in parameter which is the stack item. + * @brief clear current thread stack. */ - template - void dump(Callback callback) { - auto& p = *logstack_; - while (!p.empty()) { - callback(p.top()); - p.pop(); + void clear() { + auto& s = stack(); + while (!s.empty()) { + s.pop(); } } + /** - * @brief Only empty the stack. + * @brief return true if all thread's stack is empty. + * @return true if empty */ - void clear() { - dump([](const T& ip){}); + bool empty() const { + std::lock_guard g(this->mtx_); + for (auto p : this->stackBuffers_) { + std::stack& s = *p.second; + if (!s.empty()) { + return false; + } + } + return true; + } + + + /** + * @brief DumpCallback Type. It will be invoked many times by dump method. + * + * The first parameter is stack thread id. + * The second parameter is the last action of stack is push or not. + * The third parameter is the item in stack. + */ + typedef std::function DumpCallback; + + /** + * Dump all thread stack, and all stack will be cleared. + */ + void dump(const DumpCallback& callback, bool onlyCurrentThread = false) { + std::lock_guard g(this->mtx_); + for (auto p : this->stackBuffers_) { + std::thread::id tid = p.first; + if (onlyCurrentThread && tid != std::this_thread::get_id()) { + continue; + } + std::stack& s = *p.second; + bool* isPush = nullptr; + auto it = this->pushingBuffers_.find(tid); + if (it != this->pushingBuffers_.end()) { + isPush = it->second; + } + + while (!s.empty()) { + callback(tid, isPush, s.top()); + s.pop(); + } + } } + /** - * @brief Push item ip to the top of the stack. + * @brief Push item to current thread stack. */ - void push(const T& ip) { - auto& p = *logstack_; - p.push(ip); + void push(const T& item) { + pushing() = true; + auto& p = this->stack(); + p.push(item); } private: - ThreadLocalD > logstack_; + /** + * Get thread local attribute, and save them into a map (threadId => TYPE*) + * + * @tparam TYPE thread local attribute type. + * @param threadLocal Thread Local object. + * @param buffers a map from threadId to TYPE* + */ + template + inline TYPE& getThreadLocal( + ThreadLocal& threadLocal, + std::unordered_map& buffers) { + TYPE* retv = threadLocal.get(false); + if (retv) { + return *retv; + } else { + std::lock_guard guard(this->mtx_); + retv = threadLocal.get(); + auto id = std::this_thread::get_id(); + buffers.insert({id, retv}); + return *retv; + } + } + + /** + * @brief Get thread local stack reference. + */ + std::stack& stack() { + return this->getThreadLocal(this->logStack_, + this->stackBuffers_); + } + + /** + * @brief Get thread local pushing flag. + */ + bool& pushing() { + return this->getThreadLocal(this->isPushing_, + this->pushingBuffers_); + } + +private: + mutable std::mutex mtx_; + + std::unordered_map* > stackBuffers_; + std::unordered_map pushingBuffers_; + ThreadLocal isPushing_; + ThreadLocal > logStack_; }; extern CustomStackTrace gLayerStackTrace; +/** + * @brief Install a failure handler to print layer stack when error. + */ +extern void installLayerStackTracer(); + } // namespace paddle diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index 1c1d75dc5bed9..d8c3376fb18c4 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -129,13 +129,7 @@ void runInitFunctions() { void initMain(int argc, char** argv) { initializeLogging(argc, argv); - logging::installFailureWriter([](const char* data, int sz) { - std::cerr << "Current Layer forward/backward stack is " << std::endl; - gLayerStackTrace.dump([](const std::string& layername){ - std::cerr << "LayerName: " << layername << std::endl; - }); - std::cerr.write(data, sz); - }); + installLayerStackTracer(); std::string line; for (int i = 0; i < argc; ++i) { line += argv[i]; diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt index 147ee3f6d6d86..be59a785ecf36 100644 --- a/paddle/utils/tests/CMakeLists.txt +++ b/paddle/utils/tests/CMakeLists.txt @@ -2,3 +2,13 @@ add_simple_unittest(test_CommandLineParser) add_simple_unittest(test_Logging) add_simple_unittest(test_Thread) add_simple_unittest(test_StringUtils) +add_simple_unittest(test_CustomStackTrace) + +add_executable( + test_CustomStackTracePrint + test_CustomStackTracePrint.cpp +) +link_paddle_exe(test_CustomStackTracePrint) +add_test(NAME test_CustomStackTracePrint + COMMAND ${PROJ_ROOT}/paddle/utils/tests/test_CustomStackTracePrint.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/paddle/utils/tests/test_CustomStackTrace.cpp b/paddle/utils/tests/test_CustomStackTrace.cpp new file mode 100644 index 0000000000000..26ca4c678a650 --- /dev/null +++ b/paddle/utils/tests/test_CustomStackTrace.cpp @@ -0,0 +1,95 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "paddle/utils/CustomStackTrace.h" +#include "paddle/utils/CommandLineParser.h" +#include "paddle/utils/Util.h" +#include "paddle/utils/Locks.h" + +P_DEFINE_int32(test_thread_num, 10, "testing thread number"); + +void testNormalImpl(const std::function&, + size_t, size_t, + paddle::ThreadBarrier&, + paddle::ThreadBarrier&)>& callback) { + paddle::CustomStackTrace tracer; + paddle::ThreadBarrier doneBarrier(FLAGS_test_thread_num + 1); + paddle::ThreadBarrier startBarrier(FLAGS_test_thread_num + 1); + constexpr size_t countDown = 10; + constexpr size_t layerSize = 1000; + std::vector> threads; + threads.reserve(FLAGS_test_thread_num); + + for (int32_t i=0; i < FLAGS_test_thread_num; ++i) { + threads.emplace_back(new std::thread([&tracer, &countDown, &layerSize, + &startBarrier, &doneBarrier, + &callback]{ + callback(tracer, countDown, layerSize, startBarrier, doneBarrier); + })); + } + size_t cntDown = countDown; + while (cntDown-- > 0) { + startBarrier.wait(); + doneBarrier.wait(); + ASSERT_TRUE(tracer.empty()); + } + + for (auto& thread : threads) { + thread->join(); + } +} + + +TEST(CustomStackTrace, normalTrain) { + testNormalImpl([](paddle::CustomStackTrace& tracer, + size_t countDown, size_t layerSize, + paddle::ThreadBarrier& start, paddle::ThreadBarrier& finish){ + while (countDown-- > 0) { + start.wait(); + for (size_t i=0; i < layerSize; ++i) { + tracer.push("layer_" + std::to_string(i)); + } + tracer.pop(""); + for (size_t i=0; i < layerSize; ++i) { + tracer.pop("layer_" + std::to_string(layerSize - 1 - i)); + } + finish.wait(); + } + }); +} + +TEST(CustomStackTrace, normalTest) { + testNormalImpl([] (paddle::CustomStackTrace& tracer, + size_t countDown, size_t layerSize, + paddle::ThreadBarrier& start, paddle::ThreadBarrier& finish){ + while (countDown-- > 0) { + start.wait(); + for (size_t i=0; i < layerSize; ++i) { + tracer.push("layer_" + std::to_string(i)); + } + tracer.clear(); // in forward test, tracer will clear after forward. + finish.wait(); + } + }); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + paddle::initMain(argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/paddle/utils/tests/test_CustomStackTracePrint.cpp b/paddle/utils/tests/test_CustomStackTracePrint.cpp new file mode 100644 index 0000000000000..c19c98614e6a7 --- /dev/null +++ b/paddle/utils/tests/test_CustomStackTracePrint.cpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/utils/Util.h" +#include "paddle/utils/CustomStackTrace.h" + +int main(int argc, char** argv) { + paddle::initMain(argc, argv); + + for (size_t i=0; i < 1000; ++i) { + paddle::gLayerStackTrace.push("layer_" + std::to_string(i)); + if (i == 998) { + throw "Unhandle exception"; + } + } + + return 0; +} diff --git a/paddle/utils/tests/test_CustomStackTracePrint.sh b/paddle/utils/tests/test_CustomStackTracePrint.sh new file mode 100755 index 0000000000000..b5543485f365a --- /dev/null +++ b/paddle/utils/tests/test_CustomStackTracePrint.sh @@ -0,0 +1,15 @@ +#!/bin/bash +echo "Test Custom Stack Trace print correct result when fail" +./test_CustomStackTracePrint >customStackTraceLog 2>&1 +if [ $? -eq 0 ]; then + exit 1 +else + set -e + TEXT="" + for ((i=0; i<=998; i++)) + do + TEXT="layer_$i, "$TEXT + done + TEXT="Forwarding "$TEXT + grep -q "$TEXT" customStackTraceLog +fi