PaddlePaddle · hedaoyuan · Nov 7, 2016 · Oct 31, 2016 · Nov 4, 2016 · hedaoyuan
diff --git a/demo/semantic_role_labeling/.gitignore b/demo/semantic_role_labeling/.gitignore
@@ -0,0 +1,10 @@
+*.pyc
+train.log
+data/feature
+data/conll05st-release/
+data/src.dict
+data/test.wsj.props
+data/test.wsj.seq_pair
+data/test.wsj.words
+data/tgt.dict
+output
diff --git a/paddle/gserver/dataproviders/DataProvider.cpp b/paddle/gserver/dataproviders/DataProvider.cpp
@@ -131,9 +131,10 @@ void DoubleBuffer::asyncLoadBatch() {
     taskReadySem_.wait();
     if (stopping_) break;
 
-    while (batchSize_ == 0) {
+    while (batchSize_ == 0 && !stopping_) {
       usleep(5);
     }
+    if (stopping_) break;
 
     do {
       DataBatch newBatch;

diff --git a/paddle/gserver/dataproviders/PyDataProvider2.cpp b/paddle/gserver/dataproviders/PyDataProvider2.cpp
@@ -201,7 +201,7 @@ class IPyDataProviderCache {
  * Here, we start a thread to read data. It is totally asynchronous for reading
  * data. And it support cache strategies.
  */
-class PyDataProvider2 : public DataProvider {
+class PyDataProvider2 : public DataProvider, private WaitMethodDone {
 public:
   /**
    * Ctor
@@ -433,26 +433,33 @@ class PyDataProvider2 : public DataProvider {
 
   inline void resetImpl(bool startNewThread) {
     DBG << "Reseting " << startNewThread;
+    exit_.store(true);  // now raised even when no load thread exists
     if (loadThread_) {  // is loading.
-      exit_.store(true);
       loadThread_->join();
       loadThread_.reset();
     }
     {
       PyGuard g;
       callingContexts_.clear();
+      this->pullCV_.notify_one();  // wake one thread waiting on pullCV_
+    }
+    this->waitNotCalling();  // block until no guard() holder is running
+    {
+      PyGuard g;  // second PyGuard scope, entered after the wait above
       dataPool_.clear();
     }
     poolActualSize_ = 0;
-    exit_ = false;
+
     if (startNewThread && cache_->reset()) {
       DBG << "Start new thread.";
       loadThread_.reset(new std::thread([this] {
+        exit_ = false;  // cleared on the new thread, before loadThread()
         loadThread();
       }));
       callingContextCreated_.wait();
     }
     DBG << "Reset done";
+    exit_ = false;  // also covers the path where no thread was started
   }
 
 private:
@@ -529,6 +536,7 @@ class PyDataProvider2 : public DataProvider {
    * Loading a batch of data.
    */
   int64_t getNextBatchInternal(int64_t size_, DataBatch *batch) {
+    auto guard = this->guard();
     REGISTER_TIMER("PyDP2.getNextBatchInternal")
     CHECK_GE(size_, 0);
     size_t size = (size_t) size_;
@@ -554,6 +562,10 @@ class PyDataProvider2 : public DataProvider {
     } else {  // loading from cache.
       poolPtr = this->cache_->load();
     }
+    if (exit_) {
+      // PyDataProvider is destructing.
+      return 0;
+    }
     CHECK(poolPtr != nullptr);
 
     std::deque<PyObjectPtr>& pool = *poolPtr;

diff --git a/paddle/gserver/tests/test_PyDataProvider2.cpp b/paddle/gserver/tests/test_PyDataProvider2.cpp
@@ -353,6 +353,23 @@ TEST(PyDataProvider2, test_check) {
   }
 }
 
+TEST(PyDataProvider2, multiThread) {  // reset/destroy with async loading
+  paddle::DataConfig config;
+  config.set_type("py2");
+  config.set_files(FLAGS_train_list.c_str());
+  config.set_load_data_module("test_PyDataProvider2");
+  config.set_load_data_object("test_dense_no_seq");
+  config.set_async_load_data(true);  // async loading enabled
+
+  std::unique_ptr<paddle::DataProvider> provider(
+      paddle::DataProvider::create(config, false));
+  provider->reset();
+  paddle::DataBatch batch;
+  provider->getNextBatch(100, &batch);  // return value is not checked
+  provider->reset();  // second reset, after one batch has been requested
+  provider.reset();  // destroys the provider; the test has no assertions
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   paddle::initMain(argc, argv);

diff --git a/paddle/utils/Locks.h b/paddle/utils/Locks.h
@@ -240,4 +240,77 @@ class LockedCondition : public std::condition_variable {
   std::mutex mutex_;
 };
 
+
+/**
+ * @brief Wait for a method call to finish.
+ *
+ * It provides a guard to hold while a method runs, and lets another thread
+ * wait until that method is no longer being called. Example usage:
+ *
+ * @code{.cpp}
+ * class A {
+ * private:
+ *   WaitMethodDone done_;
+ * public:
+ *   void foo() {
+ *     auto guard = done_.guard();
+ *     // your code.
+ *   }
+ *
+ *   void clear() {
+ *     done_.waitNotCalling();
+ *     // foo() was not running when the wait above returned.
+ *     // do some job.
+ *   }
+ * };
+ * @endcode
+ */
+class WaitMethodDone {
+public:
+  DISABLE_COPY(WaitMethodDone);
+
+  class CallingGuard {
+  public:
+    CallingGuard(const CallingGuard& other) = delete;
+    CallingGuard(CallingGuard&& other) {  // takes over other's duty
+     self_ = other.self_;
+     other.self_ = nullptr;  // other's destructor will now do nothing
+    }
+
+    explicit CallingGuard(WaitMethodDone* self): self_(self) {
+      self_->cv_.notify_all([this] {  // NOTE(review): lambda presumably
+        self_->isCalling_ = true;     // runs under cv_'s lock -- confirm
+      });
+    }
+
+    ~CallingGuard() {
+      if (self_) {  // null after being moved from
+        self_->cv_.notify_all([this] {
+          self_->isCalling_ = false;  // NOTE(review): plain flag, not a count
+        });
+      }
+    }
+
+  private:
+    WaitMethodDone* self_;  // owner whose flag is toggled; may be nullptr
+  };
+
+  WaitMethodDone(): isCalling_(false) {}
+
+  CallingGuard guard() {  // sets isCalling_; the guard's dtor clears it
+    return CallingGuard(this);
+  }
+
+  void waitNotCalling() {  // cv_.wait with the condition !isCalling_
+    cv_.wait([this] {
+      return !isCalling_;
+    });
+  }
+
+private:
+  bool isCalling_;  // set by CallingGuard's ctor, cleared by its dtor
+  LockedCondition cv_;
+  friend class CallingGuard;
+};
+
 }  // namespace paddle