Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#21 from Superjom/feature/add_storage
Browse files Browse the repository at this point in the history
feature/add storage
  • Loading branch information
jacquesqiao authored Nov 17, 2017
2 parents 4c949e6 + e5c1ef4 commit efcd358
Show file tree
Hide file tree
Showing 4 changed files with 216 additions and 0 deletions.
19 changes: 19 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Top-level build script for the VisualDL project.
cmake_minimum_required(VERSION 3.8)
project(VisualDL)

# Compile all targets as C++11.
set(CMAKE_CXX_STANDARD 11)

# Make the repository root an include root so sources can use project-relative
# includes such as "visualdl/backend/storage/storage.h".
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# Header and library search paths of the local third-party prefix.
# NOTE(review): presumably where protobuf and glog are installed -- confirm.
include_directories(thirdparty/local/include)
link_directories(thirdparty/local/lib)
add_subdirectory(thirdparty/pybind11-2.2.1)

# Sources of the storage backend. The storage.pb.{h,cc} files are presumably
# generated by protoc from storage.proto -- verify they exist before building.
set(SOURCE_FILES
visualdl/backend/storage/storage.cc visualdl/backend/storage/storage.h
visualdl/backend/storage/storage.pb.h
visualdl/backend/storage/storage.pb.cc
)

# NOTE(review): no target_link_libraries() call is visible here, although
# storage.cc uses glog and protobuf -- confirm how those libraries get linked.
add_library(storage visualdl/backend/storage/storage.cc
visualdl/backend/storage/storage.pb.cc)
# NOTE(review): storage.cc does not define main(); confirm that this executable
# target can actually link.
add_executable(VisualDL ${SOURCE_FILES})
45 changes: 45 additions & 0 deletions visualdl/backend/storage/storage.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <fstream>
#include <glog/logging.h>

#include "visualdl/backend/storage/storage.h"

namespace visualdl {

/*
 * Return a mutable pointer to the tablet stored under `tag`, creating an empty
 * tablet first when `tag` is not present yet.
 *
 * The map's at() accessor is only valid for keys that already exist, so it can
 * never add a new tablet; operator[] inserts a default-constructed Tablet on a
 * miss and returns the existing one otherwise.
 */
storage::Tablet *Storage::Add(const std::string &tag) {
  return &(*proto_.mutable_tablets())[tag];
}

/*
 * Look up the tablet registered under `tag`.
 * Returns a read-only pointer to it, or nullptr when no such tablet exists.
 */
const storage::Tablet *Storage::Find(const std::string &tag) const {
  const auto &tablets = proto_.tablets();
  const auto pos = tablets.find(tag);
  return pos == tablets.end() ? nullptr : &pos->second;
}

/*
 * Write the serialized storage to the file at `path` in binary mode.
 * Aborts through CHECK when the file cannot be opened.
 */
void Storage::Save(const std::string &path) const {
  std::ofstream out(path, std::ios::out | std::ios::binary);
  CHECK(out.is_open()) << "can't open path " << path;
  const std::string bytes = Serialize();
  out.write(bytes.data(), bytes.size());
}

/*
 * Read the whole file at `path` and de-serialize it into this Storage.
 * Aborts through CHECK when the file cannot be opened, sized or read.
 */
void Storage::Load(const std::string &path) {
  // Open positioned at the end of the file so that tellg() reports the file
  // size; a freshly opened stream without `ate` is at offset 0, which would
  // yield an empty buffer and load nothing.
  std::ifstream file(path, std::ios::binary | std::ios::ate);
  CHECK(file.is_open()) << "can't open path " << path;

  const std::streamoff size = file.tellg();
  // tellg() reports -1 on failure; converting that to size_t would request a
  // gigantic allocation.
  CHECK(size >= 0) << "can't get the size of " << path;

  std::string buffer(static_cast<size_t>(size), ' ');
  file.seekg(0);
  file.read(&buffer[0], size);
  CHECK(file.good()) << "can't read " << path;

  DeSerialize(buffer);
}

/*
 * Encode the underlying protobuf message and return the resulting string.
 */
std::string Storage::Serialize() const {
  std::string bytes = proto_.SerializeAsString();
  return bytes;
}

void Storage::DeSerialize(const std::string &data) {
proto_.ParseFromString(data);
}

} // namespace visualdl
65 changes: 65 additions & 0 deletions visualdl/backend/storage/storage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#ifndef VISUALDL_STORAGE_H
#define VISUALDL_STORAGE_H

#include <string>
#include <time.h>

#include "visualdl/backend/storage/storage.pb.h"

namespace visualdl {

/*
 * In-memory store of tablets, backed by the `storage::Storage` protobuf
 * message. The instance is meant to be obtained through `Storage::Global()`;
 * the constructor is not public.
 */
class Storage final {
public:
  /*
   * There should be only one Storage instance in memory.
   *
   * This must be a static member: the constructor is protected and the class
   * is final, so callers have no other way to obtain an instance on which a
   * non-static `Global()` could be invoked.
   */
  static Storage &Global() {
    // Created on first use and never deleted.
    static Storage *instance = new Storage();
    return *instance;
  }

  /*
   * Add a new tablet named `tag`, the newly added instance will be returned.
   */
  storage::Tablet *Add(const std::string &tag);

  /*
   * Search for the tablet named `tag`; return nullptr if it does not exist.
   */
  const storage::Tablet *Find(const std::string &tag) const;

  /*
   * Serialize this object to a string and save it to the file at `path`.
   */
  void Save(const std::string &path) const;

  /*
   * Load the Protobuf message from the file at `path`.
   */
  void Load(const std::string &path);

protected:
  /*
   * Serialize the Storage instance to a string.
   */
  std::string Serialize() const;

  /*
   * De-serialize from a string and update this Storage instance.
   */
  void DeSerialize(const std::string &data);

  /*
   * Construct a Storage and stamp the message with the current time as
   * reported by time().
   */
  Storage() {
    // set time stamp
    time_t time0;
    time(&time0);
    proto_.set_timestamp(time0);
  }

private:
  // The protobuf message holding all the state of this Storage.
  storage::Storage proto_;
};

} // namespace visualdl

#endif //VISUALDL_STORAGE_H
87 changes: 87 additions & 0 deletions visualdl/backend/storage/storage.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Schema of the VisualDL storage backend, written in proto3 syntax.
syntax = "proto3";
// Messages are declared in the `storage` package; the C++ code refers to them
// as `storage::Tablet` and `storage::Storage`.
package storage;

// Type tags for the data types declared by this schema, covering both single
// values and arrays.
enum DataType {
// single entry
kInt32 = 0;
kInt64 = 1;
kFloat = 2;
kDouble = 3;
kString = 4;
kBool = 5;
// multiple entries (arrays)
kInt64s = 6;
kFloats = 7;
kDoubles = 8;
kStrings = 9;
kInt32s = 10;
kBools = 11;
}

// A piece of data (a single element or an array) whose type is `dtype`.
// NOTE(review): `DataType` declares `kStrings`, but no repeated string field
// exists here -- confirm whether one is missing.
message Entry {
// If all the entries in a record share the same data type, ignore this field
// and store the type in the `dtype` field of `Record`.
DataType dtype = 1;
// single element
int32 i32 = 2;
int64 i64 = 3;
string s = 4;
float f = 5;
double d = 6;
bool b = 7;
// array
repeated int64 i64s = 8;
repeated float fs = 9;
repeated double ds = 10;
repeated int32 i32s = 11;
repeated bool bs = 12;
}

// One record written to a tablet: its data plus bookkeeping fields.
message Record {
Entry data = 1;
int64 timestamp = 2;
// Stores the count of write operations to the tablet.
int64 id = 3;
DataType dtype = 4;
// Shape or some other meta information for this record. If all the records
// share the same meta, just store one copy of the meta in `Storage`; otherwise
// create a unique copy for each `Record`.
// NOTE(review): the shared `meta` field visible in this file lives in
// `Tablet`, not in `Storage` -- confirm which one is meant.
Entry meta = 5;
}

/*
A Tablet stores the records of a component whose type is `component` and which is
identified by `tag`. The records will be saved in a file whose name contains `tag`.
While running, `num_records` is accumulated, and `num_samples` indicates the size of
the sample set that the reservoir sampling algorithm will collect.
*/
message Tablet {
// The kinds of components that are supported.
enum Type {
kScalar = 0;
kHistogram = 1;
kGraph = 2;
}
// Type of the component; different components should have different storage formats.
Type component = 1;
// Records the total count of records; each write operation should increase this value.
int64 num_records = 2;
// Indicates the number of instances to sample; this should be a constant value.
int32 num_samples = 3;
repeated Record records = 4;
// Stores the meta information when it is shared by all the records.
Entry meta = 5;
// The unique identifier of this `Tablet`.
string tag = 6;
}

/*
The Storage stores all the records.
*/
message Storage {
// Maps tags to Tablets; should be thread safe if the keys are fixed after
// initialization.
map<string, Tablet> tablets = 1;
string dir = 2;
// Set by the C++ `Storage` constructor from the value returned by time().
int64 timestamp = 3;
}

0 comments on commit efcd358

Please sign in to comment.