Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement compression and splitting option in TRestProcessRunner #201

Merged
merged 34 commits into from
May 20, 2022
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
6491041
update logic of expression identification.
nkx111 Apr 26, 2022
a31104f
StringToLong(): handles correctly scientific notation
nkx111 Apr 26, 2022
6603cda
update TRestStringHelper
Apr 27, 2022
73dc883
update TRestStringHelper
Apr 27, 2022
5e9c15c
fix line endings
Apr 27, 2022
a4efadd
using const string
Apr 27, 2022
ce7f6dd
Merge branch 'master' into nkx111-patch-3
jgalan May 5, 2022
8a4fc35
TRestThread no longer inherites from TRestMetadata; added compression…
nkx111 May 8, 2022
9323726
TrestAnalysisTree now supports operation under chain mode
nkx111 May 8, 2022
645a1b3
TRestProcessRunner updated to auto split files
nkx111 May 8, 2022
1bf1808
TRestRun supported to read splitted data files
nkx111 May 8, 2022
84fd1a0
restRoot supported to read splitted data files
nkx111 May 8, 2022
783e6df
TRestReflector::Converter adds both ROOT def type(Long64_t) and stand…
nkx111 May 8, 2022
89b3aef
fixing g4 selection pipeline
nkx111 May 8, 2022
0519166
ci: ctest job is after build, to avoid PCM warning
nkx111 May 8, 2022
7062418
TRestDataBase: updated IsZombie() logic
nkx111 May 8, 2022
6d394af
update selector pipeline
nkx111 May 8, 2022
76935d8
Merge branch 'master' into nkx111-patch-3
jgalan May 9, 2022
5c496d3
Merge branch 'nkx111-patch-3' of github.com:rest-for-physics/framewor…
jgalan May 9, 2022
0ef05d3
Pipeline EvSelection IDs from file
DavidDiezIb May 10, 2022
e8ec61d
Merge branch 'master' into nkx111-patch-3
nkx111 May 11, 2022
4240835
Merge remote-tracking branch 'origin/master' into nkx111-patch-3
nkx111 May 12, 2022
0c2dddb
Update source/framework/core/inc/TRestAnalysisTree.h
lobis May 12, 2022
772195f
Update source/bin/restRoot.cxx
lobis May 12, 2022
aec750c
Update .gitlab-ci.yml
nkx111 May 13, 2022
56ea183
ci.yml reverted to previous sequence. test-->build
nkx111 May 13, 2022
a5cedd1
replace NULL to nullptr; replace new TFile to TFile::Open
nkx111 May 13, 2022
2d88d50
TRestAnalysisTree: fix a bug
nkx111 May 18, 2022
6799207
TRestAnalysisTree event info wil be set correctly when reading from i…
nkx111 May 18, 2022
0d2df45
manual merge from master solving conflicts
nkx111 May 18, 2022
d877fb3
fix compilation error
nkx111 May 18, 2022
aba1def
fixed event tree reading from splitted file
nkx111 May 19, 2022
ddab667
Update TRestRun.cxx
nkx111 May 20, 2022
6795538
Update TRestRun.h
nkx111 May 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 21 additions & 20 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ stages:
# Basic checks to verify correctness of code
- pre-build

# Build and run tests
- test

# Project compilation validation
- build

# Build and run tests
- test

# REST libraries installed as submodules
- libraries

Expand Down Expand Up @@ -78,23 +78,6 @@ Validate Code:
- python3 pull-submodules.py --force --dontask --latest:${CI_COMMIT_BRANCH}
- python3 pipeline/validateProcesses.py source/libraries/

Build and Test:
stage: test
script:
- cd ${CI_PROJECT_DIR}
- python3 pull-submodules.py --force --dontask --latest:${CI_COMMIT_BRANCH}
- mkdir ${CI_PROJECT_DIR}/build && cd ${CI_PROJECT_DIR}/build
- cmake ${CI_PROJECT_DIR}
-DTEST=ON -DREST_GARFIELD=OFF -DREST_G4=ON -DRESTLIB_DETECTOR=ON -DRESTLIB_RAW=ON -DRESTLIB_TRACK=ON
- make -j2
- ctest --verbose -O ${CI_PROJECT_DIR}/build/Testing/summary.txt

artifacts:
name: "Testing"
paths:
- ${CI_PROJECT_DIR}/build/Testing
expire_in: 1 day

Build and Install:
stage: build
script:
Expand All @@ -112,6 +95,24 @@ Build and Install:
- ${CI_PROJECT_DIR}/install
expire_in: 1 day

Build and Test:
stage: test
script:
- source ${CI_PROJECT_DIR}/install/thisREST.sh
nkx111 marked this conversation as resolved.
Show resolved Hide resolved
- cd ${CI_PROJECT_DIR}
- python3 pull-submodules.py --force --dontask --latest:${CI_COMMIT_BRANCH}
- mkdir ${CI_PROJECT_DIR}/build && cd ${CI_PROJECT_DIR}/build
- cmake ${CI_PROJECT_DIR}
-DTEST=ON -DREST_GARFIELD=OFF -DREST_G4=ON -DRESTLIB_DETECTOR=ON -DRESTLIB_RAW=ON -DRESTLIB_TRACK=ON
- make -j2
- ctest --verbose -O ${CI_PROJECT_DIR}/build/Testing/summary.txt

artifacts:
name: "Testing"
paths:
- ${CI_PROJECT_DIR}/build/Testing
expire_in: 1 day

Load REST Libraries:
stage: install
script:
Expand Down
8 changes: 5 additions & 3 deletions pipeline/selection/Validate.C
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ Int_t Validate(TString fname) {
TFile* f = new TFile(fname);
TTree* tr = (TTree*)f->Get("AnalysisTree");

if (tr->GetEntries() != 3) {
tr->Scan("eventID:g4Ana_totalEdep:g4Ana_energyPrimary");

if (tr->GetEntries() != 1) {
cout << "Number of entries is not the same!" << endl;
cout << "Expected: 3. Obtained: " << tr->GetEntries() << endl;
cout << "Expected: 1. Obtained: " << tr->GetEntries() << endl;
return 1;
}

// Check IDs.
std::vector<Int_t> ids = {0, 3, 6};
jgalan marked this conversation as resolved.
Show resolved Hide resolved
std::vector<Int_t> ids = {1};
for (Int_t i = 0; i < tr->GetEntries(); i++) {
tr->GetEntry(i);
if (tr->GetLeaf("eventID")->GetValue(0) != ids[i]) {
Expand Down
2 changes: 1 addition & 1 deletion pipeline/selection/g4OnSelection.rml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

<addProcess type="TRestEventSelectionProcess" name="evSelection" fileWithIDs="Run00001_NLDBD_Test_g4Analysis.root" conditions="g4Ana_totalEdep<2400 AND g4Ana_energyPrimary>1000" value="ON" verboseLevel="info"/>

<addProcess type="TRestEventSelectionProcess" name="evSelection" fileWithIDs="IDs.txt" value="ON" verboseLevel="info"/>
<!--<addProcess type="TRestEventSelectionProcess" name="evSelection" fileWithIDs="IDs.txt" value="ON" verboseLevel="info"/>-->
jgalan marked this conversation as resolved.
Show resolved Hide resolved

</TRestProcessRunner>

Expand Down
30 changes: 30 additions & 0 deletions source/bin/restRoot.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,36 @@ int main(int argc, char* argv[]) {
runTmp->GetEntry(0);
}

// command line AnalysisTree object
nkx111 marked this conversation as resolved.
Show resolved Hide resolved
if (runTmp->GetAnalysisTree() != NULL) {
juanangp marked this conversation as resolved.
Show resolved Hide resolved
// if (runTmp->GetAnalysisTree()->GetChain() != NULL) {
// printf("Attaching ana_tree%i...\n", Nfile);
// string evcmd = Form("TChain* ana_tree%i = (TChain*)%s;", Nfile,
// ToString(runTmp->GetAnalysisTree()->GetChain()).c_str());
// if (debug) printf("%s\n", evcmd.c_str());
// gROOT->ProcessLine(evcmd.c_str());
//}
// else
//{
printf("Attaching ana_tree%i...\n", Nfile);
string evcmd = Form("TRestAnalysisTree* ana_tree%i = (TRestAnalysisTree*)%s;", Nfile,
ToString(runTmp->GetAnalysisTree()).c_str());
if (debug) printf("%s\n", evcmd.c_str());
gROOT->ProcessLine(evcmd.c_str());
// runTmp->GetEntry(0);
//}
}

// command line EventTree object
if (runTmp->GetEventTree() != NULL) {
lobis marked this conversation as resolved.
Show resolved Hide resolved
printf("Attaching ev_tree%i...\n", Nfile);
string evcmd =
Form("TTree* ev_tree%i = (TTree*)%s;", Nfile, ToString(runTmp->GetEventTree()).c_str());
if (debug) printf("%s\n", evcmd.c_str());
gROOT->ProcessLine(evcmd.c_str());
}


printf("\n%s\n", "Attaching metadata structures...");
Int_t Nmetadata = runTmp->GetNumberOfMetadataStructures();
map<string, int> metanames;
Expand Down
116 changes: 109 additions & 7 deletions source/framework/core/inc/TRestAnalysisTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#ifndef RestCore_TRestAnalysisTree
#define RestCore_TRestAnalysisTree

#include "TChain.h"
#include "TRestEvent.h"
#include "TRestReflector.h"
#include "TTree.h"
Expand All @@ -44,7 +45,7 @@ class TRestAnalysisTree : public TTree {
Bool_t fQuickSetObservableValue = true; //!
std::vector<RESTValue> fObservables; //!
std::map<std::string, int> fObservableIdMap; //!
TTree* fROOTTree; //!
TChain* fChain = NULL; //! in case multiple files for reading
lobis marked this conversation as resolved.
Show resolved Hide resolved

// for storage
Int_t fNObservables;
Expand Down Expand Up @@ -87,8 +88,6 @@ class TRestAnalysisTree : public TTree {
//!< There are branches in the tree with data. There are observables in the list.
//! Once filled, it is forbidden to add new observable to the tree.
Filled = 5,
//!< first status when constructed from ROOT tree
ROOTTree = 6
};

TRestAnalysisTree(TTree* tree);
Expand All @@ -101,10 +100,19 @@ class TRestAnalysisTree : public TTree {
int GetStatus() { return fStatus; }
Int_t GetObservableID(const std::string& obsName);
Bool_t ObservableExists(const std::string& obsName);
Int_t GetEventID() { return fEventID; }
Int_t GetSubEventID() { return fSubEventID; }
Double_t GetTimeStamp() { return fTimeStamp; }
TString GetSubEventTag() { return *fSubEventTag; }
// six basic event parameters
Int_t GetEventID() { return fChain ? ((TRestAnalysisTree*)fChain->GetTree())->GetEventID() : fEventID; }
/// Returns the sub-event ID of the current entry.
/// In chain mode (fChain set) the value is read from the tree currently loaded by
/// the chain; otherwise the locally stored fSubEventID is returned.
/// Fix: the chain branch previously returned GetEventID() by mistake.
Int_t GetSubEventID() {
    return fChain ? ((TRestAnalysisTree*)fChain->GetTree())->GetSubEventID() : fSubEventID;
}
/// Returns the time stamp of the current entry.
/// In chain mode (fChain set) the value is read from the tree currently loaded by
/// the chain; otherwise the locally stored fTimeStamp is returned.
/// Fix: the chain branch previously returned GetEventID() by mistake.
Double_t GetTimeStamp() {
    return fChain ? ((TRestAnalysisTree*)fChain->GetTree())->GetTimeStamp() : fTimeStamp;
}
/// Returns the sub-event tag of the current entry.
/// In chain mode (fChain set) the value is read from the tree currently loaded by
/// the chain; otherwise the locally stored *fSubEventTag is returned.
/// Fix: the chain branch previously returned GetEventID(), i.e. an integer that was
/// implicitly converted to a TString instead of the actual tag.
TString GetSubEventTag() {
    return fChain ? ((TRestAnalysisTree*)fChain->GetTree())->GetSubEventTag() : *fSubEventTag;
}
// We assume all the chained trees share the same run and sub-run IDs,
// so there is no need to call fChain->GetTree() here.
Int_t GetRunOrigin() { return fRunOrigin; }
Int_t GetSubRunOrigin() { return fSubRunOrigin; }
Int_t GetNumberOfObservables() { return fNObservables; }
Expand All @@ -128,6 +136,9 @@ class TRestAnalysisTree : public TTree {
std::cout << "Error! TRestAnalysisTree::GetObservableValue(): index outside limits!" << endl;
return T();
}
if (fChain != NULL) {
juanangp marked this conversation as resolved.
Show resolved Hide resolved
return ((TRestAnalysisTree*)fChain->GetTree())->GetObservableValue<T>(n);
}
return fObservables[n].GetValue<T>();
}
///////////////////////////////////////////////
Expand Down Expand Up @@ -156,6 +167,10 @@ class TRestAnalysisTree : public TTree {
std::cout << "Error! TRestAnalysisTree::SetObservableValue(): index outside limits!" << endl;
return;
}
if (fChain != NULL) {
nkx111 marked this conversation as resolved.
Show resolved Hide resolved
std::cout << "Error! cannot set observable! AnalysisTree is in chain state" << endl;
return;
}
fObservables[id].SetValue(value);
}
///////////////////////////////////////////////
Expand Down Expand Up @@ -266,6 +281,93 @@ class TRestAnalysisTree : public TTree {

Int_t WriteAsTTree(const char* name = 0, Int_t option = 0, Int_t bufsize = 0);

Bool_t AddChainFile(std::string file);

TTree* GetTree() const;

/// Returns the internal TChain pointer (fChain); it is null unless a chain has been set up.
TChain* GetChain() { return fChain; }

Long64_t LoadTree(Long64_t entry);

Long64_t GetEntries() const;

Long64_t GetEntries(const char* sel);

/// Forwards Browse() to the attached chain when fChain is set, otherwise to TTree.
void Browse(TBrowser* b) {
    if (fChain) {
        fChain->Browse(b);
    } else {
        TTree::Browse(b);
    }
}
/// Forwards Draw() (TCut selection overload) to the attached chain when fChain is
/// set, otherwise to TTree. Returns whatever the selected implementation returns.
Long64_t Draw(const char* varexp, const TCut& selection, Option_t* option = "",
              Long64_t nentries = kMaxEntries, Long64_t firstentry = 0) {
    if (fChain) {
        return fChain->Draw(varexp, selection, option, nentries, firstentry);
    }
    return TTree::Draw(varexp, selection, option, nentries, firstentry);
}
Long64_t Draw(const char* varexp, const char* selection, Option_t* option = "",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an override of TTree functions? What is the reason of these changes?

Copy link
Member Author

@nkx111 nkx111 May 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. It is used to read splitted data files. In case multiple files are created to store analysis tree data, we make TRestAnalysisTree running internally with TChain.

Long64_t nentries = kMaxEntries, Long64_t firstentry = 0) {
return fChain ? fChain->Draw(varexp, selection, option, nentries, firstentry)
: TTree::Draw(varexp, selection, option, nentries, firstentry);
}
/// Forwards Draw(opt) to the attached chain when fChain is set, otherwise to TTree.
void Draw(Option_t* opt) {
    if (fChain) {
        fChain->Draw(opt);
    } else {
        TTree::Draw(opt);
    }
}
/// Forwards FindBranch() to the attached chain when fChain is set, otherwise to TTree.
TBranch* FindBranch(const char* name) {
    if (fChain) return fChain->FindBranch(name);
    return TTree::FindBranch(name);
}
/// Forwards FindLeaf() to the attached chain when fChain is set, otherwise to TTree.
TLeaf* FindLeaf(const char* name) {
    if (fChain) return fChain->FindLeaf(name);
    return TTree::FindLeaf(name);
}
/// Forwards GetBranch() to the attached chain when fChain is set, otherwise to TTree.
TBranch* GetBranch(const char* name) {
    if (fChain) return fChain->GetBranch(name);
    return TTree::GetBranch(name);
}
/// Forwards GetBranchStatus() to the attached chain when fChain is set, otherwise to TTree.
Bool_t GetBranchStatus(const char* branchname) const {
    if (fChain) return fChain->GetBranchStatus(branchname);
    return TTree::GetBranchStatus(branchname);
}
/// Forwards GetCacheSize() to the attached chain when fChain is set, otherwise to TTree.
Long64_t GetCacheSize() const {
    if (fChain) return fChain->GetCacheSize();
    return TTree::GetCacheSize();
}
/// Forwards GetChainEntryNumber() to the attached chain when fChain is set, otherwise to TTree.
Long64_t GetChainEntryNumber(Long64_t entry) const {
    if (fChain) return fChain->GetChainEntryNumber(entry);
    return TTree::GetChainEntryNumber(entry);
}
/// Forwards GetEntryNumber() to the attached chain when fChain is set, otherwise to TTree.
Long64_t GetEntryNumber(Long64_t entry) const {
    if (fChain) return fChain->GetEntryNumber(entry);
    return TTree::GetEntryNumber(entry);
}
/// Forwards GetReadEntry() to the attached chain when fChain is set, otherwise to TTree.
Long64_t GetReadEntry() const {
    if (fChain) return fChain->GetReadEntry();
    return TTree::GetReadEntry();
}

/// Forwards GetLeaf(branchname, leafname) to the attached chain when fChain is set,
/// otherwise to TTree.
TLeaf* GetLeaf(const char* branchname, const char* leafname) {
    if (fChain) return fChain->GetLeaf(branchname, leafname);
    return TTree::GetLeaf(branchname, leafname);
}
/// Forwards GetLeaf(name) to the attached chain when fChain is set, otherwise to TTree.
TLeaf* GetLeaf(const char* name) {
    if (fChain) return fChain->GetLeaf(name);
    return TTree::GetLeaf(name);
}
/// Forwards GetListOfBranches() to the attached chain when fChain is set, otherwise to TTree.
TObjArray* GetListOfBranches() {
    if (fChain) return fChain->GetListOfBranches();
    return TTree::GetListOfBranches();
}
/// Forwards GetListOfLeaves() to the attached chain when fChain is set, otherwise to TTree.
TObjArray* GetListOfLeaves() {
    if (fChain) return fChain->GetListOfLeaves();
    return TTree::GetListOfLeaves();
}
/// Forwards Process() (selector file name overload) to the attached chain when
/// fChain is set, otherwise to TTree.
Long64_t Process(const char* filename, Option_t* option = "", Long64_t nentries = kMaxEntries,
                 Long64_t firstentry = 0) {
    if (fChain) {
        return fChain->Process(filename, option, nentries, firstentry);
    }
    return TTree::Process(filename, option, nentries, firstentry);
}
/// Forwards Process() (TSelector object overload) to the attached chain when
/// fChain is set, otherwise to TTree.
Long64_t Process(TSelector* selector, Option_t* option = "", Long64_t nentries = kMaxEntries,
                 Long64_t firstentry = 0) {
    if (fChain) {
        return fChain->Process(selector, option, nentries, firstentry);
    }
    return TTree::Process(selector, option, nentries, firstentry);
}
/// Forwards Scan() to the attached chain when fChain is set, otherwise to TTree.
Long64_t Scan(const char* varexp = "", const char* selection = "", Option_t* option = "",
              Long64_t nentries = kMaxEntries, Long64_t firstentry = 0) {
    if (fChain) {
        return fChain->Scan(varexp, selection, option, nentries, firstentry);
    }
    return TTree::Scan(varexp, selection, option, nentries, firstentry);
}
/// Forwards SetBranchAddress(bname, add, ptr) to the attached chain when fChain is
/// set, otherwise to TTree.
Int_t SetBranchAddress(const char* bname, void* add, TBranch** ptr = 0) {
    if (fChain) return fChain->SetBranchAddress(bname, add, ptr);
    return TTree::SetBranchAddress(bname, add, ptr);
}
/// Forwards SetBranchAddress(bname, add, ptr, realClass, datatype, isptr) to the
/// attached chain when fChain is set, otherwise to TTree.
Int_t SetBranchAddress(const char* bname, void* add, TBranch** ptr, TClass* realClass, EDataType datatype,
                       Bool_t isptr) {
    if (fChain) {
        return fChain->SetBranchAddress(bname, add, ptr, realClass, datatype, isptr);
    }
    return TTree::SetBranchAddress(bname, add, ptr, realClass, datatype, isptr);
}
/// Forwards SetBranchAddress(bname, add, realClass, datatype, isptr) to the
/// attached chain when fChain is set, otherwise to TTree.
Int_t SetBranchAddress(const char* bname, void* add, TClass* realClass, EDataType datatype,
                       Bool_t isptr) {
    if (fChain) {
        return fChain->SetBranchAddress(bname, add, realClass, datatype, isptr);
    }
    return TTree::SetBranchAddress(bname, add, realClass, datatype, isptr);
}
/// Forwards SetBranchStatus() to the attached chain when fChain is set, otherwise to TTree.
void SetBranchStatus(const char* bname, Bool_t status = 1, UInt_t* found = 0) {
    if (fChain) {
        fChain->SetBranchStatus(bname, status, found);
    } else {
        TTree::SetBranchStatus(bname, status, found);
    }
}
/// Forwards SetDirectory() to the attached chain when fChain is set, otherwise to TTree.
void SetDirectory(TDirectory* dir) {
    if (fChain) {
        fChain->SetDirectory(dir);
    } else {
        TTree::SetDirectory(dir);
    }
}

/// Forwards ResetBranchAddress() to the attached chain when fChain is set, otherwise to TTree.
void ResetBranchAddress(TBranch* br) {
    if (fChain) {
        fChain->ResetBranchAddress(br);
    } else {
        TTree::ResetBranchAddress(br);
    }
}
/// Forwards ResetBranchAddresses() to the attached chain when fChain is set, otherwise to TTree.
void ResetBranchAddresses() {
    if (fChain) {
        fChain->ResetBranchAddresses();
    } else {
        TTree::ResetBranchAddresses();
    }
}

// Constructor
TRestAnalysisTree();
TRestAnalysisTree(TString name, TString title);
Expand Down
2 changes: 2 additions & 0 deletions source/framework/core/inc/TRestEventProcess.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ class TRestEventProcess : public TRestMetadata {
void SetFriendProcess(TRestEventProcess* p);
/// Add parallel process to this process
void SetParallelProcess(TRestEventProcess* p);
/// Hook for the case where the analysis tree is reset (e.g. switched to a new file).
/// The default implementation does nothing; processes that need to react to the
/// reset can override it.
virtual void NotifyAnalysisTreeReset() {}

// getters
/// Get pointer to input event. Must be implemented in the derived class
Expand Down
26 changes: 19 additions & 7 deletions source/framework/core/inc/TRestProcessRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,14 @@ class TRestProcessRunner : public TRestMetadata {

// self variables for processing
std::vector<TRestThread*> fThreads; //!
TFile* fTempOutputDataFile; //!
TTree* fEventTree; //!
TRestAnalysisTree* fAnalysisTree; //!
ProcStatus fProcStatus; //!
Int_t fNBranches; //!
TFile* fOutputDataFile; //! the TFile pointer being used
TString fOutputDataFileName; //! indicates the name of the first file created as output data file. The actual
//! output file may be changed if the tree is too large
TTree* fEventTree; //!
TRestAnalysisTree* fAnalysisTree; //!
ProcStatus fProcStatus; //!
Int_t fNBranches; //!
Int_t fNFilesSplit; //! Number of files being split.

// metadata
Bool_t fUseTestRun;
Expand All @@ -56,8 +59,14 @@ class TRestProcessRunner : public TRestMetadata {
Int_t fFirstEntry;
Int_t fEventsToProcess;
Int_t fProcessedEvents;

Long64_t fFileSplitSize; // in bytes
Int_t fFileCompression; // 1~9
std::map<std::string, std::string> fProcessInfo;

// bool fOutputItem[4] = {
// false}; // the on/off status for item: inputAnalysis, inputEvent, outputEvent, outputAnalysis

public:
/// REST run class
void Initialize();
Expand Down Expand Up @@ -89,6 +98,8 @@ class TRestProcessRunner : public TRestMetadata {
Int_t GetNextevtFunc(TRestEvent* targetevt, TRestAnalysisTree* targettree);
void FillThreadEventFunc(TRestThread* t);
void ConfigOutputFile();
void MergeOutputFile();
void WriteMetadata();

// tools
void ResetRunTimes();
Expand All @@ -100,7 +111,7 @@ class TRestProcessRunner : public TRestMetadata {
TRestEvent* GetInputEvent();
TRestAnalysisTree* GetInputAnalysisTree();
TRestAnalysisTree* GetOutputAnalysisTree() { return fAnalysisTree; }
TFile* GetTempOutputDataFile() { return fTempOutputDataFile; }
/// Returns fOutputDataFile, the TFile pointer currently being used for output.
TFile* GetOutputDataFile() { return fOutputDataFile; }
/// Looks up `infoname` in fProcessInfo.
/// Returns the stored value when the key exists and its value is non-empty;
/// otherwise returns `infoname` itself.
/// Uses find() instead of operator[] so that a lookup of an unknown key no longer
/// inserts an empty entry into fProcessInfo, and the key is searched only once.
std::string GetProcInfo(std::string infoname) {
    const auto entry = fProcessInfo.find(infoname);
    if (entry == fProcessInfo.end() || entry->second.empty()) {
        return infoname;
    }
    return entry->second;
}
Expand All @@ -110,12 +121,13 @@ class TRestProcessRunner : public TRestMetadata {
double GetReadingSpeed();
bool UseTestRun() const { return fUseTestRun; }
ProcStatus GetStatus() { return fProcStatus; }
/// Returns fFileSplitSize, the file split size in bytes.
Long64_t GetFileSplitSize() { return fFileSplitSize; }

// Constructor & Destructor
TRestProcessRunner();
~TRestProcessRunner();

ClassDef(TRestProcessRunner, 6);
ClassDef(TRestProcessRunner, 7);
};

#endif
Loading