Skip to content

Commit

Permalink
Merge pull request #427 from rest-for-physics/jgalan_minor_fix
Browse files Browse the repository at this point in the history
Fix on TRestMetadata and TRestDataSet add-ons
  • Loading branch information
jgalan authored May 26, 2023
2 parents a9b66be + 397b679 commit c33746a
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 9 deletions.
29 changes: 29 additions & 0 deletions macros/REST_GenerateDataSets.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include "TRestDataSet.h"
#include "TRestTask.h"

#ifndef RestTask_GenerateDataSets
#define RestTask_GenerateDataSets

//*******************************************************************************************************
//*** Description: This macro will launch the generation of datasets defined
//*** inside a particular RML file `datasets.rml` that contains the dataset
//*** definitions. The second argument is a comma-separated list of the
//*** datasets to be generated, chosen from those defined inside `datasets.rml`.
//***
//*** --------------
//*** Usage: restManager GenerateDataSets datasets.rml set1,set2,set3
//***
//*******************************************************************************************************

//*** Generates every dataset listed in `datasets` (comma-separated names) using the
//*** definitions found in `inputRML`, and exports each one to `Dataset_<name>.root`.
//*** Always returns 0.
Int_t REST_GenerateDataSets(const std::string& inputRML, const std::string& datasets) {
    const std::vector<std::string> names = REST_StringHelper::Split(datasets, ",");

    for (auto it = names.begin(); it != names.end(); ++it) {
        const std::string& name = *it;
        std::cout << "Set : " << name << std::endl;

        // Each dataset is built from its own named section inside the RML file.
        TRestDataSet dataSet(inputRML.c_str(), name.c_str());
        dataSet.GenerateDataSet();

        const std::string outputFile = "Dataset_" + name + ".root";
        dataSet.Export(outputFile);
    }

    return 0;
}
#endif
26 changes: 20 additions & 6 deletions source/framework/core/inc/TRestDataSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class TRestDataSet : public TRestMetadata {
/// A list of metadata members where filters will be applied
std::vector<std::string> fFilterMetadata; //<

/// If not empty it will check if the metadata member contains the value
/// If not empty it will check if the metadata member contains the string
std::vector<std::string> fFilterContains; //<

/// If the corresponding element is not empty it will check if the metadata member is greater
Expand All @@ -74,6 +74,9 @@ class TRestDataSet : public TRestMetadata {
/// If the corresponding element is not empty it will check if the metadata member is lower
std::vector<Double_t> fFilterLowerThan; //<

/// If the corresponding element is not empty it will check if the metadata member is equal
std::vector<Double_t> fFilterEqualsTo; //<

/// The properties of a relevant quantity that we want to store together with the dataset
std::map<std::string, RelevantQuantity> fQuantity; //<

Expand All @@ -87,16 +90,22 @@ class TRestDataSet : public TRestMetadata {
std::vector<std::string> fFileSelection; //<

/// TimeStamp for the start time of the first file
Double_t fStartTime = REST_StringHelper::StringToTimeStamp(fFilterEndTime);
Double_t fStartTime = REST_StringHelper::StringToTimeStamp(fFilterEndTime); //<

/// TimeStamp for the end time of the last file
Double_t fEndTime = REST_StringHelper::StringToTimeStamp(fFilterStartTime);
Double_t fEndTime = REST_StringHelper::StringToTimeStamp(fFilterStartTime); //<

/// It keeps track if the generated dataset is a pure dataset or a merged one
Bool_t fMergedDataset = false; //<

/// The list of dataset files imported
std::vector<std::string> fImportedFiles; //<

/// The resulting RDF::RNode object after initialization
ROOT::RDF::RNode fDataSet = ROOT::RDataFrame(0); //!

/// A pointer to the generated tree
TTree* fTree = nullptr; //!
TChain* fTree = nullptr; //!

void InitFromConfigFile() override;

Expand All @@ -116,7 +125,7 @@ class TRestDataSet : public TRestMetadata {
TTree* GetTree() const {
if (fTree == nullptr) {
RESTError << "Tree has not been yet initialized" << RESTendl;
RESTError << "You should invoke TRestDataSet::Initialize() before trying to access the tree"
RESTError << "You should invoke TRestDataSet::GenerateDataSet() before trying to access the tree"
<< RESTendl;
}
return fTree;
Expand Down Expand Up @@ -145,17 +154,22 @@ class TRestDataSet : public TRestMetadata {
inline auto GetFilterContains() const { return fFilterContains; }
inline auto GetFilterGreaterThan() const { return fFilterGreaterThan; }
inline auto GetFilterLowerThan() const { return fFilterLowerThan; }
inline auto GetFilterEqualsTo() const { return fFilterEqualsTo; }
inline auto GetQuantity() const { return fQuantity; }
inline auto GetCut() const { return fCut; }
inline auto IsMergedDataSet() const { return fMergedDataset; }

inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; }

TRestDataSet& operator=(TRestDataSet& dS);
void Import(const std::string& fileName);
void Import(std::vector<std::string> fileNames);
void Export(const std::string& filename);

ROOT::RDF::RNode MakeCut(const TRestCut* cut);

ROOT::RDF::RNode Define(const std::string& columnName, const std::string& formula);

void PrintMetadata() override;
void Initialize() override;

Expand All @@ -165,6 +179,6 @@ class TRestDataSet : public TRestMetadata {
TRestDataSet(const char* cfgFileName, const std::string& name = "");
~TRestDataSet();

ClassDefOverride(TRestDataSet, 2);
ClassDefOverride(TRestDataSet, 3);
};
#endif
117 changes: 114 additions & 3 deletions source/framework/core/src/TRestDataSet.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,18 @@
/// - **last**: It will simply register the value of the metadata member
/// from the last file in the list of selected files.
///
/// ### Adding a new column based on relevant quantities
///
/// Using the TRestDataSet::Define method we can implement a
/// formula based on column names and relevant quantities. Then, the
/// relevant quantities will be substituted by their dataset value.
///
/// \code
/// dataset.GetColumnNames()
/// dataset.Define("newColumnName", "QuantityName * column1" )
/// dataset.GetColumnNames()
/// dataset.GetDataFrame().Display({"column1", "newColumnName"})->Print();
/// \endcode
///
///----------------------------------------------------------------------
///
Expand Down Expand Up @@ -273,7 +285,8 @@ void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); }
///
void TRestDataSet::GenerateDataSet() {
if (fTree != nullptr) {
RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::Initialize ... " << RESTendl;
RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::GenerateDataSet ... "
<< RESTendl;
return;
}

Expand Down Expand Up @@ -324,7 +337,7 @@ void TRestDataSet::GenerateDataSet() {
fDataSet = ROOT::RDataFrame("AnalysisTree", fOutName);

TFile* f = TFile::Open(fOutName.c_str());
fTree = (TTree*)f->Get("AnalysisTree");
fTree = (TChain*)f->Get("AnalysisTree");

RESTInfo << " - Dataset initialized!" << RESTendl;
}
Expand Down Expand Up @@ -378,6 +391,9 @@ std::vector<std::string> TRestDataSet::FileSelection() {
if (fFilterLowerThan[n] != -1)
if (StringToDouble(mdValue) >= fFilterLowerThan[n]) accept = false;

if (fFilterEqualsTo[n] != -1)
if (StringToDouble(mdValue) != fFilterEqualsTo[n]) accept = false;

n++;
}

Expand Down Expand Up @@ -466,6 +482,29 @@ ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) {
return df;
}

///////////////////////////////////////////////
/// \brief This function will add a new column to the RDataFrame using
/// the same scheme as the usual RDF::Define method, but it will on top of
/// that evaluate the values of any relevant quantities used.
///
/// Each relevant quantity name found in the formula is replaced by its
/// numerical value before the formula is handed to RDF::Define. Only whole
/// identifiers are replaced: a quantity named `N` will not alter a column
/// named `Nsim`, and text already inserted for a previous quantity is never
/// scanned again.
///
/// For example, the following code line would create a new column named
/// `test`, replacing the relevant quantity `Nsim` by its value and using
/// the previously existing column `probability`.
/// \code
/// d.Define("test", "Nsim * probability");
/// \endcode
///
/// \param columnName The name of the column to be added.
/// \param formula The expression defining the column, in terms of existing
/// column names and relevant quantity names.
/// \return The updated RNode, which is also stored in `fDataSet`.
///
ROOT::RDF::RNode TRestDataSet::Define(const std::string& columnName, const std::string& formula) {
    // True for characters that may form part of a C++ identifier.
    const auto isIdentifierChar = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
    };

    std::string evalFormula = formula;
    for (auto const& [name, properties] : fQuantity) {
        // An empty name would match at every position and never advance.
        if (name.empty()) continue;

        const std::string value = DoubleToString(properties.value, "%12.10e");

        std::string::size_type pos = 0;
        while ((pos = evalFormula.find(name, pos)) != std::string::npos) {
            const std::string::size_type end = pos + name.size();
            const bool startsToken = pos == 0 || !isIdentifierChar(evalFormula[pos - 1]);
            const bool endsToken = end == evalFormula.size() || !isIdentifierChar(evalFormula[end]);

            if (startsToken && endsToken) {
                evalFormula.replace(pos, name.size(), value);
                // Jump over the inserted number so that e.g. its exponent `e` is not re-matched.
                pos += value.size();
            } else {
                // The match is a fragment of a longer identifier: leave it untouched.
                pos += 1;
            }
        }
    }

    fDataSet = fDataSet.Define(columnName, evalFormula);

    return fDataSet;
}

/////////////////////////////////////////////
/// \brief Prints on screen the information about the metadata members of TRestDataSet
///
Expand Down Expand Up @@ -511,6 +550,7 @@ void TRestDataSet::PrintMetadata() {
if (!fFilterContains[n].empty()) RESTMetadata << " Contains: " << fFilterContains[n];
if (fFilterGreaterThan[n] != -1) RESTMetadata << " Greater than: " << fFilterGreaterThan[n];
if (fFilterLowerThan[n] != -1) RESTMetadata << " Lower than: " << fFilterLowerThan[n];
if (fFilterEqualsTo[n] != -1) RESTMetadata << " Equals to: " << fFilterEqualsTo[n];

RESTMetadata << RESTendl;
n++;
Expand All @@ -535,6 +575,19 @@ void TRestDataSet::PrintMetadata() {
}
}

if (fMergedDataset) {
RESTMetadata << " " << RESTendl;
RESTMetadata << "This is a combined dataset." << RESTendl;
RESTMetadata << " -------------------- " << RESTendl;
RESTMetadata << " - Relevant quantities have been removed!" << RESTendl;
RESTMetadata << " - Dataset metadata properties correspond to the first file in the list."
<< RESTendl;
RESTMetadata << " " << RESTendl;
RESTMetadata << "List of imported files: " << RESTendl;
RESTMetadata << " -------------------- " << RESTendl;
for (const auto& fn : fImportedFiles) RESTMetadata << " - " << fn << RESTendl;
}

RESTMetadata << "----" << RESTendl;
}

Expand All @@ -559,10 +612,12 @@ void TRestDataSet::InitFromConfigFile() {
if (contains == "Not defined") contains = "";
Double_t greaterThan = StringToDouble(GetFieldValue("greaterThan", filterDefinition));
Double_t lowerThan = StringToDouble(GetFieldValue("lowerThan", filterDefinition));
Double_t equalsTo = StringToDouble(GetFieldValue("equalsTo", filterDefinition));

fFilterContains.push_back(contains);
fFilterGreaterThan.push_back(greaterThan);
fFilterLowerThan.push_back(lowerThan);
fFilterEqualsTo.push_back(equalsTo);

filterDefinition = GetNextElement(filterDefinition);
}
Expand Down Expand Up @@ -688,6 +743,7 @@ void TRestDataSet::Export(const std::string& filename) {
if (!fFilterContains[n].empty()) fprintf(f, " Contains: %s.", fFilterContains[n].c_str());
if (fFilterGreaterThan[n] != -1) fprintf(f, " Greater than: %6.3lf.", fFilterGreaterThan[n]);
if (fFilterLowerThan[n] != -1) fprintf(f, " Lower than: %6.3lf.", fFilterLowerThan[n]);
if (fFilterEqualsTo[n] != -1) fprintf(f, " Equals to: %6.3lf.", fFilterLowerThan[n]);
fprintf(f, "\n");
n++;
}
Expand Down Expand Up @@ -763,6 +819,7 @@ TRestDataSet& TRestDataSet::operator=(TRestDataSet& dS) {
fFilterContains = dS.GetFilterContains();
fFilterGreaterThan = dS.GetFilterGreaterThan();
fFilterLowerThan = dS.GetFilterLowerThan();
fFilterEqualsTo = dS.GetFilterEqualsTo();
fQuantity = dS.GetQuantity();
fTotalDuration = dS.GetTotalTimeInSeconds();
fCut = dS.GetCut();
Expand Down Expand Up @@ -803,5 +860,59 @@ void TRestDataSet::Import(const std::string& fileName) {
RESTInfo << "Opening " << fileName << RESTendl;
fDataSet = ROOT::RDataFrame("AnalysisTree", fileName);

fTree = (TTree*)file->Get("AnalysisTree");
fTree = (TChain*)file->Get("AnalysisTree");
}

///////////////////////////////////////////////
/// \brief This function initializes the chained tree and the RDataFrame using
/// as input several root files that should contain TRestDataSet metadata
/// information. The values of the first dataset will be considered to be stored
/// in this new instance.
///
/// The metadata member `fMergedDataset` will be set to true to understand this
/// dataset is the combination of several datasets, and not a pure original one.
///
void TRestDataSet::Import(std::vector<std::string> fileNames) {
for (const auto& fN : fileNames)
if (TRestTools::GetFileNameExtension(fN) != "root") {
RESTError << "Datasets can only be imported from root files" << RESTendl;
return;
}

if (fileNames.size() == 0) return;

TFile* file = TFile::Open(fileNames[0].c_str(), "READ");
if (file != nullptr) {
TIter nextkey(file->GetListOfKeys());
TKey* key;
while ((key = (TKey*)nextkey())) {
std::string kName = key->GetClassName();
if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
TRestDataSet* dS = file->Get<TRestDataSet>(key->GetName());
if (GetVerboseLevel() >= TRestStringOutput::REST_Verbose_Level::REST_Info)
dS->PrintMetadata();
*this = *dS;
}
}
} else {
RESTError << "Cannot open " << fileNames[0] << RESTendl;
exit(1);
}

RESTInfo << "Opening list of files. First file: " << fileNames[0] << RESTendl;
fDataSet = ROOT::RDataFrame("AnalysisTree", fileNames);

if (fTree != nullptr) {
delete fTree;
fTree = nullptr;
}
fTree = new TChain("AnalysisTree");

for (const auto& fN : fileNames) fTree->Add((TString)fN);

fMergedDataset = true;
fImportedFiles = fileNames;

fQuantity.clear();
}
2 changes: 2 additions & 0 deletions source/framework/core/src/TRestMetadata.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2327,6 +2327,8 @@ std::vector<string> TRestMetadata::GetDataMemberValues(string memberName, Int_t

result = Replace(result, "{", "");
result = Replace(result, "}", "");
result = Replace(result, "(", "");
result = Replace(result, ")", "");

std::vector<std::string> results = REST_StringHelper::Split(result, ",");

Expand Down

0 comments on commit c33746a

Please sign in to comment.