Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: get trainer from model_name in weight #1744

Merged
merged 1 commit into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion data/model_weight/acpi_AbsPowerModel.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"platform": {"All_Weights": {"Bias_Weight": 220.9079278650894, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 29.028228361462897}}}}}
{"model_name": "SGDRegressorTrainer_0", "platform": {"All_Weights": {"Bias_Weight": 220.9079278650894, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 29.028228361462897}}}}}
2 changes: 1 addition & 1 deletion data/model_weight/acpi_DynPowerModel.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"platform": {"All_Weights": {"Bias_Weight": 49.56491877218095, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 28.501356366108837}}}}}
{"model_name": "SGDRegressorTrainer_0", "platform": {"All_Weights": {"Bias_Weight": 49.56491877218095, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 28.501356366108837}}}}}
2 changes: 1 addition & 1 deletion data/model_weight/intel_rapl_AbsPowerModel.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"package": {"All_Weights": {"Bias_Weight": 69.91739430907396, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.16772409328642}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 47.142633336743344, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.57348245077466}}}}}
{"model_name": "SGDRegressorTrainer_0", "package": {"All_Weights": {"Bias_Weight": 69.91739430907396, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.16772409328642}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 47.142633336743344, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.57348245077466}}}}}
2 changes: 1 addition & 1 deletion data/model_weight/intel_rapl_DynPowerModel.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"package": {"All_Weights": {"Bias_Weight": 38.856412561925055, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.258830113477515}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 9.080889901856153, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.0358946796490924}}}}}
{"model_name": "SGDRegressorTrainer_0", "package": {"All_Weights": {"Bias_Weight": 38.856412561925055, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.258830113477515}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 9.080889901856153, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.0358946796490924}}}}}
3 changes: 2 additions & 1 deletion pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"strconv"
"strings"

"github.com/sustainable-computing-io/kepler/pkg/model/types"
"golang.org/x/sys/unix"
"k8s.io/klog/v2"
)
Expand Down Expand Up @@ -129,7 +130,7 @@ var (
FixedTrainerNameKey = "TRAINER"
FixedNodeTypeKey = "NODE_TYPE"
ModelFiltersKey = "FILTERS"
DefaultTrainerName = "SGDRegressorTrainer"
DefaultTrainerName = types.LinearRegressionTrainer
////////////////////////////////////

// KubeConfig is used to start k8s client with the pod running outside the cluster
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/estimator/local/regressor/exponential_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (

var (
exponentialCurveFits = []float64{1, 1, 1}
dummyExponentialWeightHandler = genHandlerFunc(exponentialCurveFits)
dummyExponentialWeightHandler = genHandlerFunc(exponentialCurveFits, types.ExponentialTrainer)
)

var _ = Describe("Test Exponential Predictor Unit", func() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/estimator/local/regressor/logarithm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (

var (
logarithmicCurveFits = []float64{1, 1, 1}
dummyLogarithmicWeightHandler = genHandlerFunc(logarithmicCurveFits)
dummyLogarithmicWeightHandler = genHandlerFunc(logarithmicCurveFits, types.LogarithmicTrainer)
)

var _ = Describe("Test Logarithmic Predictor Unit", func() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/estimator/local/regressor/logistic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (

var (
logisticCurveFits = []float64{1, 1, 1, 1}
dummyLogisticWeightHandler = genHandlerFunc(logisticCurveFits)
dummyLogisticWeightHandler = genHandlerFunc(logisticCurveFits, types.LogisticTrainer)
)

var _ = Describe("Test Logistic Predictor Unit", func() {
Expand Down
24 changes: 24 additions & 0 deletions pkg/model/estimator/local/regressor/model_weights.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ package regressor
import (
"errors"
"fmt"
"strings"

"github.com/sustainable-computing-io/kepler/pkg/model/types"

"github.com/sustainable-computing-io/kepler/pkg/config"
)
Expand Down Expand Up @@ -113,3 +116,24 @@ func (w ComponentModelWeights) String() string {
}
return fmt.Sprintf("%s (package: %v (core: %v, uncore: %v), dram: %v)", w.ModelName, w.Package, w.Core, w.Uncore, w.DRAM)
}

// Trainer derives the trainer name from ModelName by dropping the final
// "_"-separated segment (for example "SGDRegressorTrainer_0" yields
// "SGDRegressorTrainer"). It returns an empty string when ModelName is empty,
// contains no underscore, or when the derived name is rejected by
// isSupportedTrainer.
func (w ComponentModelWeights) Trainer() string {
	// Everything before the last underscore is the candidate trainer name;
	// a model name without any underscore has no candidate at all.
	cut := strings.LastIndex(w.ModelName, "_")
	if cut < 0 {
		return ""
	}
	if candidate := w.ModelName[:cut]; isSupportedTrainer(candidate) {
		return candidate
	}
	return ""
}
Comment on lines +120 to +130
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMHO, models should have the trainer-name metadata embedded in the json itself so that this isn't needed.

Copy link
Collaborator Author

@sunya-ch sunya-ch Aug 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

model_name carries more complete information, since it also embeds the node_type and is linked to the model DB.
I think we can additionally add trainer_name if needed (this requires an update on the model-server side first).


// isSupportedTrainer reports whether trainer exactly matches one of the
// entries in types.WeightSupportedTrainers.
func isSupportedTrainer(trainer string) bool {
	supported := types.WeightSupportedTrainers
	for i := range supported {
		if supported[i] == trainer {
			return true
		}
	}
	return false
}
21 changes: 16 additions & 5 deletions pkg/model/estimator/local/regressor/regressor.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@ func (r *Regressor) getWeightFromServer() (*ComponentModelWeights, error) {
return nil, fmt.Errorf("model unmarshal error: %v (%s)", err, string(body))
}
if weightResponse.ModelName != "" {
klog.V(3).Infof("Using weights trained by %s", weightResponse.ModelName)
r.TrainerName = weightResponse.Trainer()
klog.V(3).Infof("Using weights from model %s trained by %s for %s", weightResponse.ModelName, r.TrainerName, r.EnergySource)
}
r.updateCoreRatio(weightResponse.ModelMachineSpec)
return &weightResponse, nil
Expand All @@ -195,6 +196,7 @@ func (r *Regressor) getWeightFromServer() (*ComponentModelWeights, error) {
// loadWeightFromURLorLocal gets the weights from either a local file or a URL.
// If the string starts with '/', we treat it as a local file.
func (r *Regressor) loadWeightFromURLorLocal() (*ComponentModelWeights, error) {
var modelName string // to be set by ModelWeightsURL
var body []byte
var err error

Expand All @@ -204,12 +206,21 @@ func (r *Regressor) loadWeightFromURLorLocal() (*ComponentModelWeights, error) {
if err != nil {
return nil, err
}
} else {
modelName = utils.GetModelNameFromURL(r.ModelWeightsURL)
}
var content ComponentModelWeights
err = json.Unmarshal(body, &content)
if err != nil {
return nil, fmt.Errorf("model unmarshal error: %v (%s)", err, string(body))
}
if content.ModelName == "" {
// Expected only in the loadWeightFromURL case;
// a file at ModelWeightsFilepath should already contain the model_name field.
content.ModelName = modelName
}
r.TrainerName = content.Trainer()
klog.V(3).Infof("Using weights from model %s trained by %s for %s", content.ModelName, r.TrainerName, r.EnergySource)
r.updateCoreRatio(content.ModelMachineSpec)
return &content, nil
}
Expand Down Expand Up @@ -250,13 +261,13 @@ func (r *Regressor) loadWeightFromURL() ([]byte, error) {
// Create Predictor based on trainer name
func (r *Regressor) createPredictor(weight ModelWeights) (predictor Predictor, err error) {
switch r.TrainerName {
case "SGDRegressorTrainer":
case types.LinearRegressionTrainer:
predictor, err = NewLinearPredictor(weight)
case "LogarithmicRegressionTrainer":
case types.LogarithmicTrainer:
predictor, err = NewLogarithmicPredictor(weight)
case "LogisticRegressionTrainer":
case types.LogisticTrainer:
predictor, err = NewLogisticPredictor(weight)
case "ExponentialRegressionTrainer":
case types.ExponentialTrainer:
predictor, err = NewExponentialPredictor(weight)
default:
predictor, err = NewLinearPredictor(weight)
Expand Down
29 changes: 22 additions & 7 deletions pkg/model/estimator/local/regressor/regressor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,15 @@ var (
SampleDramNumbericalVars = map[string]NormalizedNumericalFeature{
"cache_miss": {Weight: 1.0, Scale: 2},
}
DummyWeightHandler = http.HandlerFunc(genHandlerFunc([]float64{}))
DummyModelName = "dummy"
DummyWeightHandler = http.HandlerFunc(genHandlerFunc([]float64{}, types.LinearRegressionTrainer))
ModelCores = config.GenerateSpec().Cores
ExpectedAbsPowerFromDummyWeightHandler = 2500
ExpectedIdlePowerFromDummyWeightHandler = 2000
)

func GenPlatformModelWeights(curveFitWeights []float64) ComponentModelWeights {
func GenPlatformModelWeights(curveFitWeights []float64, trainerName string) ComponentModelWeights {
return ComponentModelWeights{
ModelName: DummyModelName,
ModelName: trainerName + "_0",
ModelMachineSpec: &config.MachineSpec{
Cores: ModelCores,
},
Expand All @@ -100,7 +99,7 @@ func genWeights(numericalVars map[string]NormalizedNumericalFeature, curveFitWei
}
}

func genHandlerFunc(curvefit []float64) (handlerFunc func(w http.ResponseWriter, r *http.Request)) {
func genHandlerFunc(curvefit []float64, trainerName string) (handlerFunc func(w http.ResponseWriter, r *http.Request)) {
return func(w http.ResponseWriter, r *http.Request) {
reqBody, err := io.ReadAll(r.Body)
if err != nil {
Expand All @@ -114,7 +113,7 @@ func genHandlerFunc(curvefit []float64) (handlerFunc func(w http.ResponseWriter,
if req.EnergySource == types.ComponentEnergySource {
err = json.NewEncoder(w).Encode(GenComponentModelWeights(curvefit))
} else {
err = json.NewEncoder(w).Encode(GenPlatformModelWeights(curvefit))
err = json.NewEncoder(w).Encode(GenPlatformModelWeights(curvefit, trainerName))
}
if err != nil {
panic(err)
Expand Down Expand Up @@ -177,7 +176,7 @@ var _ = Describe("Test Regressor Weight Unit (default trainer)", func() {
})

It("Get Node Components Power By Default Regression Estimator with ModelServerEndpoint", func() {
compPowers := GetNodeComponentsPowerFromDummyServer(genHandlerFunc([]float64{}), "")
compPowers := GetNodeComponentsPowerFromDummyServer(genHandlerFunc([]float64{}, types.LinearRegressionTrainer), "")
// TODO: verify if the power makes sense
Expect(compPowers[0].Core).Should(BeEquivalentTo(3000))
})
Expand Down Expand Up @@ -317,4 +316,20 @@ var _ = Describe("Test Regressor Weight Unit (default trainer)", func() {
Entry("invalid model core", 16, 0, 1.0),
)
})

DescribeTable("Test ComponentModelWeights.Trainer", func(modelName, expectedTrainer string) {
w := ComponentModelWeights{
ModelName: modelName,
}
Expect(w.Trainer()).To(Equal(expectedTrainer))
},
Entry("empty model name", "", ""),
Entry("invalid model name", "some", ""),
Entry("invalid model name with _", "some_invalid", ""),
Entry("valid SGDRegressorTrainer", "SGDRegressorTrainer_0", "SGDRegressorTrainer"),
Entry("valid LogarithmicRegressionTrainer", "LogarithmicRegressionTrainer_0", "LogarithmicRegressionTrainer"),
Entry("valid LogisticRegressionTrainer", "LogisticRegressionTrainer_0", "LogisticRegressionTrainer"),
Entry("valid ExponentialRegressionTrainer", "ExponentialRegressionTrainer_0", "ExponentialRegressionTrainer"),
Entry("invalid GradientBoostingRegressorTrainer", "GradientBoostingRegressorTrainer_0", ""),
)
})
9 changes: 9 additions & 0 deletions pkg/model/model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ var _ = Describe("Test Model Unit", func() {
Entry("IdlePower with invalid coreRatio = -1", true, -1.0, 1.0),
Entry("IdlePower with invalid coreRatio > 1", true, 1.2, 1.0),
)

DescribeTable("Test GetModelNameFromURL()", func(url, expectedModelName string) {
Expect(utils.GetModelNameFromURL(url)).To(Equal(expectedModelName))
},
Entry("empty", "", ""),
Entry("some model with multiple subfolders", "http://some/path/to/some_model.json", "some_model"),
Entry("some model with direct path", "http://some_model.json", "some_model"),
Entry("some model without file extension", "http://some_model", "some_model"),
)
})

})
9 changes: 9 additions & 0 deletions pkg/model/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,23 @@
ExponentialTrainer = "ExponentialRegressionTrainer"
)

var (
WeightSupportedTrainers = []string{
LinearRegressionTrainer,
LogarithmicTrainer,
LogisticTrainer,
ExponentialTrainer,
}
)

// getModelOutputTypeConverter returns a newly built slice holding the model
// output type names "AbsPower" and "DynPower", in that order.
func getModelOutputTypeConverter() []string {
return []string{

Check failure on line 59 in pkg/model/types/types.go

View workflow job for this annotation

GitHub Actions / golang / escapes_detect

[]string{...} escapes to heap
"AbsPower", "DynPower",
}
}

// getModelTypeConverter returns a newly built slice holding the model type
// names "Ratio", "Regressor" and "EstimatorSidecar", in that order.
func getModelTypeConverter() []string {
return []string{

Check failure on line 65 in pkg/model/types/types.go

View workflow job for this annotation

GitHub Actions / golang / escapes_detect

[]string{...} escapes to heap
"Ratio", "Regressor", "EstimatorSidecar",
}
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/model/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package utils

import (
"math"
"strings"

"github.com/sustainable-computing-io/kepler/pkg/sensors/components/source"
)
Expand Down Expand Up @@ -69,3 +70,12 @@ func GetCoreRatio(isIdlePower bool, inCoreRatio float64) float64 {
}
return coreRatio
}

// GetModelNameFromURL extracts a model name from url. It takes the text after
// the last "/" (or the whole string when there is none) and cuts it at the
// first ".", so "http://some/path/to/some_model.json" yields "some_model".
// An empty url yields an empty name.
func GetModelNameFromURL(url string) string {
	// Keep only the final path segment.
	name := url[strings.LastIndex(url, "/")+1:]
	// Drop everything from the first dot onwards (the extension and anything after it).
	if dot := strings.Index(name, "."); dot >= 0 {
		name = name[:dot]
	}
	return name
}
Loading