Skip to content

Commit

Permalink
[VC-35738] Bubble up the errors from sub-commands and handle errors c…
Browse files Browse the repository at this point in the history
…entrally instead of using log.Fatal at multiple sites (#599)

* Bubble up the errors from sub-commands and handle errors centrally instead of using log.Fatal at multiple sites
* Log the bubbled up error instead of printing it
* Turn on SilenceErrors and SilenceUsage to prevent Cobra from messing up the stderr output
* Remove obsolete translations for converting log.printf to error messages
  • Loading branch information
wallrj authored Nov 5, 2024
1 parent 41553d4 commit 61d64e4
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 89 deletions.
12 changes: 6 additions & 6 deletions cmd/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"github.com/spf13/cobra"

"github.com/jetstack/preflight/pkg/agent"
"github.com/jetstack/preflight/pkg/logs"
"github.com/jetstack/preflight/pkg/permissions"
)

Expand All @@ -16,7 +15,7 @@ var agentCmd = &cobra.Command{
Short: "start the preflight agent",
Long: `The agent will periodically gather data for the configured data
gatherers and send it to a remote backend for evaluation`,
Run: agent.Run,
RunE: agent.Run,
}

var agentInfoCmd = &cobra.Command{
Expand All @@ -34,24 +33,25 @@ var agentRBACCmd = &cobra.Command{
Use: "rbac",
Short: "print the agent's minimal RBAC manifest",
Long: `Print RBAC string by reading GVRs`,
Run: func(cmd *cobra.Command, args []string) {
RunE: func(cmd *cobra.Command, args []string) error {

b, err := os.ReadFile(agent.Flags.ConfigFilePath)
if err != nil {
logs.Log.Fatalf("Failed to read config file: %s", err)
return fmt.Errorf("Failed to read config file: %s", err)
}
cfg, err := agent.ParseConfig(b)
if err != nil {
logs.Log.Fatalf("Failed to parse config file: %s", err)
return fmt.Errorf("Failed to parse config file: %s", err)
}

err = agent.ValidateDataGatherers(cfg.DataGatherers)
if err != nil {
logs.Log.Fatalf("Failed to validate data gatherers: %s", err)
return fmt.Errorf("Failed to validate data gatherers: %s", err)
}

out := permissions.GenerateFullManifest(cfg.DataGatherers)
fmt.Print(out)
return nil
},
}

Expand Down
2 changes: 1 addition & 1 deletion cmd/echo.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ var echoCmd = &cobra.Command{
Short: "starts an echo server to test the agent",
Long: `The agent sends data to a server. This echo server
can be used to act as the server part and echo the data received by the agent.`,
Run: echo.Echo,
RunE: echo.Echo,
}

func init() {
Expand Down
21 changes: 17 additions & 4 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ import (
"os"
"strings"

"github.com/jetstack/preflight/pkg/logs"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"k8s.io/klog/v2"

"github.com/jetstack/preflight/pkg/logs"
)

// rootCmd represents the base command when called without any subcommands
Expand All @@ -21,6 +23,14 @@ Preflight checks are bundled into Packages`,
PersistentPreRun: func(cmd *cobra.Command, args []string) {
logs.Initialize()
},
// SilenceErrors and SilenceUsage prevents this command or any sub-command
// from printing arbitrary text to stderr.
// Why? To ensure that each line of output can be parsed as a single message
// for consumption by logging agents such as fluentd.
// Usage information is still available on stdout with the `-h` and `--help`
// flags.
SilenceErrors: true,
SilenceUsage: true,
}

func init() {
Expand All @@ -31,13 +41,16 @@ func init() {

// Execute adds all child commands to the root command and sets flags appropriately.
// This is called by main.main(). It only needs to happen once to the rootCmd.
// If the root command or sub-command returns an error, the error message will
// will be logged and the process will exit with status 1.
func Execute() {
logs.AddFlags(rootCmd.PersistentFlags())

var exitCode int
if err := rootCmd.Execute(); err != nil {
fmt.Println(err)
os.Exit(1)
exitCode = 1
klog.ErrorS(err, "Exiting due to error", "exit-code", exitCode)
}
klog.FlushAndExit(klog.ExitFlushTimeout, exitCode)
}

func setFlagsFromEnv(prefix string, fs *pflag.FlagSet) {
Expand Down
60 changes: 35 additions & 25 deletions pkg/agent/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ var Flags AgentCmdFlags
const schemaVersion string = "v2.0.0"

// Run starts the agent process
func Run(cmd *cobra.Command, args []string) {
func Run(cmd *cobra.Command, args []string) error {
logs.Log.Printf("Preflight agent version: %s (%s)", version.PreflightVersion, version.Commit)
ctx, cancel := context.WithCancel(
klog.NewContext(
Expand All @@ -62,31 +62,33 @@ func Run(cmd *cobra.Command, args []string) {

file, err := os.Open(Flags.ConfigFilePath)
if err != nil {
logs.Log.Fatalf("Failed to load config file for agent from: %s", Flags.ConfigFilePath)
return fmt.Errorf("Failed to load config file for agent from: %s", Flags.ConfigFilePath)
}
defer file.Close()

b, err := io.ReadAll(file)
if err != nil {
logs.Log.Fatalf("Failed to read config file: %s", err)
return fmt.Errorf("Failed to read config file: %s", err)
}

cfg, err := ParseConfig(b)
if err != nil {
logs.Log.Fatalf("Failed to parse config file: %s", err)
return fmt.Errorf("Failed to parse config file: %s", err)
}

config, preflightClient, err := ValidateAndCombineConfig(logs.Log, cfg, Flags)
if err != nil {
logs.Log.Fatalf("While evaluating configuration: %v", err)
return fmt.Errorf("While evaluating configuration: %v", err)
}

group, gctx := errgroup.WithContext(ctx)
defer func() {
// TODO: replace Fatalf log calls with Errorf and return the error
cancel()
if err := group.Wait(); err != nil {
logs.Log.Fatalf("failed to wait for controller-runtime component to stop: %v", err)
if groupErr := group.Wait(); groupErr != nil {
err = multierror.Append(
err,
fmt.Errorf("failed to wait for controller-runtime component to stop: %v", groupErr),
)
}
}()

Expand Down Expand Up @@ -159,7 +161,7 @@ func Run(cmd *cobra.Command, args []string) {
// the agent pod's events.
eventf, err := newEventf(config.InstallNS)
if err != nil {
logs.Log.Fatalf("failed to create event recorder: %v", err)
return fmt.Errorf("failed to create event recorder: %v", err)
}

dataGatherers := map[string]datagatherer.DataGatherer{}
Expand All @@ -169,12 +171,12 @@ func Run(cmd *cobra.Command, args []string) {
kind := dgConfig.Kind
if dgConfig.DataPath != "" {
kind = "local"
logs.Log.Fatalf("running data gatherer %s of type %s as Local, data-path override present: %s", dgConfig.Name, dgConfig.Kind, dgConfig.DataPath)
return fmt.Errorf("running data gatherer %s of type %s as Local, data-path override present: %s", dgConfig.Name, dgConfig.Kind, dgConfig.DataPath)
}

newDg, err := dgConfig.Config.NewDataGatherer(gctx)
if err != nil {
logs.Log.Fatalf("failed to instantiate %q data gatherer %q: %v", kind, dgConfig.Name, err)
return fmt.Errorf("failed to instantiate %q data gatherer %q: %v", kind, dgConfig.Name, err)
}

logs.Log.Printf("starting %q datagatherer", dgConfig.Name)
Expand Down Expand Up @@ -225,18 +227,21 @@ func Run(cmd *cobra.Command, args []string) {
// TODO(wallrj): Pass a context to gatherAndOutputData, so that we don't
// have to wait for it to finish before exiting the process.
for {
gatherAndOutputData(eventf, config, preflightClient, dataGatherers)
if err := gatherAndOutputData(eventf, config, preflightClient, dataGatherers); err != nil {
return err
}

if config.OneShot {
break
}

select {
case <-gctx.Done():
return
return nil
case <-time.After(config.Period):
}
}
return nil
}

// Creates an event recorder for the agent's Pod object. Expects the env var
Expand All @@ -246,7 +251,7 @@ func Run(cmd *cobra.Command, args []string) {
func newEventf(installNS string) (Eventf, error) {
restcfg, err := kubeconfig.LoadRESTConfig("")
if err != nil {
logs.Log.Fatalf("failed to load kubeconfig: %v", err)
return nil, fmt.Errorf("failed to load kubeconfig: %v", err)
}
scheme := runtime.NewScheme()
_ = corev1.AddToScheme(scheme)
Expand Down Expand Up @@ -276,31 +281,35 @@ func newEventf(installNS string) (Eventf, error) {
// Like Printf but for sending events to the agent's Pod object.
type Eventf func(eventType, reason, msg string, args ...interface{})

func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient client.Client, dataGatherers map[string]datagatherer.DataGatherer) {
func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient client.Client, dataGatherers map[string]datagatherer.DataGatherer) error {
var readings []*api.DataReading

if config.InputPath != "" {
logs.Log.Printf("Reading data from local file: %s", config.InputPath)
data, err := os.ReadFile(config.InputPath)
if err != nil {
logs.Log.Fatalf("failed to read local data file: %s", err)
return fmt.Errorf("failed to read local data file: %s", err)
}
err = json.Unmarshal(data, &readings)
if err != nil {
logs.Log.Fatalf("failed to unmarshal local data file: %s", err)
return fmt.Errorf("failed to unmarshal local data file: %s", err)
}
} else {
readings = gatherData(config, dataGatherers)
var err error
readings, err = gatherData(config, dataGatherers)
if err != nil {
return err
}
}

if config.OutputPath != "" {
data, err := json.MarshalIndent(readings, "", " ")
if err != nil {
logs.Log.Fatal("failed to marshal JSON")
return fmt.Errorf("failed to marshal JSON: %s", err)
}
err = os.WriteFile(config.OutputPath, data, 0644)
if err != nil {
logs.Log.Fatalf("failed to output to local file: %s", err)
return fmt.Errorf("failed to output to local file: %s", err)
}
logs.Log.Printf("Data saved to local file: %s", config.OutputPath)
} else {
Expand All @@ -316,12 +325,13 @@ func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient c
logs.Log.Printf("retrying in %v after error: %s", t, err)
})
if err != nil {
logs.Log.Fatalf("Exiting due to fatal error uploading: %v", err)
return fmt.Errorf("Exiting due to fatal error uploading: %v", err)
}
}
return nil
}

func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.DataGatherer) []*api.DataReading {
func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.DataGatherer) ([]*api.DataReading, error) {
var readings []*api.DataReading

var dgError *multierror.Error
Expand Down Expand Up @@ -360,10 +370,10 @@ func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.Dat
}

if config.StrictMode && dgError.ErrorOrNil() != nil {
logs.Log.Fatalf("halting datagathering in strict mode due to error: %s", dgError.ErrorOrNil())
return nil, fmt.Errorf("halting datagathering in strict mode due to error: %s", dgError.ErrorOrNil())
}

return readings
return readings, nil
}

func postData(config CombinedConfig, preflightClient client.Client, readings []*api.DataReading) error {
Expand All @@ -388,7 +398,7 @@ func postData(config CombinedConfig, preflightClient client.Client, readings []*
if config.OrganizationID == "" {
data, err := json.Marshal(readings)
if err != nil {
logs.Log.Fatalf("Cannot marshal readings: %+v", err)
return fmt.Errorf("Cannot marshal readings: %+v", err)
}

// log and collect metrics about the upload size
Expand Down
8 changes: 2 additions & 6 deletions pkg/echo/echo.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,16 @@ import (
"github.com/spf13/cobra"

"github.com/jetstack/preflight/api"
"github.com/jetstack/preflight/pkg/logs"
)

var EchoListen string

var Compact bool

func Echo(cmd *cobra.Command, args []string) {
func Echo(cmd *cobra.Command, args []string) error {
http.HandleFunc("/", echoHandler)
fmt.Println("Listening to requests at ", EchoListen)
err := http.ListenAndServe(EchoListen, nil)
if err != nil {
logs.Log.Fatal(err)
}
return http.ListenAndServe(EchoListen, nil)
}

func echoHandler(w http.ResponseWriter, r *http.Request) {
Expand Down
7 changes: 1 addition & 6 deletions pkg/logs/logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,7 @@ func (w LogToSlogWriter) Write(p []byte) (n int, err error) {

message := string(p)
if strings.Contains(message, "error") ||
strings.Contains(message, "failed") ||
strings.Contains(message, "fatal") ||
strings.Contains(message, "Failed") ||
strings.Contains(message, "While evaluating configuration") ||
strings.Contains(message, "data-path override present") ||
strings.Contains(message, "Cannot marshal readings") {
strings.Contains(message, "failed") {
w.Slog.With("source", w.Source).Error(message)
} else {
w.Slog.With("source", w.Source).Info(message)
Expand Down
52 changes: 11 additions & 41 deletions pkg/logs/logs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,52 +375,22 @@ func Test_replaceWithStaticTimestamps(t *testing.T) {
}

func TestLogToSlogWriter(t *testing.T) {
// This test makes sure that all the agent's Log.Fatalf calls are correctly
// translated to slog.Error calls.
// This test makes sure that all the agent's remaining Log calls are correctly
// translated to slog.Error calls where appropriate.
//
// This list was generated using:
// grep -r "Log\.Fatalf" ./cmd ./pkg
// git grep -i "log\.\(print\|fatal\)" pkg/ cmd/ | fgrep -e error -e failed
given := strings.TrimPrefix(`
Failed to load config file for agent from
Failed to read config file
Failed to parse config file
While evaluating configuration
failed to run pprof profiler
failed to run the health check server
failed to start a controller-runtime component
failed to wait for controller-runtime component to stop
running data gatherer %s of type %s as Local, data-path override present
failed to instantiate %q data gatherer
failed to read local data file
failed to unmarshal local data file
failed to output to local file
Exiting due to fatal error uploading
halting datagathering in strict mode due to error
Cannot marshal readings
Failed to read config file
Failed to parse config file
Failed to validate data gatherers
failed to complete initial sync of %q data gatherer %q: %v
error messages will not show in the pod's events because the POD_NAME environment variable is empty
retrying in %v after error: %s
datagatherer informer for %q has failed and is backing off due to error: %s
this is a happy log that should show as INFO`, "\n")
expect := strings.TrimPrefix(`
level=ERROR msg="Failed to load config file for agent from" source=agent
level=ERROR msg="Failed to read config file" source=agent
level=ERROR msg="Failed to parse config file" source=agent
level=ERROR msg="While evaluating configuration" source=agent
level=ERROR msg="failed to run pprof profiler" source=agent
level=ERROR msg="failed to run the health check server" source=agent
level=ERROR msg="failed to start a controller-runtime component" source=agent
level=ERROR msg="failed to wait for controller-runtime component to stop" source=agent
level=ERROR msg="running data gatherer %!s(MISSING) of type %!s(MISSING) as Local, data-path override present" source=agent
level=ERROR msg="failed to instantiate %!q(MISSING) data gatherer" source=agent
level=ERROR msg="failed to read local data file" source=agent
level=ERROR msg="failed to unmarshal local data file" source=agent
level=ERROR msg="failed to output to local file" source=agent
level=ERROR msg="Exiting due to fatal error uploading" source=agent
level=ERROR msg="halting datagathering in strict mode due to error" source=agent
level=ERROR msg="Cannot marshal readings" source=agent
level=ERROR msg="Failed to read config file" source=agent
level=ERROR msg="Failed to parse config file" source=agent
level=ERROR msg="Failed to validate data gatherers" source=agent
level=ERROR msg="failed to complete initial sync of %!q(MISSING) data gatherer %!q(MISSING): %!v(MISSING)" source=agent
level=ERROR msg="error messages will not show in the pod's events because the POD_NAME environment variable is empty" source=agent
level=ERROR msg="retrying in %!v(MISSING) after error: %!s(MISSING)" source=agent
level=ERROR msg="datagatherer informer for %!q(MISSING) has failed and is backing off due to error: %!s(MISSING)" source=agent
level=INFO msg="this is a happy log that should show as INFO" source=agent
`, "\n")

Expand Down

0 comments on commit 61d64e4

Please sign in to comment.