From 082f12bb906adf29c90acefb5258c34d2c0dfd43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Thu, 1 Oct 2020 15:22:25 +0200 Subject: [PATCH 1/2] Add reader of filestream input with configuration --- filebeat/input/filestream/config.go | 147 ++++++++++++++++ filebeat/input/filestream/filestream.go | 214 ++++++++++++++++++++++++ 2 files changed, 361 insertions(+) create mode 100644 filebeat/input/filestream/config.go create mode 100644 filebeat/input/filestream/filestream.go diff --git a/filebeat/input/filestream/config.go b/filebeat/input/filestream/config.go new file mode 100644 index 000000000000..93b232325945 --- /dev/null +++ b/filebeat/input/filestream/config.go @@ -0,0 +1,147 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package filestream + +import ( + "fmt" + "time" + + "github.com/dustin/go-humanize" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/common/match" + "github.com/elastic/beats/v7/libbeat/reader/readfile" +) + +// Config stores the options of a file stream. +type config struct { + Paths []string `config:"paths"` + Close closerConfig `config:"close"` + FileWatcher *common.ConfigNamespace `config:"file_watcher"` + Reader readerConfig `config:"readers"` + FileIdentity *common.ConfigNamespace `config:"file_identity"` + CleanInactive time.Duration `config:"clean_inactive" validate:"min=0"` + CleanRemoved bool `config:"clean_removed"` + HarvesterLimit uint32 `config:"harvester_limit" validate:"min=0"` + IgnoreOlder time.Duration `config:"ignore_older"` +} + +type closerConfig struct { + OnStateChange stateChangeCloserConfig `config:"on_state_change"` + Reader readerCloserConfig `config:"reader"` +} + +type readerCloserConfig struct { + AfterInterval time.Duration + Inactive time.Duration + OnEOF bool +} + +type stateChangeCloserConfig struct { + CheckInterval time.Duration + Removed bool + Renamed bool +} + +// TODO should this be inline? +type readerConfig struct { + Backoff backoffConfig `config:"backoff"` + BufferSize int `config:"buffer_size"` + Encoding string `config:"encoding"` + ExcludeLines []match.Matcher `config:"exclude_lines"` + IncludeLines []match.Matcher `config:"include_lines"` + LineTerminator readfile.LineTerminator `config:"line_terminator"` + MaxBytes int `config:"message_max_bytes" validate:"min=0,nonzero"` + Tail bool `config:"seek_to_tail"` + + Parsers []*common.ConfigNamespace `config:"parsers"` // TODO multiline, json, syslog? +} + +type backoffConfig struct { + Init time.Duration `config:"init" validate:"nonzero"` + Max time.Duration `config:"max" validate:"nonzero"` +} + +func defaultConfig() config { + return config{ + Paths: []string{}, + Close: defaultCloserConfig(), + Reader: defaultReaderConfig(), + CleanInactive: 0, + CleanRemoved: true, + HarvesterLimit: 0, + IgnoreOlder: 0, + } +} + +func defaultCloserConfig() closerConfig { + return closerConfig{ + OnStateChange: stateChangeCloserConfig{ + CheckInterval: 5 * time.Second, + Removed: true, // TODO check clean_removed option + Renamed: false, + }, + Reader: readerCloserConfig{ + OnEOF: false, + Inactive: 0 * time.Second, + AfterInterval: 0 * time.Second, + }, + } +} + +func defaultReaderConfig() readerConfig { + return readerConfig{ + Backoff: backoffConfig{ + Init: 1 * time.Second, + Max: 10 * time.Second, + }, + BufferSize: 16 * humanize.KiByte, + LineTerminator: readfile.AutoLineTerminator, + MaxBytes: 10 * humanize.MiByte, + Tail: false, + Parsers: nil, + } +} + +func (c *config) Validate() error { + if len(c.Paths) == 0 { + return fmt.Errorf("no path is configured") + } + // TODO + //if c.CleanInactive != 0 && c.IgnoreOlder == 0 { + // return fmt.Errorf("ignore_older must be enabled when clean_inactive is used") + //} + + // TODO + //if c.CleanInactive != 0 && c.CleanInactive <= c.IgnoreOlder+c.ScanFrequency { + // return fmt.Errorf("clean_inactive must be > ignore_older + scan_frequency to make sure only files which are not monitored anymore are removed") + //} + + // TODO + //if c.JSON != nil && len(c.JSON.MessageKey) == 0 && + // c.Multiline != nil { + // return fmt.Errorf("When using the JSON decoder and multiline together, you need to specify a message_key value") + //} + + //if c.JSON != nil && len(c.JSON.MessageKey) == 0 && + // (len(c.IncludeLines) > 0 || len(c.ExcludeLines) > 0) { + // return fmt.Errorf("When using the JSON decoder and line filtering together, you need to specify a message_key value") + //} + + return nil +} diff --git a/filebeat/input/filestream/filestream.go b/filebeat/input/filestream/filestream.go new file mode 100644 index 000000000000..b5c595f12e7d --- /dev/null +++ b/filebeat/input/filestream/filestream.go @@ -0,0 +1,214 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package filestream + +import ( + "context" + "errors" + "io" + "os" + "time" + + input "github.com/elastic/beats/v7/filebeat/input/v2" + "github.com/elastic/beats/v7/libbeat/common/backoff" + "github.com/elastic/beats/v7/libbeat/logp" + "github.com/elastic/go-concert/ctxtool" + "github.com/elastic/go-concert/unison" +) + +var ( + ErrFileTruncate = errors.New("detected file being truncated") + ErrClosed = errors.New("reader closed") +) + +// logFile contains all log related data +type logFile struct { + file *os.File + log *logp.Logger + ctx context.Context + cancelReading context.CancelFunc + + closeInactive time.Duration + closeAfterInterval time.Duration + closeOnEOF bool + + offset int64 + lastTimeRead time.Time + backoff backoff.Backoff + tg unison.TaskGroup +} + +// newFileReader creates a new log instance to read log sources +func newFileReader( + log *logp.Logger, + canceler input.Canceler, + f *os.File, + config readerConfig, + closerConfig readerCloserConfig, +) (*logFile, error) { + offset, err := f.Seek(0, os.SEEK_CUR) + if err != nil { + return nil, err + } + + l := &logFile{ + file: f, + log: log, + closeInactive: closerConfig.Inactive, + closeAfterInterval: closerConfig.AfterInterval, + closeOnEOF: closerConfig.OnEOF, + offset: offset, + lastTimeRead: time.Now(), + backoff: backoff.NewExpBackoff(canceler.Done(), config.Backoff.Init, config.Backoff.Max), + tg: unison.TaskGroup{}, + } + + l.ctx, l.cancelReading = ctxtool.WithFunc(ctxtool.FromCanceller(canceler), func() { + err := l.tg.Stop() + if err != nil { + l.log.Errorf("Error while stopping filestream logFile reader: %v", err) + } + }) + + l.startFileMonitoringIfNeeded() + + return l, nil +} + +// Read reads from the reader and updates the offset +// The total number of bytes read is returned. +func (f *logFile) Read(buf []byte) (int, error) { + totalN := 0 + + for f.ctx.Err() == nil { + n, err := f.file.Read(buf) + if n > 0 { + f.offset += int64(n) + f.lastTimeRead = time.Now() + } + totalN += n + + // Read from source completed without error + // Either end reached or buffer full + if err == nil { + // reset backoff for next read + f.backoff.Reset() + return totalN, nil + } + + // Move buffer forward for next read + buf = buf[n:] + + // Checks if an error happened or buffer is full + // If buffer is full, cannot continue reading. + // Can happen if n == bufferSize + io.EOF error + err = f.errorChecks(err) + if err != nil || len(buf) == 0 { + return totalN, err + } + + f.log.Debugf("End of file reached: %s; Backoff now.", f.file.Name()) + f.backoff.Wait() + } + + return 0, ErrClosed +} + +func (f *logFile) startFileMonitoringIfNeeded() { + if f.closeInactive == 0 && f.closeAfterInterval == 0 { + return + } + + if f.closeInactive > 0 { + f.tg.Go(func(ctx unison.Canceler) error { + f.closeIfTimeout(ctx) + return nil + }) + } + + if f.closeAfterInterval > 0 { + f.tg.Go(func(ctx unison.Canceler) error { + f.closeIfInactive(ctx) + return nil + }) + } +} + +func (f *logFile) closeIfTimeout(ctx unison.Canceler) { + timer := time.NewTimer(f.closeAfterInterval) + for { + select { + case <-ctx.Done(): + return + case <-timer.C: + f.cancelReading() + return + } + } +} + +func (f *logFile) closeIfInactive(ctx unison.Canceler) { + // TODO it can be optimized + for ctx.Err() == nil { + age := time.Since(f.lastTimeRead) + if age > f.closeInactive { + f.cancelReading() + return + } + } +} + +// errorChecks determines the cause for EOF errors, and how the EOF event should be handled +// based on the config options. +func (f *logFile) errorChecks(err error) error { + if err != io.EOF { + f.log.Error("Unexpected state reading from %s; error: %s", f.file.Name(), err) + return err + } + + return f.handleEOF() +} + +func (f *logFile) handleEOF() error { + if f.closeOnEOF { + return io.EOF + } + + // Refetch fileinfo to check if the file was truncated. + // Errors if the file was removed/rotated after reading and before + // calling the stat function + info, statErr := f.file.Stat() + if statErr != nil { + f.log.Error("Unexpected error reading from %s; error: %s", f.file.Name(), statErr) + return statErr + } + + // check if file was truncated + if info.Size() < f.offset { + f.log.Debugf("File was truncated as offset (%d) > size (%d): %s", f.offset, info.Size(), f.file.Name()) + return ErrFileTruncate + } + + return nil +} + +// Close +func (f *logFile) Close() error { + f.cancelReading() + return f.file.Close() +} From 850916cda6b098193e48b9da4fde9ec83cf040ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Fri, 2 Oct 2020 10:56:58 +0200 Subject: [PATCH 2/2] check if file is inactive every 5 minutes --- filebeat/input/filestream/filestream.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/filebeat/input/filestream/filestream.go b/filebeat/input/filestream/filestream.go index b5c595f12e7d..59f26ccca1b3 100644 --- a/filebeat/input/filestream/filestream.go +++ b/filebeat/input/filestream/filestream.go @@ -151,6 +151,8 @@ func (f *logFile) startFileMonitoringIfNeeded() { func (f *logFile) closeIfTimeout(ctx unison.Canceler) { timer := time.NewTimer(f.closeAfterInterval) + defer timer.Stop() + for { select { case <-ctx.Done(): @@ -163,12 +165,21 @@ func (f *logFile) closeIfTimeout(ctx unison.Canceler) { } func (f *logFile) closeIfInactive(ctx unison.Canceler) { - // TODO it can be optimized - for ctx.Err() == nil { - age := time.Since(f.lastTimeRead) - if age > f.closeInactive { - f.cancelReading() + // This can be made configureble if users need a more flexible + // cheking for inactive files. + ticker := time.NewTicker(5 * time.Minute) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): return + case <-ticker.C: + age := time.Since(f.lastTimeRead) + if age > f.closeInactive { + f.cancelReading() + return + } } } }