Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exclude files in filebeat #563

Merged
merged 1 commit into from
Dec 21, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ https://github.com/elastic/beats/compare/1.0.0...master[Check the HEAD diff]
*Topbeat*

*Filebeat*
- Add exclude_files configuration option {pull}563[563]

*Winlogbeat*

Expand Down
3 changes: 3 additions & 0 deletions filebeat/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"log"
"os"
"path/filepath"
"regexp"
"time"

"github.com/elastic/beats/libbeat/cfgfile"
Expand Down Expand Up @@ -49,6 +50,8 @@ type ProspectorConfig struct {
ScanFrequency string `yaml:"scan_frequency"`
ScanFrequencyDuration time.Duration
Harvester HarvesterConfig `yaml:",inline"`
ExcludeFiles []string `yaml:"exclude_files"`
ExcludeFilesRegexp []*regexp.Regexp
}

type HarvesterConfig struct {
Expand Down
25 changes: 25 additions & 0 deletions filebeat/crawler/prospector.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ func (p *Prospector) setupProspectorConfig() error {
if err != nil {
return err
}
config.ExcludeFilesRegexp, err = harvester.InitRegexps(config.ExcludeFiles)
if err != nil {
return err
}

// Init File Stat list
p.prospectorList = make(map[string]harvester.FileStat)
Expand Down Expand Up @@ -219,11 +223,26 @@ func (p *Prospector) stdinRun(spoolChan chan *input.FileEvent) {
}
}

// isFileExcluded reports whether file matches at least one of the compiled
// exclude_files expressions held in the prospector configuration. It returns
// false when no exclude expressions are configured.
func (p *Prospector) isFileExcluded(file string) bool {
	patterns := p.ProspectorConfig.ExcludeFilesRegexp
	if len(patterns) == 0 {
		return false
	}
	return harvester.MatchAnyRegexps(patterns, file)
}

// Scans the specific path which can be a glob (/**/**/*.log)
// For all found files it is checked if a harvester should be started
func (p *Prospector) scan(path string, output chan *input.FileEvent) {

logp.Debug("prospector", "scan path %s", path)
logp.Debug("prospector", "exclude_files: %s", p.ProspectorConfig.ExcludeFiles)
// Evaluate the path as a wildcards/shell glob
matches, err := filepath.Glob(path)
if err != nil {
Expand All @@ -237,6 +256,12 @@ func (p *Prospector) scan(path string, output chan *input.FileEvent) {
for _, file := range matches {
logp.Debug("prospector", "Check file for harvesting: %s", file)

// check if the file is in the exclude_files list
if p.isFileExcluded(file) {
logp.Debug("prospector", "Exclude file: %s", file)
continue
}

// Stat the file, following any symlinks.
fileinfo, err := os.Stat(file)

Expand Down
20 changes: 20 additions & 0 deletions filebeat/crawler/prospector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,23 @@ func TestProspectorInitInputTypeWrong(t *testing.T) {
assert.Nil(t, err)
assert.Equal(t, "log", prospector.ProspectorConfig.Harvester.InputType)
}

func TestProspectorFileExclude(t *testing.T) {

prospectorConfig := config.ProspectorConfig{
ExcludeFiles: []string{"\\.gz$"},
Harvester: config.HarvesterConfig{
BufferSize: 0,
},
}

prospector := Prospector{
ProspectorConfig: prospectorConfig,
}

prospector.Init()

assert.True(t, prospector.isFileExcluded("/tmp/log/logw.gz"))
assert.False(t, prospector.isFileExcluded("/tmp/log/logw.log"))

}
10 changes: 10 additions & 0 deletions filebeat/docs/configuration.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,16 @@ If both `include_lines` and `exclude_lines` are defined, then include_lines is c
exclude_lines: ["^DBG"]
-------------------------------------------------------------------------------------

===== exclude_files

A list of regular expressions to match the files to be ignored. By default no file is excluded.

[source,yaml]
-------------------------------------------------------------------------------------
exclude_files: ['\.gz$']
-------------------------------------------------------------------------------------
This example ignores all files with the `gz` extension.

[[configuration-fields]]
===== fields

Expand Down
4 changes: 4 additions & 0 deletions filebeat/etc/beat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ filebeat:
# exclude_lines. By default, all the lines are exported.
# include_lines: ["^ERR", "^WARN"]

# Exclude files. A list of regular expressions to match. Filebeat drops the files that
# match any regular expression in the list. By default, no files are dropped.
# exclude_files: [".gz$"]

# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering
#fields:
Expand Down
4 changes: 4 additions & 0 deletions filebeat/etc/filebeat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ filebeat:
# exclude_lines. By default, all the lines are exported.
# include_lines: ["^ERR", "^WARN"]

# Exclude files. A list of regular expressions to match. Filebeat drops the files that
# match any regular expression in the list. By default, no files are dropped.
# exclude_files: [".gz$"]

# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering
#fields:
Expand Down
30 changes: 0 additions & 30 deletions filebeat/harvester/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"
"io"
"os"
"regexp"
"time"

"github.com/elastic/beats/filebeat/config"
Expand Down Expand Up @@ -332,35 +331,6 @@ func (h *Harvester) handleReadlineError(lastTimeRead time.Time, err error) error
func (h *Harvester) Stop() {
}

// InitRegexps compiles each expression in exprs with regexp.CompilePOSIX and
// returns the compiled expressions in the same order. If an expression fails
// to compile, the failure is logged and a nil slice is returned together with
// the compile error; expressions after the failing one are not processed.
func InitRegexps(exprs []string) ([]*regexp.Regexp, error) {
	compiled := make([]*regexp.Regexp, 0, len(exprs))

	for i := range exprs {
		re, err := regexp.CompilePOSIX(exprs[i])
		if err != nil {
			logp.Err("Fail to compile the regexp %s: %s", exprs[i], err)
			return nil, err
		}
		compiled = append(compiled, re)
	}

	return compiled, nil
}

// MatchAnyRegexps reports whether text matches at least one of the given
// regular expressions. It returns false for an empty or nil list.
func MatchAnyRegexps(regexps []*regexp.Regexp, text string) bool {
	for i := 0; i < len(regexps); i++ {
		if regexps[i].MatchString(text) {
			// The first match decides the result; the rest are not evaluated.
			return true
		}
	}
	return false
}

const maxConsecutiveEmptyReads = 100

// timedReader keeps track of last time bytes have been read from underlying
Expand Down
33 changes: 32 additions & 1 deletion filebeat/harvester/util.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package harvester

import (
"regexp"
"time"

"github.com/elastic/beats/filebeat/harvester/encoding"
"github.com/elastic/beats/libbeat/logp"
"time"
)

// isLine checks if the given byte array is a line, means has a line ending \n
Expand Down Expand Up @@ -60,3 +62,32 @@ func readlineString(bytes []byte, size int) (string, int, error) {
s := string(bytes)[:len(bytes)-lineEndingChars(bytes)]
return s, size, nil
}

// InitRegexps compiles each expression in exprs with regexp.CompilePOSIX and
// returns the compiled expressions in the same order. If an expression fails
// to compile, the failure is logged and a nil slice is returned together with
// the compile error; expressions after the failing one are not processed.
func InitRegexps(exprs []string) ([]*regexp.Regexp, error) {
	compiled := make([]*regexp.Regexp, 0, len(exprs))

	for i := range exprs {
		re, err := regexp.CompilePOSIX(exprs[i])
		if err != nil {
			logp.Err("Fail to compile the regexp %s: %s", exprs[i], err)
			return nil, err
		}
		compiled = append(compiled, re)
	}

	return compiled, nil
}

// MatchAnyRegexps reports whether text matches at least one of the given
// regular expressions. It returns false for an empty or nil list.
func MatchAnyRegexps(regexps []*regexp.Regexp, text string) bool {
	for i := 0; i < len(regexps); i++ {
		if regexps[i].MatchString(text) {
			// The first match decides the result; the rest are not evaluated.
			return true
		}
	}
	return false
}
19 changes: 19 additions & 0 deletions filebeat/harvester/util_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package harvester

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestMatchAnyRegexps checks that expressions compiled by InitRegexps are
// applied by MatchAnyRegexps: a path ending in .gz must match the pattern and
// a path with a different extension must not.
func TestMatchAnyRegexps(t *testing.T) {
	patterns := []string{"\\.gz$"}

	regexps, err := InitRegexps(patterns)
	assert.Nil(t, err)

	assert.True(t, MatchAnyRegexps(regexps, "/var/log/log.gz"))
	// Negative case: guards against a matcher that always returns true.
	assert.False(t, MatchAnyRegexps(regexps, "/var/log/log.log"))
}
5 changes: 4 additions & 1 deletion filebeat/tests/system/config/filebeat.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ filebeat:
{% if exclude_lines %}
exclude_lines: {{exclude_lines}}
{% endif %}

{% if exclude_files %}
exclude_files: {{exclude_files}}
{% endif %}

spool_size:
idle_timeout: 0.1s
registry_file: {{ fb.working_dir + '/' }}{{ registryFile|default(".filebeat")}}
Expand Down
36 changes: 35 additions & 1 deletion filebeat/tests/system/test_prospector.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ def test_stdin(self):
lambda: self.output_has(lines=iterations1+iterations2),
max_timeout=15)


proc.kill_and_wait()

objs = self.read_output()
Expand Down Expand Up @@ -140,3 +139,38 @@ def test_rotating_ignore_older_larger_write_rate(self):
max_timeout=15)

proc.kill_and_wait()

def test_exclude_files(self):
    """
    Checks that a file matching an exclude_files pattern is not harvested
    while a non-matching file in the same directory is.
    """
    self.render_config_template(
        path=os.path.abspath(self.working_dir) + "/log/*",
        exclude_files=[".gz$"]
    )
    os.mkdir(self.working_dir + "/log/")

    # File matching the exclude_files pattern; its line must not be exported.
    testfile = self.working_dir + "/log/test.gz"
    with open(testfile, 'w') as f:
        f.write("line in gz file\n")

    # File not matching the pattern; its line must be exported.
    testfile = self.working_dir + "/log/test.log"
    with open(testfile, 'w') as f:
        f.write("line in log file\n")

    filebeat = self.start_filebeat()

    self.wait_until(
        lambda: self.output_has(lines=1),
        max_timeout=15)

    # TODO: Find a better way to detect that filebeat has finished crawling
    # the files.
    # Idea: add a special flag so that filebeat does a single crawl and
    # then exits.
    filebeat.kill_and_wait()

    output = self.read_output()

    # Only the line from the non-excluded log file must be in the output.
    assert 1 == len(output)
    assert output[0]["message"] == "line in log file"