Skip to content

Commit

Permalink
v0.8.6 - extra_delimiters feature added
Browse files Browse the repository at this point in the history
  • Loading branch information
davidohana committed Nov 25, 2020
1 parent fd9d7ad commit d4ea7ca
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 3 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ Available parameters are:
- `[DRAIN]/sim_th` - similarity threshold (default 0.4)
- `[DRAIN]/depth` - depth of all leaf nodes (default 4)
- `[DRAIN]/max_children` - max number of children of an internal node (default 100)
- `[DRAIN]/extra_delimiters` - extra delimiters, in addition to whitespace, to apply when splitting a log message into words (default: none).
Format is a Python list literal, e.g. `['_', ':']`.
- `[MASKING]/masking` - parameter masking instructions, in JSON format (default "")
- `[SNAPSHOT]/snapshot_interval_minutes` - time interval, in minutes, between new snapshots (default 5)
- `[SNAPSHOT]/compress_state` - whether to compress the state before saving it. This can be useful when using Kafka persistence.
Expand Down Expand Up @@ -192,6 +194,9 @@ Our project welcomes external contributions. Please refer to [CONTRIBUTING.md](C

## Change Log

##### v0.8.6
* Added `extra_delimiters` configuration option to Drain

##### v0.8.5
* Profiler improvements

Expand Down
12 changes: 11 additions & 1 deletion drain3/drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,27 @@ def __init__(self, key, depth):

class Drain:

def __init__(self, depth=4, sim_th=0.4, max_children=100, profiler: Profiler = NullProfiler()):
def __init__(self,
depth=4,
sim_th=0.4,
max_children=100,
extra_delimiters=(),
profiler: Profiler = NullProfiler()):
"""
Attributes
----------
depth : depth of all leaf nodes (nodes that contain log clusters)
sim_th : similarity threshold
max_children : max number of children of an internal node
extra_delimiters: delimiters to apply when splitting log message into words (in addition to whitespace).
"""
self.depth = depth - 2 # number of prefix tokens in each tree path (exclude root and leaf node)
self.sim_th = sim_th
self.max_children = max_children
self.clusters = []
self.root_node = Node("(ROOT)", 0)
self.profiler = profiler
self.extra_delimiters = extra_delimiters

@staticmethod
def has_numbers(s):
Expand Down Expand Up @@ -231,6 +238,9 @@ def num_to_cluster_id(num):

def add_log_message(self, content: str):
content = content.strip()
for delimiter in self.extra_delimiters:
content = content.replace(delimiter, " ")

content_tokens = content.split()

if self.profiler:
Expand Down
6 changes: 6 additions & 0 deletions drain3/template_miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Author_email: david.ohana@ibm.com, moshikh@il.ibm.com, eranra@il.ibm.com
License : MIT
"""
import ast
import base64
import configparser
import logging
Expand Down Expand Up @@ -37,10 +38,15 @@ def __init__(self, persistence_handler: PersistenceHandler = None):
self.persistence_handler = persistence_handler
self.snapshot_interval_seconds = self.config.getint('SNAPSHOT', 'snapshot_interval_minutes', fallback=5) * 60
self.compress_state = self.config.getboolean('SNAPSHOT', 'compress_state', fallback=True)

extra_delimiters = self.config.get('DRAIN', 'extra_delimiters', fallback="[]")
extra_delimiters = ast.literal_eval(extra_delimiters)

self.drain = Drain(
sim_th=self.config.getfloat('DRAIN', 'sim_th', fallback=0.4),
depth=self.config.getint('DRAIN', 'depth', fallback=4),
max_children=self.config.getint('DRAIN', 'max_children', fallback=100),
extra_delimiters=extra_delimiters,
profiler=self.profiler
)
self.masker = LogMasker(self.config)
Expand Down
2 changes: 1 addition & 1 deletion examples/drain3.ini
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ masking = [
sim_th = 0.4
depth = 4
max_children = 100

extra_delimiters = ["_"]

[PROFILING]
enabled = True
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
setup(
name='drain3',
packages=['drain3'],
version="0.8.5",
version="0.8.6",
license='MIT',
description="Persistent & streaming log template miner",
long_description=long_description,
Expand Down

0 comments on commit d4ea7ca

Please sign in to comment.