Skip to content

Commit

Permalink
Drain compatibility with Python 3.7 (#18)
Browse files Browse the repository at this point in the history
* Add notebook files to .gitignore
* Fix Drain compatibility with Python 3.7
  • Loading branch information
thomasryck authored and Jamie Zhu committed Feb 9, 2019
1 parent 173db48 commit 85b60bd
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 26 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@ target/
Unused/
.idea/
POP/
.ipynb_checkpoints/
*.ipynb
42 changes: 21 additions & 21 deletions demo/Drain_demo.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
#!/usr/bin/env python
import sys
sys.path.append('../')
from logparser import Drain

input_dir = '../logs/HDFS/' # The input directory of log file
output_dir = 'Drain_result/' # The output directory of parsing results
log_file = 'HDFS_2k.log' # The input log file name
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>' # HDFS log format
# Regular expression list for optional preprocessing (default: [])
regex = [
r'blk_(|-)[0-9]+' , # block id
r'(/|)([0-9]+\.){3}[0-9]+(:[0-9]+|)(:|)', # IP
r'(?<=[^A-Za-z0-9])(\-?\+?\d+)(?=[^A-Za-z0-9])|[0-9]+$', # Numbers
]
st = 0.5 # Similarity threshold
depth = 4 # Depth of all leaf nodes

parser = Drain.LogParser(log_format, indir=input_dir, outdir=output_dir, depth=depth, st=st, rex=regex)
parser.parse(log_file)
#!/usr/bin/env python
import sys
sys.path.append('../')
from logparser import Drain

input_dir = '../logs/HDFS/' # The input directory of log file
output_dir = 'Drain_result/' # The output directory of parsing results
log_file = 'HDFS_2k.log' # The input log file name
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>' # HDFS log format
# Regular expression list for optional preprocessing (default: [])
regex = [
r'blk_(|-)[0-9]+' , # block id
r'(/|)([0-9]+\.){3}[0-9]+(:[0-9]+|)(:|)', # IP
r'(?<=[^A-Za-z0-9])(\-?\+?\d+)(?=[^A-Za-z0-9])|[0-9]+$', # Numbers
]
st = 0.5 # Similarity threshold
depth = 4 # Depth of all leaf nodes

parser = Drain.LogParser(log_format, indir=input_dir, outdir=output_dir, depth=depth, st=st, rex=regex)
parser.parse(log_file)

8 changes: 4 additions & 4 deletions logparser/Drain/Drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def outputResult(self, logClustL):

def printTree(self, node, dep):
pStr = ''
for i in xrange(dep):
for i in range(dep):
pStr += '\t'

if node.depth == 0:
Expand All @@ -234,7 +234,7 @@ def printTree(self, node, dep):
else:
pStr += node.digitOrtoken

print pStr
print(pStr)

if node.depth == self.depth:
return 1
Expand Down Expand Up @@ -273,7 +273,7 @@ def parse(self, logName):

count += 1
if count % 1000 == 0 or count == len(self.df_log):
print 'Processed {0:.1f}% of log lines.'.format(count * 100.0 / len(self.df_log))
print('Processed {0:.1f}% of log lines.'.format(count * 100.0 / len(self.df_log)))


if not os.path.exists(self.savePath):
Expand Down Expand Up @@ -320,7 +320,7 @@ def generate_logformat_regex(self, logformat):
regex = ''
for k in range(len(splitters)):
if k % 2 == 0:
splitter = re.sub(' +', '\s+', splitters[k])
splitter = re.sub(' +', '\\\s+', splitters[k])
regex += splitter
else:
header = splitters[k].strip('<').strip('>')
Expand Down
2 changes: 1 addition & 1 deletion logparser/Drain/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from Drain import *
from .Drain import *

3 comments on commit 85b60bd

@AshimaChawla
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With Python 3.7 I get this error: `('bad escape \s at position 0', 'occurred at index 0')`.

Do you have any fix for this problem?

@ankit-nassa
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AshimaChawla there are 2 options:

  1. Switch to Python 3.6
  2. Change `import re` in the code to `import regex as re`

@AshimaChawla
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, @ankit-nassa. The code worked fine with Python 3.7 after changing to

`import regex as re` and
`splitter = re.sub(' +', '\\s+', splitters[k])` [line 326]

Please sign in to comment.