#!/usr/bin/env python3
# -*-python-*-
"""
Couchbase log redaction tool. This tool is used to redact Couchbase SDK and
tool log files.
"""
import argparse
import hashlib
import logging
import os
import random
import re
import signal
import sys
import time
from multiprocessing import Pool
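# Matches a single <ud>...</ud> block; the non-greedy middle group means that
# several tagged values on one line are each redacted separately.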
UD_TAG_REGEX = re.compile('(<ud>)(.+?)(</ud>)')
class Redact_file(object):
def __init__(self, log, salt, path):
self.log = log
self.salt = salt
self.path = path
self._redact_file()
def _redact_tag(self, match):
"""
        Takes a regex match of a <ud> tag pair and returns the same tags with
        the data between them replaced by its salted SHA-1 digest.
"""
hash_object = hashlib.sha1((self.salt + str(match.group(2))).encode('utf-8'))
return match.group(1) + hash_object.hexdigest() + match.group(3)
def _redact_line(self, logline):
"""Takes a log line and return a redacted line"""
result = UD_TAG_REGEX.sub(self._redact_tag, logline)
return result
def _redact_file(self):
"""
        Reads a log file and creates a new file called "redacted-[FILENAME]"
        in which the data between <ud> tags has been redacted. A warning is
        reported for any line whose <ud> tags do not match up.
"""
try:
with open(self.log, 'r') as log_file:
try:
total_redacted_tags = 0
warning_lines = 0
_, tail = os.path.split(self.log)
redacted_log = os.path.join(self.path, 'redacted-' + tail)
if os.path.exists(redacted_log):
logging.error(f'{self.log} - {redacted_log} already exists, not redacting')
return
logging.debug(f'{self.log} - Starting redaction file size is {os.fstat(log_file.fileno()).st_size} '
f'bytes')
with open(redacted_log, 'w+') as redacted_file:
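                        # The first line of the output records SHA-1(salt + salt), so files
                        # redacted with the same salt can be matched up without revealing it.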
hashed_salt = hashlib.sha1((self.salt + self.salt).encode()).hexdigest()
redacted_file.write(f'Hash of the salt used to redact the file: {hashed_salt}\n')
logging.debug(f'{self.log} - Log redacted using salt: <ud>{self.salt}</ud>')
                        line_number = 0  # guards the summary log below when the input file is empty
                        for line_number, line in enumerate(log_file, 1):
ud_start_tags = line.count('<ud>')
ud_end_tags = line.count('</ud>')
if ud_start_tags == ud_end_tags:
total_redacted_tags += ud_start_tags
else:
logging.warning(f'{self.log} - Unmatched tags detected on line {line_number}, potential'
f' data leak. Please review redacted file')
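                                # Any complete <ud>...</ud> pairs on the line are still redacted by
                                # the regex; min() gives an approximate count of those pairs.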
if 0 not in [ud_start_tags, ud_end_tags]:
total_redacted_tags += min(ud_start_tags, ud_end_tags)
warning_lines += 1
redacted_file.write(self._redact_line(line))
logging.info(f'{self.log} - Finished redacting, {line_number} lines processed,'
f' {total_redacted_tags} tags redacted, {warning_lines} lines with unmatched tags')
except IOError as e:
logging.error(f'{redacted_log} - {e.strerror}')
except Exception as e:
# Should not get here in a production environment, but this is useful for development
logging.error(f'{self.log} - Unexpected error: {e}')
except IOError as e:
logging.error(f'{self.log} - {e.strerror}')
def redact_file_unpack(args):
"""
    Helper function to unpack arguments for the Redact_file class, as
    multiprocessing.Pool.map only passes a single argument to each worker
"""
return Redact_file(*args)
def _init_worker_signal():
    """Makes workers ignore SIGINT so that Ctrl-C is handled only by the parent process"""
signal.signal(signal.SIGINT, signal.SIG_IGN)
def _main():
    opts = argparse.ArgumentParser(description='A tool to redact log files outside of Couchbase Server, such as SDK '
'and cbbackupmgr log files. The redacted file will be named '
'redacted-[filename] and will be placed in the current working '
'directory.')
opts.add_argument('log_files', nargs='+', metavar='File', help='path to the log file(s) to redact')
salt_group = opts.add_mutually_exclusive_group(required=True)
salt_group.add_argument('-s', '--salt', type=str, metavar='<string>', help='the salt used to redact the logs')
salt_group.add_argument('-g', '--generate-salt', action='store_true', dest='generate_salt',
help='automatically generates a salt that will be used to redact the logs')
opts.add_argument('-t', '--threads', type=int, metavar='<num>', default=1,
                      help='number of concurrent worker processes to use')
opts.add_argument('-o', '--output-dir', type=str, metavar='<path>', dest='output_dir', default='',
help='the directory to place the redacted logs in')
opts.add_argument('-v', '--verbose', action='count', help='increase output verbosity')
args = opts.parse_args()
log_level = None
if args.verbose is None:
log_level = logging.WARN
elif args.verbose >= 2:
log_level = logging.DEBUG
elif args.verbose == 1:
log_level = logging.INFO
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y/%m/%dT%H:%M:%S', level=log_level)
    # Deduplicate the input in case the same file is passed in more than once
args.log_files = set(args.log_files)
if args.generate_salt:
        logging.warning('Automatically generating salt. This will make it difficult to cross-reference logs')
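        # Note: the standard random module is not a cryptographically secure source;
        # secrets.choice could be substituted if a stronger salt is wanted.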
alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
args.salt = ''.join(random.choice(alphabet) for _ in range(16))
if not 1 <= args.threads <= 64:
opts.error('--threads has to be between 1 and 64')
    pool = Pool(args.threads, _init_worker_signal)
# Creating a list of tuples because of the limitation of pool with multiple arguments
worker_args = [(log, args.salt, args.output_dir) for log in args.log_files]
results = pool.map_async(redact_file_unpack, worker_args)
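    # Poll the async result instead of blocking on it so the parent stays
    # responsive to Ctrl-C while the workers (which ignore SIGINT) keep running.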
try:
while not results.ready():
time.sleep(1)
except KeyboardInterrupt:
pool.terminate()
        sys.exit('Control-C received, exiting...')
else:
pool.close()
finally:
pool.join()
if __name__ == '__main__':
_main()