diff --git a/nirjas/binder.py b/nirjas/binder.py index 170bbf3..b989867 100644 --- a/nirjas/binder.py +++ b/nirjas/binder.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' -Copyright (C) 2020 Ayush Bhardwaj (classicayush@gmail.com), Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com) +Copyright (C) 2020 Ayush Bhardwaj (classicayush@gmail.com), +Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com) SPDX-License-Identifier: LGPL-2.1 @@ -25,7 +26,7 @@ from operator import itemgetter -def readSingleLine(file, regex, sign): +def readSingleLine(file, regex): ''' Read file line by line and match the given regex to get comment. Return comments, lines read, blank lines, and lines with comments. @@ -60,7 +61,7 @@ def contSingleLines(data): for i in data[0]: lines.append(i[0]) - for a, b in groupby(enumerate(lines), lambda x: x[0] - x[1]): + for _, b in groupby(enumerate(lines), lambda x: x[0] - x[1]): temp = list(map(itemgetter(1), b)) content = "" @@ -69,7 +70,9 @@ def contSingleLines(data): end_line.append(temp[-1]) for i in temp: comment = [x[1] for x in data[0] if x[0] == i] - [data[0].remove(x) for x in data[0] if x[0] == i] + for index, x in enumerate(data[0]): + if x[0] == i: + del data[0][index] content = content + ' ' + comment[0] output.append(content) return data, start_line, end_line, output @@ -79,29 +82,38 @@ def readMultiLineSame(file, syntax: str): ''' Read multiline comments where starting and ending symbols are same. ''' - lines, output, start_line, end_line = [], [], [], [] + start_line, end_line, output = [], [], [] content = "" + if '"' in syntax: + syntax_in_string = "'" + syntax + elif "'" in syntax: + syntax_in_string = '"' + syntax closingCount, lines_of_comment = 0, 0 copy = False with open(file) as f: for line_number, line in enumerate(f, start=1): - if syntax in line: + if syntax in line and \ + syntax_in_string not in line: closingCount += 1 copy = True + if line.count(syntax) == 2: + # Start and end on same line + closingCount = 2 + content = line.replace('\n', ' ') + start_line.append(line_number) if closingCount % 2 == 0 and closingCount != 0: copy = False output.append(content.strip()) content = "" end_line.append(line_number) - lines.append(line_number) + else: + start_line.append(line_number) if copy: lines_of_comment += 1 content = content + line.replace('\n', ' ') - output = [s.strip(syntax).strip() for s in output] - - start_line = list(filter(lambda x: x not in end_line, lines)) + output = [s.strip(syntax).strip() for s in output] return start_line, end_line, output, lines_of_comment @@ -133,7 +145,7 @@ def readMultiLineDiff(file, startSyntax: str, endSyntax: str): content = content + (line.replace('\n', ' ')).strip() if line.strip() == '': blank_lines += 1 - for idx, i in enumerate(endLine): + for idx, _ in enumerate(endLine): line_of_comments = line_of_comments + (endLine[idx] - startLine[idx]) + 1 line_of_comments += len(output) output = [s.strip(startSyntax).strip(endSyntax).strip() for s in output] @@ -141,63 +153,77 @@ def readMultiLineDiff(file, startSyntax: str, endSyntax: str): class CommentSyntax: - + ''' + Class to hold various regex and helper functions based on comment format + used by a language. + ''' def __init__(self): - pass + self.sign = None + self.pattern = None + self.start = None + self.end = None def hash(self, file): ''' sign: # ''' self.sign = '#' - self.pattern_hash = r'''#+\s*(.*)''' - return readSingleLine(file, self.pattern_hash, self.sign) + self.pattern = r'''(?