Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(regex): Add negative lookbehind for string #32

Merged
merged 1 commit into from
Oct 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 55 additions & 29 deletions nirjas/binder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Copyright (C) 2020 Ayush Bhardwaj (classicayush@gmail.com), Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)
Copyright (C) 2020 Ayush Bhardwaj (classicayush@gmail.com),
Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)

SPDX-License-Identifier: LGPL-2.1

Expand All @@ -25,7 +26,7 @@
from operator import itemgetter


def readSingleLine(file, regex, sign):
def readSingleLine(file, regex):
'''
Read file line by line and match the given regex to get comment.
Return comments, lines read, blank lines, and lines with comments.
Expand Down Expand Up @@ -60,7 +61,7 @@ def contSingleLines(data):
for i in data[0]:
lines.append(i[0])

for a, b in groupby(enumerate(lines), lambda x: x[0] - x[1]):
for _, b in groupby(enumerate(lines), lambda x: x[0] - x[1]):
temp = list(map(itemgetter(1), b))
content = ""

Expand All @@ -69,7 +70,9 @@ def contSingleLines(data):
end_line.append(temp[-1])
for i in temp:
comment = [x[1] for x in data[0] if x[0] == i]
[data[0].remove(x) for x in data[0] if x[0] == i]
for index, x in enumerate(data[0]):
if x[0] == i:
del data[0][index]
content = content + ' ' + comment[0]
output.append(content)
return data, start_line, end_line, output
Expand All @@ -79,29 +82,38 @@ def readMultiLineSame(file, syntax: str):
'''
Read multiline comments where starting and ending symbols are same.
'''
lines, output, start_line, end_line = [], [], [], []
start_line, end_line, output = [], [], []
content = ""
if '"' in syntax:
syntax_in_string = "'" + syntax
elif "'" in syntax:
syntax_in_string = '"' + syntax
closingCount, lines_of_comment = 0, 0
copy = False
with open(file) as f:
for line_number, line in enumerate(f, start=1):
if syntax in line:
if syntax in line and \
syntax_in_string not in line:
closingCount += 1
copy = True
if line.count(syntax) == 2:
# Start and end on same line
closingCount = 2
content = line.replace('\n', ' ')
start_line.append(line_number)
if closingCount % 2 == 0 and closingCount != 0:
copy = False
output.append(content.strip())
content = ""
end_line.append(line_number)
lines.append(line_number)
else:
start_line.append(line_number)

if copy:
lines_of_comment += 1
content = content + line.replace('\n', ' ')

output = [s.strip(syntax).strip() for s in output]

start_line = list(filter(lambda x: x not in end_line, lines))
output = [s.strip(syntax).strip() for s in output]
return start_line, end_line, output, lines_of_comment


Expand Down Expand Up @@ -133,71 +145,85 @@ def readMultiLineDiff(file, startSyntax: str, endSyntax: str):
content = content + (line.replace('\n', ' ')).strip()
if line.strip() == '':
blank_lines += 1
for idx, i in enumerate(endLine):
for idx, _ in enumerate(endLine):
line_of_comments = line_of_comments + (endLine[idx] - startLine[idx]) + 1
line_of_comments += len(output)
output = [s.strip(startSyntax).strip(endSyntax).strip() for s in output]
return startLine, endLine, output, line_of_comments, total_lines, blank_lines


class CommentSyntax:

'''
Class to hold various regex and helper functions based on comment format
used by a language.
'''
def __init__(self):
pass
self.sign = None
self.pattern = None
self.start = None
self.end = None

def hash(self, file):
'''
sign: #
'''
self.sign = '#'
self.pattern_hash = r'''#+\s*(.*)'''
return readSingleLine(file, self.pattern_hash, self.sign)
self.pattern = r'''(?<!["'`])#+\s*(.*)'''
return readSingleLine(file, self.pattern)

def hashNoCurl(self, file):
    '''
    Extract single-line ``#`` comments while ignoring ``#{`` sequences.

    sign: #

    Stores the sign and the regex on the instance, then hands the file
    and the regex to readSingleLine and returns whatever it returns.
    '''
    # The matched run of '#' must not be preceded by a quote or backtick
    # and must not be directly followed by '{'.
    no_curl_regex = r'''(?<!["'`])#+(?!\{)\s*(.*)'''
    self.sign = '#'
    self.pattern = no_curl_regex
    return readSingleLine(file, no_curl_regex)

def percentage(self, file):
'''
sign: %
'''
self.sign = '%'
self.pattern_percentage = r'''\%\s*(.*)'''
return readSingleLine(file, self.pattern_percentage, self.sign)
self.pattern = r'''(?<!["'`])\%\s*(.*)'''
return readSingleLine(file, self.pattern)

def doubleSlash(self, file):
'''
sign: //
'''
self.sign = '//'
self.pattern_doubleSlash = r'''(?<![pst]:)\/\/\s*(.*)'''
return readSingleLine(file, self.pattern_doubleSlash, self.sign)
self.pattern = r'''(?<![pst'"`]:)\/\/\s*(.*)'''
return readSingleLine(file, self.pattern)

def doubleNotTripleSlash(self, file):
'''
sign: //
'''
self.sign = '//'
self.pattern_doubleNotTripleSlash = r'''(?<!\/)\/\/(?!\/)\s*(.*)'''
return readSingleLine(file, self.pattern_doubleNotTripleSlash, self.sign)
self.pattern = r'''(?<!\/)\/\/(?!\/)\s*(.*)'''
return readSingleLine(file, self.pattern)

def singleQuotes(self, file):
"""
sign: ''' '''
"""
self.syntax = "'''"
return readMultiLineSame(file, self.syntax)
self.sign = "'''"
return readMultiLineSame(file, self.sign)

def doubleQuotes(self, file):
'''
sign: """ """
'''
self.syntax = '"""'
return readMultiLineSame(file, self.syntax)
self.sign = '"""'
return readMultiLineSame(file, self.sign)

def doubleDash(self, file):
'''
sign: --
'''
self.sign = '--'
self.pattern_doubleDash = r'''\-\-\s*(.*)'''
return readSingleLine(file, self.pattern_doubleDash, self.sign)
self.pattern = r'''(?<!["'`])\-\-\s*(.*)'''
return readSingleLine(file, self.pattern)

def slashStar(self, file):
'''
Expand Down Expand Up @@ -252,8 +278,8 @@ def tripleSlash(self, file):
sign: ///
'''
self.sign = '///'
self.pattern_tripleSlash = r'''\/\/\/\s*(.*)'''
return readSingleLine(file, self.pattern_tripleSlash, self.sign)
self.pattern = r'''(?<!["'`])\/\/\/\s*(.*)'''
return readSingleLine(file, self.pattern)

def slashDoubleStar(self, file):
'''
Expand Down
2 changes: 1 addition & 1 deletion nirjas/languages/ruby.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

def rubyExtractor(file):
result = CommentSyntax()
single_line_comment = result.hash(file)
single_line_comment = result.hashNoCurl(file)
multiline_comment = result.beginEnd(file)
cont_single_line_comment = contSingleLines(single_line_comment)
file = file.split("/")
Expand Down