fossology · Kaushl2208 · Oct 6, 2021 · Apr 28, 2021
diff --git a/nirjas/binder.py b/nirjas/binder.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 '''
-Copyright (C) 2020  Ayush Bhardwaj (classicayush@gmail.com), Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)
+Copyright (C) 2020  Ayush Bhardwaj (classicayush@gmail.com),
+Kaushlendra Pratap (kaushlendrapratap.9837@gmail.com)
 
 SPDX-License-Identifier: LGPL-2.1
 
@@ -25,7 +26,7 @@
 from operator import itemgetter
 
 
-def readSingleLine(file, regex, sign):
+def readSingleLine(file, regex):
     '''
     Read file line by line and match the given regex to get comment.
     Return comments, lines read, blank lines, and lines with comments.
@@ -60,7 +61,7 @@ def contSingleLines(data):
     for i in data[0]:
         lines.append(i[0])
 
-    for a, b in groupby(enumerate(lines), lambda x: x[0] - x[1]):
+    for _, b in groupby(enumerate(lines), lambda x: x[0] - x[1]):
         temp = list(map(itemgetter(1), b))
         content = ""
 
@@ -69,7 +70,9 @@ def contSingleLines(data):
             end_line.append(temp[-1])
             for i in temp:
                 comment = [x[1] for x in data[0] if x[0] == i]
-                [data[0].remove(x) for x in data[0] if x[0] == i]
+                # Rebuild in place: deleting while iterating skips the
+                # element that follows each deleted one.
+                data[0][:] = [x for x in data[0] if x[0] != i]
                 content = content + ' ' + comment[0]
             output.append(content)
     return data, start_line, end_line, output
@@ -79,29 +82,38 @@ def readMultiLineSame(file, syntax: str):
     '''
     Read multiline comments where starting and ending symbols are same.
     '''
-    lines, output, start_line, end_line = [], [], [], []
+    start_line, end_line, output = [], [], []
     content = ""
+    # Delimiter preceded by the other quote kind => inside a string literal.
+    # Default to '"' so the name is always bound, even for quote-free syntax.
+    other_quote = "'" if '"' in syntax else '"'
+    syntax_in_string = other_quote + syntax
     closingCount, lines_of_comment = 0, 0
     copy = False
     with open(file) as f:
         for line_number, line in enumerate(f, start=1):
-            if syntax in line:
+            if syntax in line and \
+                    syntax_in_string not in line:
                 closingCount += 1
                 copy = True
+                if line.count(syntax) == 2:
+                    # Start and end on same line
+                    closingCount = 2
+                    content = line.replace('\n', ' ')
+                    start_line.append(line_number)
                 if closingCount % 2 == 0 and closingCount != 0:
                     copy = False
                     output.append(content.strip())
                     content = ""
                     end_line.append(line_number)
-                lines.append(line_number)
+                else:
+                    start_line.append(line_number)
 
             if copy:
                 lines_of_comment += 1
                 content = content + line.replace('\n', ' ')
 
-            output = [s.strip(syntax).strip() for s in output]
-
-        start_line = list(filter(lambda x: x not in end_line, lines))
+    output = [s.strip(syntax).strip() for s in output]
     return start_line, end_line, output, lines_of_comment
 
 
@@ -133,71 +145,85 @@ def readMultiLineDiff(file, startSyntax: str, endSyntax: str):
                 content = content + (line.replace('\n', ' ')).strip()
             if line.strip() == '':
                 blank_lines += 1
-        for idx, i in enumerate(endLine):
+        for idx, _ in enumerate(endLine):
             line_of_comments = line_of_comments + (endLine[idx] - startLine[idx]) + 1
         line_of_comments += len(output)
         output = [s.strip(startSyntax).strip(endSyntax).strip() for s in output]
     return startLine, endLine, output, line_of_comments, total_lines, blank_lines
 
 
 class CommentSyntax:
-
+    '''
+    Class to hold various regex and helper functions based on comment format
+    used by a language.
+    '''
     def __init__(self):
-        pass
+        self.sign = None  # comment marker, set by the syntax methods below
+        self.pattern = None  # regex, set by the single-line syntax methods
+        self.start = None  # presumably multi-line opening marker; confirm
+        self.end = None  # presumably multi-line closing marker; confirm
 
     def hash(self, file):
         '''
         sign: #
         '''
         self.sign = '#'
-        self.pattern_hash = r'''#+\s*(.*)'''
-        return readSingleLine(file, self.pattern_hash, self.sign)
+        self.pattern = r'''(?<!["'`])#+\s*(.*)'''  # not right after a quote
+        return readSingleLine(file, self.pattern)
+
+    def hashNoCurl(self, file):
+        '''
+        sign: #, except when directly followed by '{' (e.g. Ruby "#{...}")
+        '''
+        self.sign = '#'
+        self.pattern = r'''(?<!["'`])#+(?!\{)\s*(.*)'''
+        return readSingleLine(file, self.pattern)
 
     def percentage(self, file):
         '''
         sign: %
         '''
         self.sign = '%'
-        self.pattern_percentage = r'''\%\s*(.*)'''
-        return readSingleLine(file, self.pattern_percentage, self.sign)
+        self.pattern = r'''(?<!["'`])\%\s*(.*)'''  # not right after a quote
+        return readSingleLine(file, self.pattern)
 
     def doubleSlash(self, file):
         '''
-        sign: //
+        sign: // (not after p:, s:, t:, quote+':' or a bare quote)
         '''
         self.sign = '//'
-        self.pattern_doubleSlash = r'''(?<![pst]:)\/\/\s*(.*)'''
-        return readSingleLine(file, self.pattern_doubleSlash, self.sign)
+        self.pattern = r'''(?<![pst'"`]:)(?<!["'`])\/\/\s*(.*)'''
+        return readSingleLine(file, self.pattern)
 
     def doubleNotTripleSlash(self, file):
         '''
         sign: //
         '''
         self.sign = '//'
-        self.pattern_doubleNotTripleSlash = r'''(?<!\/)\/\/(?!\/)\s*(.*)'''
-        return readSingleLine(file, self.pattern_doubleNotTripleSlash, self.sign)
+        self.pattern = r'''(?<!\/)\/\/(?!\/)\s*(.*)'''  # exactly two slashes
+        return readSingleLine(file, self.pattern)
 
     def singleQuotes(self, file):
         """
         sign: '''  '''
         """
-        self.syntax = "'''"
-        return readMultiLineSame(file, self.syntax)
+        self.sign = "'''"  # opens and closes the comment
+        return readMultiLineSame(file, self.sign)
 
     def doubleQuotes(self, file):
         '''
         sign: """ """
         '''
-        self.syntax = '"""'
-        return readMultiLineSame(file, self.syntax)
+        self.sign = '"""'  # opens and closes the comment
+        return readMultiLineSame(file, self.sign)
 
     def doubleDash(self, file):
         '''
         sign: --
         '''
         self.sign = '--'
-        self.pattern_doubleDash = r'''\-\-\s*(.*)'''
-        return readSingleLine(file, self.pattern_doubleDash, self.sign)
+        self.pattern = r'''(?<!["'`])\-\-\s*(.*)'''  # not right after a quote
+        return readSingleLine(file, self.pattern)
 
     def slashStar(self, file):
         '''
@@ -252,8 +278,8 @@ def tripleSlash(self, file):
         sign: ///
         '''
         self.sign = '///'
-        self.pattern_tripleSlash = r'''\/\/\/\s*(.*)'''
-        return readSingleLine(file, self.pattern_tripleSlash, self.sign)
+        self.pattern = r'''(?<!["'`])\/\/\/\s*(.*)'''
+        return readSingleLine(file, self.pattern)
 
     def slashDoubleStar(self, file):
         '''

diff --git a/nirjas/languages/ruby.py b/nirjas/languages/ruby.py
@@ -26,7 +26,7 @@
 
 def rubyExtractor(file):
     result = CommentSyntax()
-    single_line_comment = result.hash(file)
+    single_line_comment = result.hashNoCurl(file)
     multiline_comment = result.beginEnd(file)
     cont_single_line_comment = contSingleLines(single_line_comment)
     file = file.split("/")