All-Hands-AI · neubig · Dec 8, 2024 · Dec 9, 2024 · Dec 9, 2024 · Dec 9, 2024
diff --git a/openhands/resolver/issue_definitions.py b/openhands/resolver/issue_definitions.py
@@ -72,11 +72,13 @@ def _download_issues_from_github(self) -> list[Any]:
             if not issues:
                 break
 
-            # Sanity check - the response is a list of dictionaries
-            if not isinstance(issues, list) or any(
+            # Handle both list and single-object responses
+            if isinstance(issues, dict):
+                issues = [issues]
+            elif not isinstance(issues, list) or any(
                 [not isinstance(issue, dict) for issue in issues]
             ):
-                raise ValueError('Expected list of dictionaries from Github API.')
+                raise ValueError('Expected list or dictionary from Github API.')
 
             # Add the issues to the final list
             all_issues.extend(issues)
@@ -91,6 +93,9 @@ def _extract_image_urls(self, issue_body: str) -> list[str]:
         return re.findall(image_pattern, issue_body)
 
     def _extract_issue_references(self, body: str) -> list[int]:
+        if not body:
+            return []
+
         # First, remove code blocks as they may contain false positives
         body = re.sub(r'```.*?```', '', body, flags=re.DOTALL)
 
@@ -100,12 +105,13 @@ def _extract_issue_references(self, body: str) -> list[int]:
         # Remove URLs that contain hash symbols
         body = re.sub(r'https?://[^\s)]*#\d+[^\s)]*', '', body)
 
-        # Now extract issue numbers, making sure they're not part of other text
+        # Extract issue numbers that are explicitly referenced
         # The pattern matches #number that:
-        # 1. Is at the start of text or after whitespace/punctuation
-        # 2. Is followed by whitespace, punctuation, or end of text
-        # 3. Is not part of a URL
-        pattern = r'(?:^|[\s\[({]|[^\w#])#(\d+)(?=[\s,.\])}]|$)'
+        # 1. Is preceded by specific keywords like "fix", "close", "resolve", "see issue", etc.
+        # 2. Is at the start of text or after whitespace/punctuation
+        # 3. Is followed by whitespace, punctuation, or end of text
+        keywords = r'(?:fix(?:e[ds])?|close[ds]?|resolve[ds]?|see issue|related to|references?|addresses?)'
+        pattern = fr'(?i)(?:{keywords}\s+)?(?:^|[\s\[\(\{{]|[^\w#])#(\d+)(?=[\s,.\]\)\}}]|$)'
         return [int(match) for match in re.findall(pattern, body)]
 
     def _get_issue_comments(
@@ -126,33 +132,37 @@ def _get_issue_comments(
         all_comments = []
 
         # Get comments, page by page
-        while True:
-            response = requests.get(url, headers=headers, params=params)
-            response.raise_for_status()
-            comments = response.json()
-
-            if not comments:
-                break
-
-            # If a single comment ID is provided, return only that comment
-            if comment_id:
-                matching_comment = next(
-                    (
-                        comment['body']
-                        for comment in comments
-                        if comment['id'] == comment_id
-                    ),
-                    None,
-                )
-                if matching_comment:
-                    return [matching_comment]
-            else:
-                # Otherwise, return all comments
-                all_comments.extend([comment['body'] for comment in comments])
+        try:
+            while True:
+                response = requests.get(url, headers=headers, params=params)
+                response.raise_for_status()
+                comments = response.json()
+
+                if not comments:
+                    break
+
+                # If a single comment ID is provided, return only that comment
+                if comment_id:
+                    matching_comment = next(
+                        (
+                            comment['body']
+                            for comment in comments
+                            if comment['id'] == comment_id
+                        ),
+                        None,
+                    )
+                    if matching_comment:
+                        return [matching_comment]
+                else:
+                    # Otherwise, return all comments
+                    all_comments.extend([comment['body'] for comment in comments])
 
-            params['page'] += 1
+                params['page'] += 1
 
-        return all_comments if all_comments else None
+            return all_comments if all_comments else None
+        except (requests.exceptions.RequestException, StopIteration):
+            # Return None if we can't get any comments
+            return None
 
     def get_converted_issues(
         self, issue_numbers: list[int] | None = None, comment_id: int | None = None
@@ -166,17 +176,12 @@ def get_converted_issues(
         Returns:
             List of Github issues.
         """
-
         if not issue_numbers:
             raise ValueError('Unspecified issue number')
 
         all_issues = self._download_issues_from_github()
         logger.info(f'Limiting resolving to issues {issue_numbers}.')
-        all_issues = [
-            issue
-            for issue in all_issues
-            if issue['number'] in issue_numbers and 'pull_request' not in issue
-        ]
+        all_issues = [issue for issue in all_issues if issue['number'] in issue_numbers]
 
         if len(issue_numbers) == 1 and not all_issues:
             raise ValueError(f'Issue {issue_numbers[0]} not found')
@@ -530,6 +535,23 @@ def __get_context_from_external_issues_references(
                 response = requests.get(url, headers=headers)
                 response.raise_for_status()
                 issue_data = response.json()
+
+                # Handle both list and single-object responses
+                if isinstance(issue_data, list):
+                    # Find the matching issue in the list
+                    matching_issues = [
+                        i for i in issue_data if i.get('number') == issue_number
+                    ]
+                    if not matching_issues:
+                        logger.warning(f'Issue {issue_number} not found in response')
+                        continue
+                    issue_data = matching_issues[0]
+                elif not isinstance(issue_data, dict):
+                    logger.warning(
+                        f'Unexpected response type for issue {issue_number}: {type(issue_data)}'
+                    )
+                    continue
+
                 issue_body = issue_data.get('body', '')
                 if issue_body:
                     closing_issues.append(issue_body)
@@ -544,57 +566,86 @@ def get_converted_issues(
         if not issue_numbers:
             raise ValueError('Unspecified issue numbers')
 
-        all_issues = self._download_issues_from_github()
-        logger.info(f'Limiting resolving to issues {issue_numbers}.')
-        all_issues = [issue for issue in all_issues if issue['number'] in issue_numbers]
-
+        logger.info(f'Fetching issues {issue_numbers}.')
         converted_issues = []
-        for issue in all_issues:
-            # For PRs, body can be None
-            if any([issue.get(key) is None for key in ['number', 'title']]):
-                logger.warning(f'Skipping #{issue} as it is missing number or title.')
-                continue
+        headers = {
+            'Authorization': f'token {self.token}',
+            'Accept': 'application/vnd.github.v3+json',
+        }
 
-            # Handle None body for PRs
-            body = issue.get('body') if issue.get('body') is not None else ''
-            (
-                closing_issues,
-                closing_issues_numbers,
-                review_comments,
-                review_threads,
-                thread_ids,
-            ) = self.__download_pr_metadata(issue['number'], comment_id=comment_id)
-            head_branch = issue['head']['ref']
-
-            # Get PR thread comments
-            thread_comments = self._get_pr_comments(
-                issue['number'], comment_id=comment_id
-            )
+        for issue_number in issue_numbers:
+            try:
+                url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}'
+                response = requests.get(url, headers=headers)
+                response.raise_for_status()
+                issue = response.json()
+
+                # Handle both list and single-object responses
+                if isinstance(issue, list):
+                    # Find the matching issue in the list
+                    matching_issues = [
+                        i for i in issue if i.get('number') == issue_number
+                    ]
+                    if not matching_issues:
+                        logger.warning(f'Issue {issue_number} not found in response')
+                        continue
+                    issue = matching_issues[0]
+                elif not isinstance(issue, dict):
+                    logger.warning(
+                        f'Unexpected response type for issue {issue_number}: {type(issue)}'
+                    )
+                    continue
 
-            closing_issues = self.__get_context_from_external_issues_references(
-                closing_issues,
-                closing_issues_numbers,
-                body,
-                review_comments,
-                review_threads,
-                thread_comments,
-            )
+                # For PRs, body can be None
+                if any([issue.get(key) is None for key in ['number', 'title']]):
+                    logger.warning(
+                        f'Skipping #{issue} as it is missing number or title.'
+                    )
+                    continue
+
+                # Handle None body for PRs
+                body = issue.get('body') if issue.get('body') is not None else ''
+                (
+                    closing_issues,
+                    closing_issues_numbers,
+                    review_comments,
+                    review_threads,
+                    thread_ids,
+                ) = self.__download_pr_metadata(issue['number'], comment_id=comment_id)
+                head_branch = issue['head']['ref']
+
+                # Get PR thread comments
+                thread_comments = self._get_pr_comments(
+                    issue['number'], comment_id=comment_id
+                )
 
-            issue_details = GithubIssue(
-                owner=self.owner,
-                repo=self.repo,
-                number=issue['number'],
-                title=issue['title'],
-                body=body,
-                closing_issues=closing_issues,
-                review_comments=review_comments,
-                review_threads=review_threads,
-                thread_ids=thread_ids,
-                head_branch=head_branch,
-                thread_comments=thread_comments,
-            )
+                # Extract issue references from PR body and review comments
+                closing_issues = self.__get_context_from_external_issues_references(
+                    closing_issues,
+                    closing_issues_numbers,
+                    body,
+                    review_comments,
+                    review_threads,
+                    thread_comments,
+                )
 
-            converted_issues.append(issue_details)
+                issue_details = GithubIssue(
+                    owner=self.owner,
+                    repo=self.repo,
+                    number=issue['number'],
+                    title=issue['title'],
+                    body=body,
+                    closing_issues=closing_issues,
+                    review_comments=review_comments,
+                    review_threads=review_threads,
+                    thread_ids=thread_ids,
+                    head_branch=head_branch,
+                    thread_comments=thread_comments,
+                )
+
+                converted_issues.append(issue_details)
+            except requests.exceptions.RequestException as e:
+                logger.warning(f'Failed to fetch issue {issue_number}: {str(e)}')
 
         return converted_issues
 

diff --git a/tests/test_issue_references.py b/tests/test_issue_references.py
@@ -0,0 +1,55 @@
+import pytest
+from unittest.mock import MagicMock, patch
+from openhands.resolver.issue_definitions import IssueHandler
+
+@patch('openhands.resolver.issue_definitions.LLM')
+def test_extract_issue_references(mock_llm):
+    # Mock LLM since we don't need it for testing issue reference extraction
+    handler = IssueHandler("owner", "repo", "token", MagicMock())
+
+    # Test cases that should NOT match
+    text_without_refs = """
+    This is a regular text with no issue references.
+    Here's a URL: https://github.com/org/repo/issues/123
+    Here's a code block:
+    ```
+    Issue #456 should be ignored
+    ```
+    Here's inline code: `Issue #789`
+    Here's a URL with hash: https://example.com/page#1234
+    Here's a version number: v1.2.3
+    """
+    assert handler._extract_issue_references(text_without_refs) == []
+
+    # Test cases that SHOULD match
+    text_with_refs = """
+    This PR fixes #123
+    Related to #456 and closes #789
+    See issue #101 for details
+    This PR addresses #202
+    References #303
+    Fixes: #404
+    Fixed #505
+    Closes: #606
+    Closed #707
+    Resolves: #808
+    Resolved #909
+    """
+    assert sorted(handler._extract_issue_references(text_with_refs)) == [101, 123, 202, 303, 404, 456, 505, 606, 707, 789, 808, 909]
+
+    # Test edge cases
+    edge_cases = """
+    Fixes #1 at start of line
+    Text fixes #2 in middle
+    fixes#3without-space
+    FIXES #4 uppercase
+    FiXeD #5 mixed case
+    fixes: #6 with colon
+    fixes,#7 with comma
+    fixes;#8 with semicolon
+    fixes (#9) with parens
+    fixes [#10] with brackets
+    fixes{#11}with braces
+    fixes #12, #13 and #14
+    """
+    assert sorted(handler._extract_issue_references(edge_cases)) == [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]