From 2d5d0131bb8d3cf9c628fc2b545947f0e950c92e Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Tue, 11 Apr 2023 11:17:07 +0300 Subject: [PATCH 1/9] browse: (1) apply validation also to scrape_links(), (2) add tests for scrape_links() --- scripts/browse.py | 43 +++++++---- tests/test_browse_scrape_links.py | 117 ++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 14 deletions(-) create mode 100644 tests/test_browse_scrape_links.py diff --git a/scripts/browse.py b/scripts/browse.py index 09f376a70a29..810606a33ccd 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -5,25 +5,38 @@ cfg = Config() -# Define and check for local file address prefixes def check_local_file_access(url): + # Define and check for local file address prefixes local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) +def get_validated_response(url, headers=cfg.user_agent_header): + try: + # Restrict access to local files + if check_local_file_access(url): + raise ValueError('Access to local files is restricted') + + # Most basic check if the URL is valid: + if not url.startswith('http://') and not url.startswith('https://'): + raise ValueError('Invalid URL format') + + # Make the HTTP request and return the response + response = requests.get(url, headers=headers) + response.raise_for_status() # Raise an exception if the response contains an HTTP error status code + return response, None + except ValueError as ve: + # Handle invalid URL format + return None, "Error: " + str(ve) + + except requests.exceptions.RequestException as re: + # Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.) + return None, "Error: " + str(re) + def scrape_text(url): """Scrape text from a webpage""" - # Most basic check if the URL is valid: - if not url.startswith('http'): - return "Error: Invalid URL" - - # Restrict access to local files - if check_local_file_access(url): - return "Error: Access to local files is restricted" - - try: - response = requests.get(url, headers=cfg.user_agent_header) - except requests.exceptions.RequestException as e: - return "Error: " + str(e) + response, error_message = get_validated_response(url) + if error_message: + return error_message # Check if the response contains an HTTP error if response.status_code >= 400: @@ -60,7 +73,9 @@ def format_hyperlinks(hyperlinks): def scrape_links(url): """Scrape links from a webpage""" - response = requests.get(url, headers=cfg.user_agent_header) + response, error_message = get_validated_response(url) + if error_message: + return error_message # Check if the response contains an HTTP error if response.status_code >= 400: diff --git a/tests/test_browse_scrape_links.py b/tests/test_browse_scrape_links.py new file mode 100644 index 000000000000..908e43b9f1d5 --- /dev/null +++ b/tests/test_browse_scrape_links.py @@ -0,0 +1,117 @@ + +# Generated by CodiumAI +from scripts.browse import scrape_links + + +# Dependencies: +# pip install pytest-mock +import pytest + +""" +Code Analysis + +Objective: +The objective of the 'scrape_links' function is to scrape hyperlinks from a given URL and return them in a formatted way. + +Inputs: +- url: a string representing the URL to be scraped. + +Flow: +1. Send a GET request to the given URL using the requests library and the user agent header from the config file. +2. Check if the response contains an HTTP error. If it does, return "error". +3. 
Parse the HTML content of the response using the BeautifulSoup library.
+4. Remove any script and style tags from the parsed HTML.
+5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function.
+6. Format the extracted hyperlinks using the 'format_hyperlinks' function.
+7. Return the formatted hyperlinks.
+
+Outputs:
+- A list of formatted hyperlinks.
+
+Additional aspects:
+- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP requests and parse HTML content, respectively.
+- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML.
+- The 'format_hyperlinks' function is called to format the extracted hyperlinks.
+- The function checks for HTTP errors and returns "error" if any are found.
+"""
+
+
+class TestScrapeLinks:
+
+    # Tests that the function returns a list of formatted hyperlinks when provided with a valid url that returns a webpage with hyperlinks.
+    def test_valid_url_with_hyperlinks(self):
+        url = "https://www.google.com"
+        result = scrape_links(url)
+        assert len(result) > 0
+        assert isinstance(result, list)
+        assert isinstance(result[0], str)
+
+    # Tests that the function returns correctly formatted hyperlinks when given a valid url.
+    def test_valid_url(self, mocker):
+        # Mock the requests.get() function to return a response with sample HTML containing hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = "<html><body><a href='https://www.google.com'>Google</a></body></html>"
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with a valid URL
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns correctly formatted hyperlinks
+        assert result == ["Google (https://www.google.com)"]
+
+    # Tests that the function returns "error" when given an invalid url.
+    def test_invalid_url(self, mocker):
+        # Mock the requests.get() function to return an HTTP error response
+        mock_response = mocker.Mock()
+        mock_response.status_code = 404
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with an invalid URL
+        result = scrape_links("https://www.invalidurl.com")
+
+        # Assert that the function returns "error"
+        assert result == "error"
+
+    # Tests that the function returns an empty list when the html contains no hyperlinks.
+    def test_no_hyperlinks(self, mocker):
+        # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = "<html><body><p>No hyperlinks here</p></body></html>"
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with a URL containing no hyperlinks
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns an empty list
+        assert result == []
+
+    # Tests that scrape_links() correctly extracts and formats hyperlinks from a sample HTML containing a few hyperlinks.
+    def test_scrape_links_with_few_hyperlinks(self, mocker):
+        # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = """
+        <html>
+        <body>
+            <a href="https://www.google.com">Google</a>
+            <a href="https://github.com">GitHub</a>
+            <a href="https://www.codium.ai">CodiumAI</a>
+        </body>
+        </html>
+ + + """ + mocker.patch('requests.get', return_value=mock_response) + + # Call the function being tested + result = scrape_links("https://www.example.com") + + # Assert that the function returns a list of formatted hyperlinks + assert isinstance(result, list) + assert len(result) == 3 + assert result[0] == "Google (https://www.google.com)" + assert result[1] == "GitHub (https://github.com)" + assert result[2] == "CodiumAI (https://www.codium.ai)" + + From 64c21ee8f7ad80006218bfa380a8a8e59f72b55d Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Tue, 11 Apr 2023 11:40:52 +0300 Subject: [PATCH 2/9] browse: make scrape_links() & scrape_text() "status_code >= 400" error message the same --- scripts/browse.py | 2 +- tests/test_browse_scrape_links.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index 810606a33ccd..2edc156b2490 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -79,7 +79,7 @@ def scrape_links(url): # Check if the response contains an HTTP error if response.status_code >= 400: - return "error" + return "Error: HTTP " + str(response.status_code) + " error" soup = BeautifulSoup(response.text, "html.parser") diff --git a/tests/test_browse_scrape_links.py b/tests/test_browse_scrape_links.py index 908e43b9f1d5..48d74167f60c 100644 --- a/tests/test_browse_scrape_links.py +++ b/tests/test_browse_scrape_links.py @@ -72,7 +72,7 @@ def test_invalid_url(self, mocker): result = scrape_links("https://www.invalidurl.com") # Assert that the function returns "error" - assert result == "error" + assert "Error:" in result # Tests that the function returns an empty list when the html contains no hyperlinks. def test_no_hyperlinks(self, mocker): From 2ec42bf3e8f3acde81b423fcfe3e866543addb71 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Wed, 12 Apr 2023 12:21:53 +0300 Subject: [PATCH 3/9] removing compliant whitespace --- tests/test_browse_scrape_links.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_browse_scrape_links.py b/tests/test_browse_scrape_links.py index 48d74167f60c..b3acf394dd40 100644 --- a/tests/test_browse_scrape_links.py +++ b/tests/test_browse_scrape_links.py @@ -112,6 +112,4 @@ def test_scrape_links_with_few_hyperlinks(self, mocker): assert len(result) == 3 assert result[0] == "Google (https://www.google.com)" assert result[1] == "GitHub (https://github.com)" - assert result[2] == "CodiumAI (https://www.codium.ai)" - - + assert result[2] == "CodiumAI (https://www.codium.ai)" \ No newline at end of file From c63645cbbafa3c84ee8b46d0e8806ab6d0fc4009 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Wed, 12 Apr 2023 22:41:23 +0300 Subject: [PATCH 4/9] redo suggested changes. 
move unit test files to the fitting directory --- scripts/browse.py | 56 ++++++-------------- tests/{ => unit}/test_browse_scrape_links.py | 4 +- tests/{ => unit}/test_browse_scrape_text.py | 0 3 files changed, 17 insertions(+), 43 deletions(-) rename tests/{ => unit}/test_browse_scrape_links.py (100%) rename tests/{ => unit}/test_browse_scrape_text.py (100%) diff --git a/scripts/browse.py b/scripts/browse.py index 046ff6ae9fd5..912d5635e2a6 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -19,22 +19,13 @@ def is_valid_url(url): def sanitize_url(url): return urljoin(url, urlparse(url).path) -# Function to make a request with a specified timeout and handle exceptions -def make_request(url, timeout=10): - try: - response = requests.get(url, headers=cfg.user_agent_header, timeout=timeout) - response.raise_for_status() - return response - except requests.exceptions.RequestException as e: - return "Error: " + str(e) - # Define and check for local file address prefixes def check_local_file_access(url): # Define and check for local file address prefixes local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) -def get_validated_response(url, headers=cfg.user_agent_header): +def get_response(url, headers=cfg.user_agent_header, timeout=10): try: # Restrict access to local files if check_local_file_access(url): @@ -44,9 +35,14 @@ def get_validated_response(url, headers=cfg.user_agent_header): if not url.startswith('http://') and not url.startswith('https://'): raise ValueError('Invalid URL format') - # Make the HTTP request and return the response - response = requests.get(url, headers=headers) - response.raise_for_status() # Raise an exception if the response contains an HTTP error status code + sanitized_url = sanitize_url(url) + + response = requests.get(sanitized_url, headers=headers, timeout=timeout) + + # Check if the response contains an HTTP error + if response.status_code >= 400: + return None, "Error: HTTP " + str(response.status_code) + " error" + return response, None except ValueError as ve: # Handle invalid URL format @@ -58,29 +54,9 @@ def get_validated_response(url, headers=cfg.user_agent_header): def scrape_text(url): """Scrape text from a webpage""" - # Basic check if the URL is valid - if not url.startswith('http'): - return "Error: Invalid URL" - - # Restrict access to local files - if check_local_file_access(url): - return "Error: Access to local files is restricted" - - # Validate the input URL - if not is_valid_url(url): - # Sanitize the input URL - sanitized_url = sanitize_url(url) - - # Make the request with a timeout and handle exceptions - response = make_request(sanitized_url) - - if isinstance(response, str): - return response - else: - # Sanitize the input URL - sanitized_url = sanitize_url(url) - - response = requests.get(sanitized_url, headers=cfg.user_agent_header) + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") @@ -113,11 +89,9 @@ def format_hyperlinks(hyperlinks): def scrape_links(url): """Scrape links from a webpage""" - response = requests.get(url, headers=cfg.user_agent_header) - - # Check if the response contains an HTTP error - if response.status_code >= 400: - return "Error: HTTP " + str(response.status_code) + " error" + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") diff 
--git a/tests/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py similarity index 100% rename from tests/test_browse_scrape_links.py rename to tests/unit/test_browse_scrape_links.py index b3acf394dd40..fdacf4c0f155 100644 --- a/tests/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -1,12 +1,12 @@ # Generated by CodiumAI -from scripts.browse import scrape_links - # Dependencies: # pip install pytest-mock import pytest +from scripts.browse import scrape_links + """ Code Analysis diff --git a/tests/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py similarity index 100% rename from tests/test_browse_scrape_text.py rename to tests/unit/test_browse_scrape_text.py From 57bca3620eb5ccd941726e358b3b4c6c9db33072 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Wed, 12 Apr 2023 23:04:43 +0300 Subject: [PATCH 5/9] minor style --- scripts/browse.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index 912d5635e2a6..78fe4ae252fe 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -6,7 +6,6 @@ cfg = Config() - # Function to check if the URL is valid def is_valid_url(url): try: @@ -21,7 +20,6 @@ def sanitize_url(url): # Define and check for local file address prefixes def check_local_file_access(url): - # Define and check for local file address prefixes local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) From a40ccc1e5de3bfbe2b6c38648e716e33b8182e3a Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Wed, 12 Apr 2023 23:53:40 +0300 Subject: [PATCH 6/9] flake8 style --- tests/unit/test_browse_scrape_links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index fdacf4c0f155..9b69b27baead 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -112,4 +112,4 @@ def test_scrape_links_with_few_hyperlinks(self, mocker): assert len(result) == 3 assert result[0] == "Google (https://www.google.com)" assert result[1] == "GitHub (https://github.com)" - assert result[2] == "CodiumAI (https://www.codium.ai)" \ No newline at end of file + assert result[2] == "CodiumAI (https://www.codium.ai)" From 9f972f4ee9c938c9e64dcc314b920e66c53b3c45 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Thu, 13 Apr 2023 00:00:33 +0300 Subject: [PATCH 7/9] flake8 style --- scripts/browse.py | 7 ++++++- tests/unit/test_browse_scrape_links.py | 10 ++++++---- tests/unit/test_browse_scrape_text.py | 5 +++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index 6d473ed6bac6..e224b04d8021 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -6,6 +6,7 @@ cfg = Config() + # Function to check if the URL is valid def is_valid_url(url): try: @@ -14,15 +15,18 @@ def is_valid_url(url): except ValueError: return False + # Function to sanitize the URL def sanitize_url(url): return urljoin(url, urlparse(url).path) + # Define and check for local file address prefixes def check_local_file_access(url): local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) + def get_response(url, headers=cfg.user_agent_header, timeout=10): try: # Restrict access to local files @@ -33,7 +37,6 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): if 
not url.startswith('http://') and not url.startswith('https://'): raise ValueError('Invalid URL format') - sanitized_url = sanitize_url(url) response = requests.get(sanitized_url, headers=headers, timeout=timeout) @@ -51,6 +54,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.) return None, "Error: " + str(re) + def scrape_text(url): """Scrape text from a webpage""" response, error_message = get_response(url) @@ -128,6 +132,7 @@ def create_message(chunk, question): "content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text." } + def summarize_text(text, question): """Summarize text using the LLM model""" if not text: diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index 9b69b27baead..639987a24197 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -11,7 +11,8 @@ Code Analysis Objective: -The objective of the 'scrape_links' function is to scrape hyperlinks from a given URL and return them in a formatted way. +The objective of the 'scrape_links' function is to scrape hyperlinks from a +given URL and return them in a formatted way. Inputs: - url: a string representing the URL to be scraped. @@ -29,17 +30,18 @@ - A list of formatted hyperlinks. Additional aspects: -- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP requests and parse HTML content, respectively. +- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP +requests and parse HTML content, respectively. - The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML. - The 'format_hyperlinks' function is called to format the extracted hyperlinks. - The function checks for HTTP errors and returns "error" if any are found. """ - class TestScrapeLinks: - # Tests that the function returns a list of formatted hyperlinks when provided with a valid url that returns a webpage with hyperlinks. + # Tests that the function returns a list of formatted hyperlinks when + # provided with a valid url that returns a webpage with hyperlinks. def test_valid_url_with_hyperlinks(self): url = "https://www.google.com" result = scrape_links(url) diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py index 775eefcd253f..760722762327 100644 --- a/tests/unit/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -2,7 +2,6 @@ # Generated by CodiumAI import requests -import tests.context from scripts.browse import scrape_text @@ -10,7 +9,8 @@ Code Analysis Objective: -The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts. +The objective of the "scrape_text" function is to scrape the text content from +a given URL and return it as a string, after removing any unwanted HTML tags and scripts. Inputs: - url: a string representing the URL of the webpage to be scraped. @@ -33,6 +33,7 @@ - The function uses a generator expression to split the text into lines and chunks, which can improve performance for large amounts of text. """ + class TestScrapeText: # Tests that scrape_text() returns the expected text when given a valid URL. 
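
By the end of PATCH 7, get_response() is the single validated entry point that both
scrape_text() and scrape_links() unwrap. A minimal sketch of consuming its
(response, error_message) contract directly, assuming scripts/browse.py as patched
above (the URL and timeout values are illustrative, not part of the patches):

    # Sketch only: exercises get_response() as defined in scripts/browse.py above.
    from scripts.browse import get_response

    response, error_message = get_response("https://www.example.com", timeout=5)
    if error_message:
        # e.g. "Error: Invalid URL format", "Error: Access to local files is restricted",
        # or "Error: HTTP 404 error"
        print(error_message)
    else:
        # response is a plain requests.Response on success
        print(response.status_code)
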
From bf3c76ced73d6620ef895e96e6c5c55e1a6e37bc Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Thu, 13 Apr 2023 00:04:08 +0300 Subject: [PATCH 8/9] flake8 style --- scripts/browse.py | 4 ++-- tests/unit/test_browse_scrape_links.py | 19 ++++++++++--------- tests/unit/test_browse_scrape_text.py | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index e224b04d8021..c26cde65de71 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -32,7 +32,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Restrict access to local files if check_local_file_access(url): raise ValueError('Access to local files is restricted') - + # Most basic check if the URL is valid: if not url.startswith('http://') and not url.startswith('https://'): raise ValueError('Invalid URL format') @@ -44,7 +44,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Check if the response contains an HTTP error if response.status_code >= 400: return None, "Error: HTTP " + str(response.status_code) + " error" - + return response, None except ValueError as ve: # Handle invalid URL format diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index 639987a24197..bd57fb84d356 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -11,7 +11,7 @@ Code Analysis Objective: -The objective of the 'scrape_links' function is to scrape hyperlinks from a +The objective of the 'scrape_links' function is to scrape hyperlinks from a given URL and return them in a formatted way. Inputs: @@ -30,7 +30,7 @@ - A list of formatted hyperlinks. Additional aspects: -- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP +- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP requests and parse HTML content, respectively. - The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML. - The 'format_hyperlinks' function is called to format the extracted hyperlinks. @@ -40,16 +40,16 @@ class TestScrapeLinks: - # Tests that the function returns a list of formatted hyperlinks when - # provided with a valid url that returns a webpage with hyperlinks. + # Tests that the function returns a list of formatted hyperlinks when + # provided with a valid url that returns a webpage with hyperlinks. def test_valid_url_with_hyperlinks(self): url = "https://www.google.com" result = scrape_links(url) assert len(result) > 0 assert isinstance(result, list) assert isinstance(result[0], str) - - # Tests that the function returns correctly formatted hyperlinks when given a valid url. + + # Tests that the function returns correctly formatted hyperlinks when given a valid url. def test_valid_url(self, mocker): # Mock the requests.get() function to return a response with sample HTML containing hyperlinks mock_response = mocker.Mock() @@ -63,7 +63,7 @@ def test_valid_url(self, mocker): # Assert that the function returns correctly formatted hyperlinks assert result == ["Google (https://www.google.com)"] - # Tests that the function returns "error" when given an invalid url. + # Tests that the function returns "error" when given an invalid url. 
def test_invalid_url(self, mocker): # Mock the requests.get() function to return an HTTP error response mock_response = mocker.Mock() @@ -76,7 +76,7 @@ def test_invalid_url(self, mocker): # Assert that the function returns "error" assert "Error:" in result - # Tests that the function returns an empty list when the html contains no hyperlinks. + # Tests that the function returns an empty list when the html contains no hyperlinks. def test_no_hyperlinks(self, mocker): # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks mock_response = mocker.Mock() @@ -90,7 +90,8 @@ def test_no_hyperlinks(self, mocker): # Assert that the function returns an empty list assert result == [] - # Tests that scrape_links() correctly extracts and formats hyperlinks from a sample HTML containing a few hyperlinks. + # Tests that scrape_links() correctly extracts and formats hyperlinks from + # a sample HTML containing a few hyperlinks. def test_scrape_links_with_few_hyperlinks(self, mocker): # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks mock_response = mocker.Mock() diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py index 760722762327..9385cde71b07 100644 --- a/tests/unit/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -9,7 +9,7 @@ Code Analysis Objective: -The objective of the "scrape_text" function is to scrape the text content from +The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts. Inputs: From 3e53e976a52004179a9fc42b224a4b3bcdd8738a Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Thu, 13 Apr 2023 00:06:23 +0300 Subject: [PATCH 9/9] flake8 style --- scripts/browse.py | 4 ++-- tests/unit/test_browse_scrape_links.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index c26cde65de71..9e93c55a338b 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -32,7 +32,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Restrict access to local files if check_local_file_access(url): raise ValueError('Access to local files is restricted') - + # Most basic check if the URL is valid: if not url.startswith('http://') and not url.startswith('https://'): raise ValueError('Invalid URL format') @@ -44,7 +44,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Check if the response contains an HTTP error if response.status_code >= 400: return None, "Error: HTTP " + str(response.status_code) + " error" - + return response, None except ValueError as ve: # Handle invalid URL format diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index bd57fb84d356..2172d1a2bef6 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -48,7 +48,7 @@ def test_valid_url_with_hyperlinks(self): assert len(result) > 0 assert isinstance(result, list) assert isinstance(result[0], str) - + # Tests that the function returns correctly formatted hyperlinks when given a valid url. 
def test_valid_url(self, mocker): # Mock the requests.get() function to return a response with sample HTML containing hyperlinks @@ -90,7 +90,7 @@ def test_no_hyperlinks(self, mocker): # Assert that the function returns an empty list assert result == [] - # Tests that scrape_links() correctly extracts and formats hyperlinks from + # Tests that scrape_links() correctly extracts and formats hyperlinks from # a sample HTML containing a few hyperlinks. def test_scrape_links_with_few_hyperlinks(self, mocker): # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks
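
With the tests moved under tests/unit/ (PATCH 4) and the flake8 passes complete
(PATCHES 6 through 9), the two suites can be run on their own. One possible
invocation, assuming pytest and pytest-mock are installed as noted in the
test-file header:

    # Sketch only: runs the relocated unit tests programmatically.
    import pytest

    exit_code = pytest.main([
        "tests/unit/test_browse_scrape_links.py",
        "tests/unit/test_browse_scrape_text.py",
        "-q",
    ])
    raise SystemExit(exit_code)
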