From 2d5d0131bb8d3cf9c628fc2b545947f0e950c92e Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Tue, 11 Apr 2023 11:17:07 +0300 Subject: [PATCH 1/9] browse: (1) apply validation also to scrape_links(), (2) add tests for scrape_links() --- scripts/browse.py | 43 +++++++---- tests/test_browse_scrape_links.py | 117 ++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 14 deletions(-) create mode 100644 tests/test_browse_scrape_links.py diff --git a/scripts/browse.py b/scripts/browse.py index 09f376a70a29..810606a33ccd 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -5,25 +5,38 @@ cfg = Config() -# Define and check for local file address prefixes def check_local_file_access(url): + # Define and check for local file address prefixes local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) +def get_validated_response(url, headers=cfg.user_agent_header): + try: + # Restrict access to local files + if check_local_file_access(url): + raise ValueError('Access to local files is restricted') + + # Most basic check if the URL is valid: + if not url.startswith('http://') and not url.startswith('https://'): + raise ValueError('Invalid URL format') + + # Make the HTTP request and return the response + response = requests.get(url, headers=headers) + response.raise_for_status() # Raise an exception if the response contains an HTTP error status code + return response, None + except ValueError as ve: + # Handle invalid URL format + return None, "Error: " + str(ve) + + except requests.exceptions.RequestException as re: + # Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.) + return None, "Error: " + str(re) + def scrape_text(url): """Scrape text from a webpage""" - # Most basic check if the URL is valid: - if not url.startswith('http'): - return "Error: Invalid URL" - - # Restrict access to local files - if check_local_file_access(url): - return "Error: Access to local files is restricted" - - try: - response = requests.get(url, headers=cfg.user_agent_header) - except requests.exceptions.RequestException as e: - return "Error: " + str(e) + response, error_message = get_validated_response(url) + if error_message: + return error_message # Check if the response contains an HTTP error if response.status_code >= 400: @@ -60,7 +73,9 @@ def format_hyperlinks(hyperlinks): def scrape_links(url): """Scrape links from a webpage""" - response = requests.get(url, headers=cfg.user_agent_header) + response, error_message = get_validated_response(url) + if error_message: + return error_message # Check if the response contains an HTTP error if response.status_code >= 400: diff --git a/tests/test_browse_scrape_links.py b/tests/test_browse_scrape_links.py new file mode 100644 index 000000000000..908e43b9f1d5 --- /dev/null +++ b/tests/test_browse_scrape_links.py @@ -0,0 +1,117 @@ + +# Generated by CodiumAI +from scripts.browse import scrape_links + + +# Dependencies: +# pip install pytest-mock +import pytest + +""" +Code Analysis + +Objective: +The objective of the 'scrape_links' function is to scrape hyperlinks from a given URL and return them in a formatted way. + +Inputs: +- url: a string representing the URL to be scraped. + +Flow: +1. Send a GET request to the given URL using the requests library and the user agent header from the config file. +2. Check if the response contains an HTTP error. If it does, return "error". +3. 
Parse the HTML content of the response using the BeautifulSoup library.
+4. Remove any script and style tags from the parsed HTML.
+5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function.
+6. Format the extracted hyperlinks using the 'format_hyperlinks' function.
+7. Return the formatted hyperlinks.
+
+Outputs:
+- A list of formatted hyperlinks.
+
+Additional aspects:
+- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP requests and parse HTML content, respectively.
+- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML.
+- The 'format_hyperlinks' function is called to format the extracted hyperlinks.
+- The function checks for HTTP errors and returns "error" if any are found.
+"""
+
+
+class TestScrapeLinks:
+
+    # Tests that the function returns a list of formatted hyperlinks when provided with a valid url that returns a webpage with hyperlinks.
+    def test_valid_url_with_hyperlinks(self):
+        url = "https://www.google.com"
+        result = scrape_links(url)
+        assert len(result) > 0
+        assert isinstance(result, list)
+        assert isinstance(result[0], str)
+
+    # Tests that the function returns correctly formatted hyperlinks when given a valid url.
+    def test_valid_url(self, mocker):
+        # Mock the requests.get() function to return a response with sample HTML containing hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = "<html><body><a href='https://www.google.com'>Google</a></body></html>"
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with a valid URL
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns correctly formatted hyperlinks
+        assert result == ["Google (https://www.google.com)"]
+
+    # Tests that the function returns "error" when given an invalid url.
+    def test_invalid_url(self, mocker):
+        # Mock the requests.get() function to return an HTTP error response
+        mock_response = mocker.Mock()
+        mock_response.status_code = 404
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with an invalid URL
+        result = scrape_links("https://www.invalidurl.com")
+
+        # Assert that the function returns "error"
+        assert result == "error"
+
+    # Tests that the function returns an empty list when the html contains no hyperlinks.
+    def test_no_hyperlinks(self, mocker):
+        # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = "<html><body><p>No hyperlinks here</p></body></html>"
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with a URL containing no hyperlinks
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns an empty list
+        assert result == []
+
+    # Tests that scrape_links() correctly extracts and formats hyperlinks from a sample HTML containing a few hyperlinks.
+    def test_scrape_links_with_few_hyperlinks(self, mocker):
+        # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = """
+        <html>
+        <body>
+            <a href="https://www.google.com">Google</a>
+            <a href="https://github.com">GitHub</a>
+            <a href="https://www.codium.ai">CodiumAI</a>
+        </body>
+        </html>
+ + + """ + mocker.patch('requests.get', return_value=mock_response) + + # Call the function being tested + result = scrape_links("https://www.example.com") + + # Assert that the function returns a list of formatted hyperlinks + assert isinstance(result, list) + assert len(result) == 3 + assert result[0] == "Google (https://www.google.com)" + assert result[1] == "GitHub (https://github.com)" + assert result[2] == "CodiumAI (https://www.codium.ai)" + + From 64c21ee8f7ad80006218bfa380a8a8e59f72b55d Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Tue, 11 Apr 2023 11:40:52 +0300 Subject: [PATCH 2/9] browse: make scrape_links() & scrape_text() "status_code >= 400" error message the same --- scripts/browse.py | 2 +- tests/test_browse_scrape_links.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index 810606a33ccd..2edc156b2490 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -79,7 +79,7 @@ def scrape_links(url): # Check if the response contains an HTTP error if response.status_code >= 400: - return "error" + return "Error: HTTP " + str(response.status_code) + " error" soup = BeautifulSoup(response.text, "html.parser") diff --git a/tests/test_browse_scrape_links.py b/tests/test_browse_scrape_links.py index 908e43b9f1d5..48d74167f60c 100644 --- a/tests/test_browse_scrape_links.py +++ b/tests/test_browse_scrape_links.py @@ -72,7 +72,7 @@ def test_invalid_url(self, mocker): result = scrape_links("https://www.invalidurl.com") # Assert that the function returns "error" - assert result == "error" + assert "Error:" in result # Tests that the function returns an empty list when the html contains no hyperlinks. def test_no_hyperlinks(self, mocker): From 2ec42bf3e8f3acde81b423fcfe3e866543addb71 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Wed, 12 Apr 2023 12:21:53 +0300 Subject: [PATCH 3/9] removing compliant whitespace --- tests/test_browse_scrape_links.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_browse_scrape_links.py b/tests/test_browse_scrape_links.py index 48d74167f60c..b3acf394dd40 100644 --- a/tests/test_browse_scrape_links.py +++ b/tests/test_browse_scrape_links.py @@ -112,6 +112,4 @@ def test_scrape_links_with_few_hyperlinks(self, mocker): assert len(result) == 3 assert result[0] == "Google (https://www.google.com)" assert result[1] == "GitHub (https://github.com)" - assert result[2] == "CodiumAI (https://www.codium.ai)" - - + assert result[2] == "CodiumAI (https://www.codium.ai)" \ No newline at end of file From c63645cbbafa3c84ee8b46d0e8806ab6d0fc4009 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Wed, 12 Apr 2023 22:41:23 +0300 Subject: [PATCH 4/9] redo suggested changes. 
move unit test files to the fitting directory --- scripts/browse.py | 56 ++++++-------------- tests/{ => unit}/test_browse_scrape_links.py | 4 +- tests/{ => unit}/test_browse_scrape_text.py | 0 3 files changed, 17 insertions(+), 43 deletions(-) rename tests/{ => unit}/test_browse_scrape_links.py (100%) rename tests/{ => unit}/test_browse_scrape_text.py (100%) diff --git a/scripts/browse.py b/scripts/browse.py index 046ff6ae9fd5..912d5635e2a6 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -19,22 +19,13 @@ def is_valid_url(url): def sanitize_url(url): return urljoin(url, urlparse(url).path) -# Function to make a request with a specified timeout and handle exceptions -def make_request(url, timeout=10): - try: - response = requests.get(url, headers=cfg.user_agent_header, timeout=timeout) - response.raise_for_status() - return response - except requests.exceptions.RequestException as e: - return "Error: " + str(e) - # Define and check for local file address prefixes def check_local_file_access(url): # Define and check for local file address prefixes local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) -def get_validated_response(url, headers=cfg.user_agent_header): +def get_response(url, headers=cfg.user_agent_header, timeout=10): try: # Restrict access to local files if check_local_file_access(url): @@ -44,9 +35,14 @@ def get_validated_response(url, headers=cfg.user_agent_header): if not url.startswith('http://') and not url.startswith('https://'): raise ValueError('Invalid URL format') - # Make the HTTP request and return the response - response = requests.get(url, headers=headers) - response.raise_for_status() # Raise an exception if the response contains an HTTP error status code + sanitized_url = sanitize_url(url) + + response = requests.get(sanitized_url, headers=headers, timeout=timeout) + + # Check if the response contains an HTTP error + if response.status_code >= 400: + return None, "Error: HTTP " + str(response.status_code) + " error" + return response, None except ValueError as ve: # Handle invalid URL format @@ -58,29 +54,9 @@ def get_validated_response(url, headers=cfg.user_agent_header): def scrape_text(url): """Scrape text from a webpage""" - # Basic check if the URL is valid - if not url.startswith('http'): - return "Error: Invalid URL" - - # Restrict access to local files - if check_local_file_access(url): - return "Error: Access to local files is restricted" - - # Validate the input URL - if not is_valid_url(url): - # Sanitize the input URL - sanitized_url = sanitize_url(url) - - # Make the request with a timeout and handle exceptions - response = make_request(sanitized_url) - - if isinstance(response, str): - return response - else: - # Sanitize the input URL - sanitized_url = sanitize_url(url) - - response = requests.get(sanitized_url, headers=cfg.user_agent_header) + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") @@ -113,11 +89,9 @@ def format_hyperlinks(hyperlinks): def scrape_links(url): """Scrape links from a webpage""" - response = requests.get(url, headers=cfg.user_agent_header) - - # Check if the response contains an HTTP error - if response.status_code >= 400: - return "Error: HTTP " + str(response.status_code) + " error" + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") diff 
--git a/tests/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py similarity index 100% rename from tests/test_browse_scrape_links.py rename to tests/unit/test_browse_scrape_links.py index b3acf394dd40..fdacf4c0f155 100644 --- a/tests/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -1,12 +1,12 @@ # Generated by CodiumAI -from scripts.browse import scrape_links - # Dependencies: # pip install pytest-mock import pytest +from scripts.browse import scrape_links + """ Code Analysis diff --git a/tests/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py similarity index 100% rename from tests/test_browse_scrape_text.py rename to tests/unit/test_browse_scrape_text.py From 57bca3620eb5ccd941726e358b3b4c6c9db33072 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Wed, 12 Apr 2023 23:04:43 +0300 Subject: [PATCH 5/9] minor style --- scripts/browse.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index 912d5635e2a6..78fe4ae252fe 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -6,7 +6,6 @@ cfg = Config() - # Function to check if the URL is valid def is_valid_url(url): try: @@ -21,7 +20,6 @@ def sanitize_url(url): # Define and check for local file address prefixes def check_local_file_access(url): - # Define and check for local file address prefixes local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) From a40ccc1e5de3bfbe2b6c38648e716e33b8182e3a Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Wed, 12 Apr 2023 23:53:40 +0300 Subject: [PATCH 6/9] flake8 style --- tests/unit/test_browse_scrape_links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index fdacf4c0f155..9b69b27baead 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -112,4 +112,4 @@ def test_scrape_links_with_few_hyperlinks(self, mocker): assert len(result) == 3 assert result[0] == "Google (https://www.google.com)" assert result[1] == "GitHub (https://github.com)" - assert result[2] == "CodiumAI (https://www.codium.ai)" \ No newline at end of file + assert result[2] == "CodiumAI (https://www.codium.ai)" From 9f972f4ee9c938c9e64dcc314b920e66c53b3c45 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Thu, 13 Apr 2023 00:00:33 +0300 Subject: [PATCH 7/9] flake8 style --- scripts/browse.py | 7 ++++++- tests/unit/test_browse_scrape_links.py | 10 ++++++---- tests/unit/test_browse_scrape_text.py | 5 +++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index 6d473ed6bac6..e224b04d8021 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -6,6 +6,7 @@ cfg = Config() + # Function to check if the URL is valid def is_valid_url(url): try: @@ -14,15 +15,18 @@ def is_valid_url(url): except ValueError: return False + # Function to sanitize the URL def sanitize_url(url): return urljoin(url, urlparse(url).path) + # Define and check for local file address prefixes def check_local_file_access(url): local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) + def get_response(url, headers=cfg.user_agent_header, timeout=10): try: # Restrict access to local files @@ -33,7 +37,6 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): if 
not url.startswith('http://') and not url.startswith('https://'): raise ValueError('Invalid URL format') - sanitized_url = sanitize_url(url) response = requests.get(sanitized_url, headers=headers, timeout=timeout) @@ -51,6 +54,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.) return None, "Error: " + str(re) + def scrape_text(url): """Scrape text from a webpage""" response, error_message = get_response(url) @@ -128,6 +132,7 @@ def create_message(chunk, question): "content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text." } + def summarize_text(text, question): """Summarize text using the LLM model""" if not text: diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index 9b69b27baead..639987a24197 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -11,7 +11,8 @@ Code Analysis Objective: -The objective of the 'scrape_links' function is to scrape hyperlinks from a given URL and return them in a formatted way. +The objective of the 'scrape_links' function is to scrape hyperlinks from a +given URL and return them in a formatted way. Inputs: - url: a string representing the URL to be scraped. @@ -29,17 +30,18 @@ - A list of formatted hyperlinks. Additional aspects: -- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP requests and parse HTML content, respectively. +- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP +requests and parse HTML content, respectively. - The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML. - The 'format_hyperlinks' function is called to format the extracted hyperlinks. - The function checks for HTTP errors and returns "error" if any are found. """ - class TestScrapeLinks: - # Tests that the function returns a list of formatted hyperlinks when provided with a valid url that returns a webpage with hyperlinks. + # Tests that the function returns a list of formatted hyperlinks when + # provided with a valid url that returns a webpage with hyperlinks. def test_valid_url_with_hyperlinks(self): url = "https://www.google.com" result = scrape_links(url) diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py index 775eefcd253f..760722762327 100644 --- a/tests/unit/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -2,7 +2,6 @@ # Generated by CodiumAI import requests -import tests.context from scripts.browse import scrape_text @@ -10,7 +9,8 @@ Code Analysis Objective: -The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts. +The objective of the "scrape_text" function is to scrape the text content from +a given URL and return it as a string, after removing any unwanted HTML tags and scripts. Inputs: - url: a string representing the URL of the webpage to be scraped. @@ -33,6 +33,7 @@ - The function uses a generator expression to split the text into lines and chunks, which can improve performance for large amounts of text. """ + class TestScrapeText: # Tests that scrape_text() returns the expected text when given a valid URL. 
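
By the end of PATCH 7, get_response() is the single validated entry point that both
scrape_text() and scrape_links() unwrap. A minimal sketch of consuming its
(response, error_message) contract directly, assuming scripts/browse.py as patched
above (the URL and timeout values are illustrative, not part of the patches):

    # Sketch only: exercises get_response() as defined in scripts/browse.py above.
    from scripts.browse import get_response

    response, error_message = get_response("https://www.example.com", timeout=5)
    if error_message:
        # e.g. "Error: Invalid URL format", "Error: Access to local files is restricted",
        # or "Error: HTTP 404 error"
        print(error_message)
    else:
        # response is a plain requests.Response on success
        print(response.status_code)
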
From bf3c76ced73d6620ef895e96e6c5c55e1a6e37bc Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Thu, 13 Apr 2023 00:04:08 +0300 Subject: [PATCH 8/9] flake8 style --- scripts/browse.py | 4 ++-- tests/unit/test_browse_scrape_links.py | 19 ++++++++++--------- tests/unit/test_browse_scrape_text.py | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index e224b04d8021..c26cde65de71 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -32,7 +32,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Restrict access to local files if check_local_file_access(url): raise ValueError('Access to local files is restricted') - + # Most basic check if the URL is valid: if not url.startswith('http://') and not url.startswith('https://'): raise ValueError('Invalid URL format') @@ -44,7 +44,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Check if the response contains an HTTP error if response.status_code >= 400: return None, "Error: HTTP " + str(response.status_code) + " error" - + return response, None except ValueError as ve: # Handle invalid URL format diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index 639987a24197..bd57fb84d356 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -11,7 +11,7 @@ Code Analysis Objective: -The objective of the 'scrape_links' function is to scrape hyperlinks from a +The objective of the 'scrape_links' function is to scrape hyperlinks from a given URL and return them in a formatted way. Inputs: @@ -30,7 +30,7 @@ - A list of formatted hyperlinks. Additional aspects: -- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP +- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP requests and parse HTML content, respectively. - The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML. - The 'format_hyperlinks' function is called to format the extracted hyperlinks. @@ -40,16 +40,16 @@ class TestScrapeLinks: - # Tests that the function returns a list of formatted hyperlinks when - # provided with a valid url that returns a webpage with hyperlinks. + # Tests that the function returns a list of formatted hyperlinks when + # provided with a valid url that returns a webpage with hyperlinks. def test_valid_url_with_hyperlinks(self): url = "https://www.google.com" result = scrape_links(url) assert len(result) > 0 assert isinstance(result, list) assert isinstance(result[0], str) - - # Tests that the function returns correctly formatted hyperlinks when given a valid url. + + # Tests that the function returns correctly formatted hyperlinks when given a valid url. def test_valid_url(self, mocker): # Mock the requests.get() function to return a response with sample HTML containing hyperlinks mock_response = mocker.Mock() @@ -63,7 +63,7 @@ def test_valid_url(self, mocker): # Assert that the function returns correctly formatted hyperlinks assert result == ["Google (https://www.google.com)"] - # Tests that the function returns "error" when given an invalid url. + # Tests that the function returns "error" when given an invalid url. 
def test_invalid_url(self, mocker): # Mock the requests.get() function to return an HTTP error response mock_response = mocker.Mock() @@ -76,7 +76,7 @@ def test_invalid_url(self, mocker): # Assert that the function returns "error" assert "Error:" in result - # Tests that the function returns an empty list when the html contains no hyperlinks. + # Tests that the function returns an empty list when the html contains no hyperlinks. def test_no_hyperlinks(self, mocker): # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks mock_response = mocker.Mock() @@ -90,7 +90,8 @@ def test_no_hyperlinks(self, mocker): # Assert that the function returns an empty list assert result == [] - # Tests that scrape_links() correctly extracts and formats hyperlinks from a sample HTML containing a few hyperlinks. + # Tests that scrape_links() correctly extracts and formats hyperlinks from + # a sample HTML containing a few hyperlinks. def test_scrape_links_with_few_hyperlinks(self, mocker): # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks mock_response = mocker.Mock() diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py index 760722762327..9385cde71b07 100644 --- a/tests/unit/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -9,7 +9,7 @@ Code Analysis Objective: -The objective of the "scrape_text" function is to scrape the text content from +The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts. Inputs: From 3e53e976a52004179a9fc42b224a4b3bcdd8738a Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Thu, 13 Apr 2023 00:06:23 +0300 Subject: [PATCH 9/9] flake8 style --- scripts/browse.py | 4 ++-- tests/unit/test_browse_scrape_links.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index c26cde65de71..9e93c55a338b 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -32,7 +32,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Restrict access to local files if check_local_file_access(url): raise ValueError('Access to local files is restricted') - + # Most basic check if the URL is valid: if not url.startswith('http://') and not url.startswith('https://'): raise ValueError('Invalid URL format') @@ -44,7 +44,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10): # Check if the response contains an HTTP error if response.status_code >= 400: return None, "Error: HTTP " + str(response.status_code) + " error" - + return response, None except ValueError as ve: # Handle invalid URL format diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index bd57fb84d356..2172d1a2bef6 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -48,7 +48,7 @@ def test_valid_url_with_hyperlinks(self): assert len(result) > 0 assert isinstance(result, list) assert isinstance(result[0], str) - + # Tests that the function returns correctly formatted hyperlinks when given a valid url. 
def test_valid_url(self, mocker): # Mock the requests.get() function to return a response with sample HTML containing hyperlinks @@ -90,7 +90,7 @@ def test_no_hyperlinks(self, mocker): # Assert that the function returns an empty list assert result == [] - # Tests that scrape_links() correctly extracts and formats hyperlinks from + # Tests that scrape_links() correctly extracts and formats hyperlinks from # a sample HTML containing a few hyperlinks. def test_scrape_links_with_few_hyperlinks(self, mocker): # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks
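
With the tests moved under tests/unit/ (PATCH 4) and the flake8 passes complete
(PATCHES 6 through 9), the two suites can be run on their own. One possible
invocation, assuming pytest and pytest-mock are installed as noted in the
test-file header:

    # Sketch only: runs the relocated unit tests programmatically.
    import pytest

    exit_code = pytest.main([
        "tests/unit/test_browse_scrape_links.py",
        "tests/unit/test_browse_scrape_text.py",
        "-q",
    ])
    raise SystemExit(exit_code)
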