Skip to content

Commit

Permalink
issue #2: tests and fix for sanitize
Browse files Browse the repository at this point in the history
  • Loading branch information
k-allagbe committed Nov 27, 2023
1 parent f6150ea commit 1318eef
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 6 deletions.
3 changes: 3 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,6 @@ FINESSE_BACKEND_GITHUB_STATIC_FILE_URL=https://api.github.com/repos/ai-cfia/fine
# Threshold for fuzzy matching queries to finesse-data files. Represents the minimum
# score (out of 100) for a match to be considered close enough. Optional.
# FINESSE_BACKEND_FUZZY_MATCH_THRESHOLD=90

# Regular expression matching the text to remove from search queries (sanitization against log injection). Optional.
# FINESSE_BACKEND_SANITIZE_PATTERN="[^\w \d\"#\$%&'\(\)\*\+,-\.\/:;?@\^_`{\|}~]+|\%\w+|;|/|\(|\)"
9 changes: 3 additions & 6 deletions app/blueprints/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from index_search import AzureIndexSearchQueryError, search

from app.finesse_data import FinesseDataFetchException, fetch_data
from app.utils import sanitize

search_blueprint = Blueprint("finesse", __name__)

Expand All @@ -20,15 +21,11 @@ def decorated_function(*args, **kwargs):
return decorated_function


def sanitize(input):
return re.sub("[^\w\s\d\"#\$%&'\(\)\*\+,-\./:;<=>\?@\[\\\]\^_`{\|}~]+", "", input)


@search_blueprint.route("/azure", methods=["POST"])
@require_non_empty_query
def search_azure():
query = request.json["query"]
query = sanitize(query)
query = sanitize(query, current_app.config["SANITIZE_PATTERN"])
try:
results = search(query, current_app.config["AZURE_CONFIG"])
return jsonify(results)
Expand All @@ -43,7 +40,7 @@ def search_azure():
def search_static():
finesse_data_url = current_app.config["FINESSE_DATA_URL"]
query = request.json["query"]
query = sanitize(query)
query = sanitize(query, current_app.config["SANITIZE_PATTERN"])
match_threshold = current_app.config["FUZZY_MATCH_THRESHOLD"]
try:
data = fetch_data(finesse_data_url, query, match_threshold)
Expand Down
6 changes: 6 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
DEFAULT_ERROR_FINESSE_DATA_FAILED = "finesse-data static search failed"
DEFAULT_ERROR_UNEXPECTED = "Unexpected error."
DEFAULT_FUZZY_MATCH_THRESHOLD = 90
# Default regex whose matches are stripped from incoming queries. It removes:
#   * any run of characters outside the allow-list in the bracket expression,
#   * printf-style tokens such as "%s" ("%" followed by word characters),
#   * the individual characters ";", "/", "(" and ")".
# Written as a raw string so the regex escapes (\w, \d, \$, ...) reach the
# `re` module untouched; in a normal string literal they are invalid escape
# sequences that newer Python versions warn about. Inside the raw string,
# `\"` is passed to the regex engine as an escaped double quote, which still
# matches a literal `"`.
DEFAULT_SANITIZE_PATTERN = (
    r"[^\w \d\"#\$%&'\(\)\*\+,-\.\/:;?@\^_`{\|}~]+|\%\w+|;|/|\(|\)"
)


@dataclass
Expand Down Expand Up @@ -50,3 +53,6 @@ class Config:
"FINESSE_BACKEND_FUZZY_MATCH_THRESHOLD", DEFAULT_FUZZY_MATCH_THRESHOLD
)
)
SANITIZE_PATTERN = os.getenv(
"FINESSE_BACKEND_SANITIZE_PATTERN", DEFAULT_SANITIZE_PATTERN
)
6 changes: 6 additions & 0 deletions app/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import re


def sanitize(input, pattern):
    """Return ``input`` with every match of ``pattern`` removed.

    Intended to strip text that could be used for log injection.

    Args:
        input: The string to clean.
        pattern: Regular expression (string or compiled pattern) describing
            the text to delete.

    Returns:
        A copy of ``input`` in which each match of ``pattern`` has been
        replaced by the empty string.
    """
    matcher = re.compile(pattern)
    cleaned = matcher.sub("", input)
    return cleaned
20 changes: 20 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import unittest
from app.utils import sanitize


class TestSanitize(unittest.TestCase):
    """Checks that ``sanitize`` strips unwanted text and keeps ordinary text."""

    def setUp(self):
        # Substrings that must not survive sanitization: control characters,
        # angle brackets, a printf-style token, separators, a bidi override
        # (U+202E) and NUL.
        self.invalid_chars = [
            "\n",
            "\r",
            "\t",
            "<",
            ">",
            "%s",
            ";",
            "/",
            "(",
            ")",
            "\u202e",
            "\x00",
        ]
        self.base_string = "Hello{}World"
        # Raw string so the regex escapes (\w, \d, \$, ...) are not treated
        # as (invalid) Python string escapes; `\"` still matches a literal
        # double quote at the regex level.
        self.pattern = (
            r"[^\w \d\"#\$%&'\(\)\*\+,-\.\/:;?@\^_`{\|}~]+|\%\w+|;|/|\(|\)"
        )

    def test_sanitize_invalid_characters(self):
        """Every entry of ``invalid_chars`` is removed from the input."""
        for char in self.invalid_chars:
            test_string = self.base_string.format(char)
            with self.subTest(char=char):
                sanitized = sanitize(test_string, self.pattern)
                # {char!r} keeps control characters readable in the failure
                # message instead of emitting them raw into the test output.
                self.assertNotIn(
                    char,
                    sanitized,
                    f"Invalid character {char!r} was not removed",
                )

    def test_sanitize_preserves_valid_text(self):
        """Letters, digits and spaces pass through unchanged."""
        # Guards against a pattern that passes the removal test simply by
        # deleting everything.
        clean = "Hello World 123"
        self.assertEqual(sanitize(clean, self.pattern), clean)


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 1318eef

Please sign in to comment.