From c3dffd0d90616b3ebe59ab44e4e8a5adf23156a3 Mon Sep 17 00:00:00 2001 From: "W. Leighton Dawson" Date: Tue, 14 Feb 2023 08:52:33 +0200 Subject: [PATCH] T5182: Email and domain blocking port to v3 (#169) * add initial email and domain blocking * add unit test for email blocking and unblocking * add error codes to responses (frontend still doesn't show the right thing) * add v3 deploy workflow * remove redundant build workflow * add unit test for email blocking * add /legal and banners, make script runnable directly * enable tests, add integration test for error codes * separate poetry call from install use pipx to install poetry on windows * exclude skipped tests from coverage --- .github/workflows/build.yml | 163 -- .github/workflows/build_docker.yml | 13 +- .github/workflows/test.yml | 14 +- backend/app.py | 70 +- block_user.py | 69 +- canarytokens/constants.py | 14 + canarytokens/models.py | 28 +- canarytokens/pdfgen.py | 4 +- canarytokens/queries.py | 55 +- canarytokens/redismanager.py | 2 + canarytokens/tokens.py | 56 +- templates/generate_new.html | 9 +- templates/history.html | 2 +- templates/manage_new.html | 1 - templates/static/styles.css | 2037 ++++++++--------- templates/static/styles.min.css | 2 +- .../integration/test_against_token_server.py | 108 +- tests/integration/test_aws_key_token.py | 15 +- tests/integration/test_custom_image.py | 12 +- tests/integration/test_kubeconfig_token.py | 6 + .../test_mysql_token_against_server.py | 4 +- tests/integration/test_pdf_token.py | 3 +- tests/units/test_backend.py | 50 + tests/units/test_kubeconfig.py | 3 +- tests/units/test_queries.py | 23 + 25 files changed, 1408 insertions(+), 1355 deletions(-) delete mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index ecec7dd93..000000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,163 +0,0 @@ -name: Build -on: - push: - branches: - - "T4627_py3_main" - # workflow_dispatch: - # inputs: - # force_build: - # description: 'Force a build.' - # required: false - # default: false - # type: boolean - # workflow_run: - # workflows: - # - TestSuite - # types: - # - completed - # branches: - # - main - # - T4627_py3_main - -jobs: - build: - # if: ${{ github.event.workflow_run.conclusion == 'success' || inputs.force_build}} - runs-on: ubuntu-latest - env: - AWS_REGION: 'eu-west-1' - permissions: - id-token: write - contents: read - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - # FIXME: Add account number via a secret / inject - role-to-assume: arn:aws:iam::164951611079:role/Canarytokens-staging-github-action - role-session-name: GitHubActions-${{ github.actor }}-${{ github.workflow }}-${{ github.run_id }}-${{ github.run_number }} - aws-region: ${{ env.AWS_REGION }} - - uses: actions/checkout@v2 - - name: Set up Python 3.10 - uses: actions/setup-python@v2 - with: - python-version: "3.10" - - name: Install deps - run: | - curl -sSL https://install.python-poetry.org | python - - poetry config virtualenvs.in-project true - - name: Build python wheel - run: | - poetry export --extras twisted --extras web --output backend/requirements.txt - poetry export --extras twisted --extras web --output switchboard/requirements.txt - poetry build --format wheel - cp dist/*.whl backend/ - cp dist/*.whl switchboard/ - - - name: Copy templates - run: | - cp -r templates switchboard/ - cp -r templates backend/ - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Docker meta (Switchboard) - id: meta_switchboard - uses: docker/metadata-action@v3 - with: - images: | - thinkst/canarytokens_switchboard - tags: | - type=raw,value=latest,enable=${{ endsWith(github.ref, github.event.repository.default_branch) }} - type=schedule - type=ref,event=branch - type=ref,event=pr - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=semver,pattern={{major}} - type=sha - - - name: Build and push (Switchboard) - id: docker_build_switchboard - uses: docker/build-push-action@v2 - with: - context: ./switchboard - push: true - tags: ${{ steps.meta_switchboard.outputs.tags }} - - - name: Docker meta (Backend) - id: meta_backend - uses: docker/metadata-action@v3 - with: - images: | - thinkst/canarytokens_backend - tags: | - type=raw,value=latest,enable=${{ endsWith(github.ref, github.event.repository.default_branch) }} - type=schedule - type=ref,event=branch - type=ref,event=pr - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=semver,pattern={{major}} - type=sha - - - name: Build and push (Backend) - id: docker_build_backend - uses: docker/build-push-action@v2 - with: - context: ./backend - push: true - tags: ${{ steps.meta_backend.outputs.tags }} - - - name: Build, tag, and push Backend image to Amazon ECR - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: canarytokens-backend-staging - IMAGE_TAG: ${{ github.sha }} - run: | - pushd backend - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG --build-arg COMMIT_SHA=$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - popd - - name: Build, tag, and push Backend latest image to Amazon ECR - if: ${{github.ref == 'refs/heads/main'}} - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: canarytokens-backend-staging - IMAGE_TAG: ${{ github.sha }} - run: | - pushd backend - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:latest --build-arg COMMIT_SHA=$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:latest - popd - - name: Build, tag, and push Switchboard image to Amazon ECR - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: canarytokens-switchboard-staging - IMAGE_TAG: ${{ github.sha }} - run: | - pushd switchboard - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG --build-arg COMMIT_SHA=$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - popd - - - name: Build, tag, and push Switchboard latest image to Amazon ECR - if: ${{github.ref == 'refs/heads/main'}} - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: canarytokens-switchboard-staging - IMAGE_TAG: ${{ github.sha }} - run: | - pushd switchboard - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:latest --build-arg COMMIT_SHA=$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:latest - popd diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index d597bedaa..204d1e8ab 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -5,6 +5,7 @@ on: branches: - "master" - "dev" + - "dev_v3" workflow_dispatch: inputs: @@ -76,7 +77,7 @@ jobs: CACHE_BUSTER_COMMIT=${{ github.sha }} dev-deploy: - if: github.repository == 'thinkst/canarytokens' && github.ref == 'refs/heads/dev' + if: github.repository == 'thinkst/canarytokens' && github.ref == 'refs/heads/dev_v3' runs-on: [self-hosted, dev] needs: build steps: @@ -86,12 +87,12 @@ jobs: ./canarytokensdb_s3backup.sh cd /home/ubuntu/canarytokens-docker - sed "s/thinkst\/canarytokens:dev/thinkst\/canarytokens:${GITHUB_REF##*/}/g" docker-compose-letsencrypt.yml.tpl > docker-compose-letsencrypt.yml - sed -i'' "s/CANARY_DEV_BUILD_ID=.*/CANARY_DEV_BUILD_ID=${GITHUB_SHA:0:8}/" frontend.env + sed "s/thinkst\/canarytokens:dev_v3/thinkst\/canarytokens:${GITHUB_REF##*/}/g" docker-compose-v3.yml.tpl > docker-compose-v3.yml + sed -i'' "s/CANARY_DEV_BUILD_ID=.*/CANARY_DEV_BUILD_ID=${GITHUB_SHA:0:8}/" backend-v3.env sudo docker pull thinkst/canarytokens:${GITHUB_REF##*/} - sudo docker-compose -f docker-compose-letsencrypt.yml pull - sudo docker-compose -f docker-compose-letsencrypt.yml down - sudo docker-compose -f docker-compose-letsencrypt.yml up -d + sudo docker-compose -f docker-compose-v3.yml pull + sudo docker-compose -f docker-compose-v3.yml down + sudo docker-compose -f docker-compose-v3.yml up -d sudo docker system prune -f -a staging-deploy: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 40d87b54d..ded986f22 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,6 +4,7 @@ on: push: branches: - "T4627_py3_main" + - "T5182_email_block_list_py3" jobs: tests: @@ -146,15 +147,18 @@ jobs: - uses: actions/checkout@v2 - name: Install deps run: | - (Invoke-WebRequest -Uri https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py -UseBasicParsing).Content | python - - $env:path = $env:path + ";C:\Users\runneradmin\.poetry\bin"; poetry config virtualenvs.in-project true + python -m pip install --user pipx + python -m pipx ensurepath + python -m pipx install poetry==1.3.2 + # (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python - - name: Install python dependencies # poetry cache clear --all pypi ref: https://stackoverflow.com/questions/72551057/poetry-gives-toomanyindirects-error-suddenly # Remove when this is resolved. run: | - $env:path = $env:path + ";C:\Users\runneradmin\.poetry\bin"; poetry cache clear --all pypi - $env:path = $env:path + ";C:\Users\runneradmin\.poetry\bin"; poetry install -E 'twisted web' + poetry config virtualenvs.in-project true + poetry cache clear --all pypi + poetry install -E 'twisted web' - name: Integration Tests run: | $env:LIVE = 'True' - $env:path = $env:path + ";C:\Users\runneradmin\.poetry\bin"; poetry run coverage run --source=.\tests\integration -m pytest .\tests\integration\test_custom_binary.py --runv2 + poetry run coverage run --source=.\tests\integration -m pytest .\tests\integration\test_custom_binary.py --runv2 diff --git a/backend/app.py b/backend/app.py index 6071861cf..95af9e6dd 100644 --- a/backend/app.py +++ b/backend/app.py @@ -27,7 +27,7 @@ from fastapi.security import APIKeyQuery from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates -from pydantic import HttpUrl, parse_obj_as +from pydantic import HttpUrl, ValidationError, parse_obj_as from sentry_sdk.integrations.asgi import SentryAsgiMiddleware from sentry_sdk.integrations.fastapi import FastApiIntegration from sentry_sdk.integrations.redis import RedisIntegration @@ -125,6 +125,8 @@ add_canary_path_element, get_all_canary_domains, get_all_canary_sites, + is_email_blocked, + is_valid_email, remove_canary_domain, save_canarydrop, validate_webhook, @@ -293,21 +295,41 @@ def generate_page(request: Request) -> HTMLResponse: "/generate", tags=["Create Canarytokens"], ) -async def generate(request: Request) -> AnyTokenResponse: +async def generate(request: Request) -> AnyTokenResponse: # noqa: C901 # gen is large """ Whatt """ + response_error = ( + lambda error, message: JSONResponse( # noqa: E731 # lambda is cleaner + { + "error": str(error), + "error_message": message, + "url": "", + "url_components": None, + "token": "", + "email": "", + "hostname": "", + "auth": "", + } + ) + ) + if request.headers.get("Content-Type", "application/json") == "application/json": - data = await request.json() - token_request_details = parse_obj_as(AnyTokenRequest, data) + token_request_data = await request.json() else: # Need a mutable copy of the form data - token_request_form = dict(await request.form()) - token_request_form["token_type"] = token_request_form.pop( - "type", token_request_form.get("token_type", None) + token_request_data = dict(await request.form()) + token_request_data["token_type"] = token_request_data.pop( + "type", token_request_data.get("token_type", None) ) - token_request_details = parse_obj_as(AnyTokenRequest, token_request_form) + try: + token_request_details = parse_obj_as(AnyTokenRequest, token_request_data) + except ValidationError: # DESIGN: can we specialise on what went wrong? + return response_error(1, "No email/webhook supplied or malformed request") + + if not token_request_details.memo: + return response_error(2, "No memo supplied") if token_request_details.webhook_url: try: @@ -315,10 +337,27 @@ async def generate(request: Request) -> AnyTokenResponse: token_request_details.webhook_url, token_request_details.token_type ) except requests.exceptions.HTTPError: - raise HTTPException(status_code=400, detail="Failed to validate webhook") + # raise HTTPException(status_code=400, detail="Failed to validate webhook") + return response_error( + 3, "Invalid webhook supplied. Confirm you can POST to this URL." + ) except requests.exceptions.ConnectTimeout: - raise HTTPException( - status_code=400, detail="Failed to validate webhook - timed out." + # raise HTTPException( + # status_code=400, detail="Failed to validate webhook - timed out." + # ) + return response_error( + 3, "Webhook timed out. Confirm you can POST to this URL." + ) + + if token_request_details.email: + if not is_valid_email(token_request_details.email): + return response_error(5, "Invalid email supplied") + + if is_email_blocked(token_request_details.email): + # raise HTTPException(status_code=400, detail="Email is blocked.") + return response_error( + 6, + "Blocked email supplied. Please see our Acceptable Use Policy at https://canarytokens.org/legal", ) # TODO: refactor this. KUBECONFIG token creates it's own token # value and cannot follow same path as before. @@ -458,6 +497,15 @@ async def settings_post( return JSONResponse({"message": "failure"}, status_code=400) +@app.get( + "/legal", + tags=["Canarytokens legal page"], + response_class=HTMLResponse, +) +def legal_page(request: Request) -> HTMLResponse: + return templates.TemplateResponse("legal.html", {"request": request}) + + @app.get( "/download", tags=["Canarytokens Downloads"], diff --git a/block_user.py b/block_user.py index ce42c93a2..11acae343 100755 --- a/block_user.py +++ b/block_user.py @@ -1,40 +1,63 @@ +#! /usr/bin/env python import argparse -from queries import block_domain, block_email, is_email_blocked, unblock_domain, unblock_email +import os +from distutils.util import strtobool -parser = argparse.ArgumentParser(description='Block emails or domains from creating canarytokens') -parser.add_argument('users', metavar='user', type=str, nargs='+', - help='an email address or domain to block') -parser.add_argument('-u', '--unblock', dest='mode', action='store_const', - const='unblock', default='block', - help='unblock instead') +from canarytokens.queries import ( + block_domain, + block_email, + is_email_blocked, + unblock_domain, + unblock_email, +) +from canarytokens.redismanager import DB + +parser = argparse.ArgumentParser( + description="Block emails or domains from creating canarytokens" +) +parser.add_argument( + "users", + metavar="user", + type=str, + nargs="+", + help="an email address or domain to block", +) +parser.add_argument( + "-u", + "--unblock", + dest="mode", + action="store_const", + const="unblock", + default="block", + help="unblock instead", +) args = parser.parse_args() +redis_hostname = "localhost" if strtobool(os.getenv("CI", "False")) else "redis" +DB.set_db_details(hostname=redis_hostname, port=6379) + funcs = { - 'block': { - 'domain': block_domain, - 'email': block_email - }, - 'unblock': { - 'domain': unblock_domain, - 'email': unblock_email - } + "block": {"domain": block_domain, "email": block_email}, + "unblock": {"domain": unblock_domain, "email": unblock_email}, } for user in args.users: - if '@' in user: - kind, test_target = 'email', user + if "@" in user: + kind, test_target = "email", user else: - kind, test_target = 'domain', 'anything@'+user + kind, test_target = "domain", "anything@" + user block_func = funcs[args.mode][kind] try: print('\n[*] {}ing {}: "{}"'.format(args.mode, kind, user)) block_func(user) print('[>] checking if "{}" is blocked'.format(test_target)) - assert (is_email_blocked(test_target) if args.mode == 'block' - else not is_email_blocked(test_target)) + assert ( + is_email_blocked(test_target) + if args.mode == "block" + else not is_email_blocked(test_target) + ) print('[o] successfully {}ed "{}"'.format(args.mode, test_target)) - - except: + except Exception: print('[x] failed to {} "{}"'.format(args.mode, test_target)) -print('\n[;] done blocking') \ No newline at end of file +print("\n[;] done blocking") diff --git a/canarytokens/constants.py b/canarytokens/constants.py index 94d5dd0b5..cff650c64 100644 --- a/canarytokens/constants.py +++ b/canarytokens/constants.py @@ -11,3 +11,17 @@ INPUT_CHANNEL_MTLS = "Kubeconfig" INPUT_CHANNEL_MYSQL = "MYSQL" INPUT_CHANNEL_WIREGUARD = "WireGuard" + +# DESIGN: We'll want a constraint on this but what is sensible as a user and what is practical for our system? +MEMO_MAX_CHARACTERS = 1000 +# fmt: off +CANARYTOKEN_ALPHABET = ['0', '1', '2', '3', '4', '5', + '6', '7', '8', '9', 'a', 'b', + 'c', 'd', 'e', 'f', 'g', 'h', + 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', + 'u', 'v', 'w', 'x', 'y', 'z'] +# fmt: on +CANARYTOKEN_LENGTH = 25 # equivalent to 128-bit id + +CANARY_PDF_TEMPLATE_OFFSET: int = 793 diff --git a/canarytokens/models.py b/canarytokens/models.py index e16771e3d..ef68393b8 100644 --- a/canarytokens/models.py +++ b/canarytokens/models.py @@ -38,23 +38,17 @@ ) from pydantic.generics import GenericModel from typing_extensions import Annotated +from canarytokens.constants import ( + CANARYTOKEN_ALPHABET, + CANARYTOKEN_LENGTH, + MEMO_MAX_CHARACTERS, +) -# DESIGN: We'll want a constraint on this but what is sensible as a user and what is practical for out system? -MEMO_MAX_CHARACTERS = 1000 -# fmt: off -CANARYTOKEN_ALPHABET = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', - 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', - 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9'] -# fmt: on -CANARYTOKEN_LENGTH = 25 # equivalent to 128-bit id CANARYTOKEN_RE = re.compile( ".*([" + "".join(CANARYTOKEN_ALPHABET) + "]{" + str(CANARYTOKEN_LENGTH) + "}).*", re.IGNORECASE, ) -CANARY_PDF_TEMPLATE_OFFSET: int = 793 - class Memo(ConstrainedStr): max_length: int = MEMO_MAX_CHARACTERS @@ -268,7 +262,7 @@ def __init__(__pydantic_self__, **data: Any) -> None: @root_validator def check_email_or_webhook_opt(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if not (values.get("webhook_url") or values.get("email")): + if not values.get("webhook_url") and not values.get("email"): raise ValueError("either webhook or email is required") return values @@ -530,7 +524,7 @@ class TokenResponse(BaseModel): @root_validator(pre=True) # TODO: fix pydantic vs mypy - it's possible - def normalize_names(cls, values: dict[str, Any]) -> dict[str, any]: # type: ignore + def normalize_names(cls, values: dict[str, Any]) -> dict[str, Any]: # type: ignore keys_to_convert = [ # TODO: make is consistent. ("Auth", "auth_token"), @@ -689,14 +683,14 @@ class Log4ShellTokenResponse(TokenResponse): # src_data: dict[str, str] @root_validator(pre=True) - def set_token_usage_info(cls, values: dict[str, Any]) -> dict[str, any]: # type: ignore + def set_token_usage_info(cls, values: dict[str, Any]) -> dict[str, Any]: # type: ignore values[ "token_with_usage_info" ] = f"{cls._hostname_marker}{{hostname}}.{cls._token_marker}.{values['hostname']}" return values @root_validator(pre=True) - def set_token_usage(cls, values: dict[str, Any]) -> dict[str, any]: # type: ignore + def set_token_usage(cls, values: dict[str, Any]) -> dict[str, Any]: # type: ignore values[ "token_usage" ] = f"${{jndi:ldap://{cls._hostname_marker}${{hostName}}.{cls._token_marker}.{values['hostname']}/a}}" @@ -910,7 +904,7 @@ class AWSKeyAdditionalInfo(BaseModel): aws_key_log_data: dict[str, list[str]] @root_validator(pre=True) - def normalize_additional_info_names(cls, values: dict[str, Any]) -> dict[str, any]: # type: ignore + def normalize_additional_info_names(cls, values: dict[str, Any]) -> dict[str, Any]: # type: ignore keys_to_convert = [ # TODO: make this consistent. ("AWS Key Log Data", "aws_key_log_data"), @@ -942,7 +936,7 @@ def serialize_for_v2(self) -> dict: return data @root_validator(pre=True) - def normalize_additional_info_names(cls, values: dict[str, Any]) -> dict[str, any]: # type: ignore + def normalize_additional_info_names(cls, values: dict[str, Any]) -> dict[str, Any]: # type: ignore keys_to_convert = [ # TODO: make is consistent. ("MySQL Client", "mysql_client"), diff --git a/canarytokens/pdfgen.py b/canarytokens/pdfgen.py index 59f60aed7..b64f05d6f 100644 --- a/canarytokens/pdfgen.py +++ b/canarytokens/pdfgen.py @@ -4,9 +4,11 @@ from io import BytesIO from pathlib import Path +from canarytokens.constants import CANARY_PDF_TEMPLATE_OFFSET as STREAM_OFFSET + # PDF_FILE=settings.CANARY_PDF_TEMPLATE # STREAM_OFFSET=settings.CANARY_PDF_TEMPLATE_OFFSET -STREAM_OFFSET = 793 +# STREAM_OFFSET = 793 # CANARY_PDF_TEMPLATE_OFFSET=793 diff --git a/canarytokens/queries.py b/canarytokens/queries.py index 705c60bfc..413ba5e62 100644 --- a/canarytokens/queries.py +++ b/canarytokens/queries.py @@ -4,6 +4,7 @@ import base64 import datetime import json +import re import secrets from ipaddress import IPv4Address from typing import Dict, List, Literal, Optional, Tuple, Union @@ -28,6 +29,8 @@ KEY_CANARYDROP, KEY_CANARYDROPS_TIMELINE, KEY_CANARYTOKEN_ALERT_COUNT, + KEY_DOMAIN_BLOCK_LIST, + KEY_EMAIL_BLOCK_LIST, KEY_EMAIL_IDX, KEY_KUBECONFIG_CERTS, KEY_KUBECONFIG_SERVEREP, @@ -848,7 +851,7 @@ def validate_webhook(url, token_type: models.TokenTypes): headers={"content-type": "application/json"}, timeout=10, ) - # TODO: this accepts 3xx which is probably too leanient. We probably want any 2xx code. + # TODO: this accepts 3xx which is probably too lenient. We probably want any 2xx code. response.raise_for_status() # return True # except requests.exceptions.Timeout as e: @@ -864,6 +867,56 @@ def validate_webhook(url, token_type: models.TokenTypes): # return False +def is_valid_email(email): + # This validation checks that no disallowed characters are in the section of the email + # address before the @ + # Ripped from https://www.regular-expressions.info/email.html + regex = re.compile( + r"^[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$" + ) + match = regex.search(email.lower()) + if not match: + return False + else: + return True + + +def normalize_email(email): + [user, domain] = email.lower().split("@") + if domain in ["gmail.com", "googlemail.com", "google.com"]: + delabelled = user.split("+")[0] + san_user = delabelled.replace(".", "") + return "{}@{}".format(san_user, domain) + else: + return email + + +def block_email(email): + san = normalize_email(email).lower() + DB.get_db().sadd(KEY_EMAIL_BLOCK_LIST, san) + + +def unblock_email(email): + san = normalize_email(email).lower() + DB.get_db().srem(KEY_EMAIL_BLOCK_LIST, san) + + +def block_domain(domain): + DB.get_db().sadd(KEY_DOMAIN_BLOCK_LIST, domain.lower()) + + +def unblock_domain(domain): + DB.get_db().srem(KEY_DOMAIN_BLOCK_LIST, domain.lower()) + + +def is_email_blocked(email): + san = normalize_email(email).lower() + domain = email.split("@")[1].lower() + return DB.get_db().sismember( + KEY_DOMAIN_BLOCK_LIST, domain + ) or DB.get_db().sismember(KEY_EMAIL_BLOCK_LIST, san) + + def is_tor_relay(ip): if not DB.get_db().exists(KEY_TOR_EXIT_NODES): diff --git a/canarytokens/redismanager.py b/canarytokens/redismanager.py index dd88135bd..dab984213 100644 --- a/canarytokens/redismanager.py +++ b/canarytokens/redismanager.py @@ -69,6 +69,8 @@ def create_db(cls, *, hostname, port, logical_db=0): KEY_WEBHOOK_IDX = "alertchannel_webhook:" KEY_EMAIL_IDX = "alertchannel_email:" KEY_AUTH_IDX = "auth:" +KEY_EMAIL_BLOCK_LIST = "email_block_list" +KEY_DOMAIN_BLOCK_LIST = "domain_block_list" KEY_WIREGUARD_KEYMAP = "wireguard-keymap" KEY_KUBECONFIG_SERVEREP = "kubeconfig_server_endpoint" KEY_KUBECONFIG_CERTS = "certificate:" diff --git a/canarytokens/tokens.py b/canarytokens/tokens.py index 159b9fdf0..4aba0bf98 100644 --- a/canarytokens/tokens.py +++ b/canarytokens/tokens.py @@ -13,50 +13,14 @@ from twisted.web.util import redirectTo from canarytokens import canarydrop, queries -from canarytokens.constants import INPUT_CHANNEL_HTTP +from canarytokens.constants import ( + CANARYTOKEN_ALPHABET, + CANARYTOKEN_LENGTH, + INPUT_CHANNEL_HTTP, +) from canarytokens.exceptions import NoCanarytokenFound from canarytokens.models import AnyTokenHit, AWSKeyTokenHit, TokenTypes -canarytoken_ALPHABET = [ - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "0", - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", -] -canarytoken_LENGTH = 25 # equivalent to 128-bit id - # TODO: put these in a nicer place. Ensure re.compile is called only once at startup # add a naming convention for easy reading when seen in other files. # Check that state is not stored in these eg: x=re.compile(...) x.match() === A and then x.match() === A still @@ -121,9 +85,9 @@ def get_template_env(): class Canarytoken(object): CANARY_RE = re.compile( ".*([" - + "".join(canarytoken_ALPHABET) + + "".join(CANARYTOKEN_ALPHABET) + "]{" - + str(canarytoken_LENGTH) + + str(CANARYTOKEN_LENGTH) + "}).*", re.IGNORECASE, ) @@ -149,12 +113,8 @@ def __init__(self, value: Optional[AnyStr] = None): @staticmethod def generate() -> str: """Return a new canarytoken.""" - # TODO: Use random.choice return "".join( - [ - canarytoken_ALPHABET[random.randint(0, len(canarytoken_ALPHABET) - 1)] - for x in range(0, canarytoken_LENGTH) - ], + [random.choice(CANARYTOKEN_ALPHABET) for x in range(0, CANARYTOKEN_LENGTH)], ) @staticmethod diff --git a/templates/generate_new.html b/templates/generate_new.html index 2c7b2ef2d..c0cd49c9d 100644 --- a/templates/generate_new.html +++ b/templates/generate_new.html @@ -73,8 +73,7 @@ - - +