From 564f8589d65e2aac52011d5bff12ed8f2c57012f Mon Sep 17 00:00:00 2001
From: Luis Schulte <63458548+lsg551@users.noreply.github.com>
Date: Tue, 4 Jun 2024 18:08:39 +0200
Subject: [PATCH] draft: wip newsfeed mailer

---
 examples/newsfeed-mailer/check-newsfeed.py  | 24 +++++-
 examples/newsfeed-mailer/install-cronjob.sh | 85 +++++++++++++++++++++
 2 files changed, 104 insertions(+), 5 deletions(-)
 create mode 100755 examples/newsfeed-mailer/install-cronjob.sh

diff --git a/examples/newsfeed-mailer/check-newsfeed.py b/examples/newsfeed-mailer/check-newsfeed.py
index 3dcc875..ca1af4f 100755
--- a/examples/newsfeed-mailer/check-newsfeed.py
+++ b/examples/newsfeed-mailer/check-newsfeed.py
@@ -17,7 +17,12 @@
 
 If all arguments were provided correctly, this script will:
 
-[...]
+1. Scrape the newsfeed data from Matricula's website.
+`--schedule` in hours determines the last n days to fetch (e.g. 24: 48/24=2 => last 2 days will be fetched).
+
+2. Look for substrings of `keywords` in the headlines and previews of the scraped articles.
+
+3. If matches were found, send a mail with the matches to the user.
 
 EXAMPLE
 =======
@@ -45,10 +50,12 @@
 
 # -------------------- logging --------------------
 
-
 JOB_ID = uuid.uuid4()
 JOB_DATE = datetime.now()
-LOG_FILE = Path("matricula-newsfeed-mailer.log")
+APP_DIR = Path("~/.matricula-online-scraper/").expanduser().absolute()
+# the log file handler below opens LOG_FILE right away, so the folder must exist
+APP_DIR.mkdir(parents=True, exist_ok=True)
+LOG_FILE = Path(APP_DIR, "matricula-newsfeed-mailer.log")
 
 logger = logging.getLogger(__name__)
 logger_extra = {
@@ -56,7 +63,10 @@
     "bot_version": VERSION,
 }
 logging.basicConfig(
-    filename=LOG_FILE,
+    handlers=[
+        # basicConfig() only applies `encoding` to a handler it creates itself
+        logging.FileHandler(LOG_FILE, encoding="utf-8"),
+    ],
     encoding="utf-8",
     level=logging.DEBUG,
     format="[%(asctime)s] %(levelname)s (%(job_id)s @ v%(bot_version)s) : %(message)s",
@@ -214,7 +224,7 @@ def parse_args() -> Options:
 # -------------------- Data processing / CSV parsing --------------------
 
 # folder where scraped data is stored
-DATA_STORE = Path("~/matricula-newsfeed-scraper").expanduser()
+DATA_STORE = Path(APP_DIR, "scraper-data")
 
 
 def fetch_newsfeed(*, last_n_days: int) -> Path:
@@ -437,6 +447,10 @@ def send_mail(
         f"Found {len(matches)} matches for keywords {keywords} in: {file.absolute()}"
     )
 
+    if len(matches) == 0:
+        logger.debug("No matches found. Aborting.")
+        raise SystemExit(0)
+
     # build message
     # history = History(LOG_FILE)
     subject = Subject(num_matches=len(matches))
diff --git a/examples/newsfeed-mailer/install-cronjob.sh b/examples/newsfeed-mailer/install-cronjob.sh
new file mode 100755
index 0000000..efaa21e
--- /dev/null
+++ b/examples/newsfeed-mailer/install-cronjob.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+# ! ATTENTION !
+# =============
+#
+# This file automatically adds a cron job to the user's crontab.
+#
+# Please fill in the following variables with your data.
+#
+# NOTE: the password is stored in plain text, both in this file and in the
+# crontab entry it installs. Keep this file private.
+#
+
+# email, e.g. your@email.com
+FROM=
+# your email's password
+PASSWORD=
+# to whom the email should be sent, e.g. to@me.com
+TO=
+# smtp server, e.g. smtp.gmail.com
+SMTP_SERVER=
+# smtp port, e.g. 465
+SMTP_PORT=
+# space separated keywords, e.g. "keyword1 keyword2"
+KEYWORDS=
+# int, e.g. 24 (it will look for news every 24 hours)
+SCHEDULE=
+# cron schedule, e.g. "0 16 * * *" (every day at 16:00)
+CRON=
+
+# -------------------------------------------- #
+# END OF USER VARIABLES | DO NOT CHANGE BELOW  #
+# -------------------------------------------- #
+
+# check variables are set
+if [ -z "$FROM" ] || [ -z "$PASSWORD" ] || [ -z "$TO" ] || [ -z "$SMTP_SERVER" ] || [ -z "$SMTP_PORT" ] || [ -z "$KEYWORDS" ] || [ -z "$SCHEDULE" ] || [ -z "$CRON" ]; then
+    echo "Error: Please fill in the variables in the script."
+    exit 1
+fi
+
+# check execution path is correct
+SCRIPT_PATH="$PWD/check-newsfeed.py"
+if [ ! -f "$SCRIPT_PATH" ]; then
+    echo "Error: Script not found at $SCRIPT_PATH"
+    exit 1
+fi
+
+# KEYWORDS is split on whitespace on purpose: every keyword becomes its own
+# argument of `-k`.
+read -r -a KEYWORD_LIST <<< "$KEYWORDS"
+
+# Build the command line for cron:
+# - `printf %q` shell-quotes every argument, so spaces or characters like
+#   `$`, `&` and `;` (e.g. in the password) reach the script unchanged
+# - `sed` drops the trailing separator and escapes `%`, which cron would
+#   otherwise turn into a newline
+CMD="$(
+    printf '%q ' "$SCRIPT_PATH" \
+        --from "$FROM" --password "$PASSWORD" \
+        --smtp-server "$SMTP_SERVER" -p "$SMTP_PORT" \
+        --to "$TO" --schedule "$SCHEDULE" \
+        -k "${KEYWORD_LIST[@]}" |
+        sed -e 's/ $//' -e 's/%/\\%/g'
+)"
+CRONJOB="$CRON $CMD"
+COMMENT="# cronjob for https://github.com/lsg551/matricula-online-scraper/tree/main/examples/newsfeed-mailer"
+COMMENT2="# see $SCRIPT_PATH for more details"
+
+# add cronjob to user's crontab; the copy that is edited lives in a private
+# temporary file, which is removed again when the script exits
+TMP_FILE="$(mktemp)" || exit 1
+trap 'rm -f "$TMP_FILE"' EXIT
+# `crontab -l` fails when the user has no crontab yet; start from an empty one
+crontab -l > "$TMP_FILE" 2> /dev/null
+{
+    echo "$COMMENT"
+    echo "$COMMENT2"
+    echo "$CRONJOB"
+} >> "$TMP_FILE"
+if ! crontab "$TMP_FILE"; then
+    echo "Error: Could not install the new crontab."
+    exit 1
+fi
+
+echo "Cronjob added successfully: $CRONJOB"