draft: wip newsfeed mailer
lsg551 committed Jun 4, 2024
1 parent b23b43d commit 564f858
Showing 2 changed files with 75 additions and 5 deletions.
21 changes: 16 additions & 5 deletions examples/newsfeed-mailer/check-newsfeed.py
@@ -17,7 +17,12 @@
If all arguments are provided correctly, this script will:
[...]
1. Scrape newsfeed data from Matricula's website.
   `--schedule` (in hours) determines how many of the last days are fetched (e.g. 24: 48/24 = 2 => the last 2 days will be fetched).
2. Look for substrings of `keywords` in the headlines and previews of the scraped articles.
3. If matches are found, send a mail with the matches to the user.
EXAMPLE
=======
@@ -45,18 +50,20 @@

# -------------------- logging --------------------


JOB_ID = uuid.uuid4()
JOB_DATE = datetime.now()
LOG_FILE = Path("matricula-newsfeed-mailer.log")
APP_DIR = Path("~/.matricula-online-scraper/").expanduser().absolute()
LOG_FILE = Path(APP_DIR, "matricula-newsfeed-mailer.log")

logger = logging.getLogger(__name__)
logger_extra = {
"job_id": JOB_ID,
"bot_version": VERSION,
}
logging.basicConfig(
filename=LOG_FILE,
handlers=[
logging.FileHandler(LOG_FILE),
],
encoding="utf-8",
level=logging.DEBUG,
format="[%(asctime)s] %(levelname)s (%(job_id)s @ v%(bot_version)s) : %(message)s",
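Note that the custom format string above references %(job_id)s and %(bot_version)s, which are not standard LogRecord attributes: they only resolve if every logging call supplies them via extra=logger_extra, or if the logger is wrapped in a logging.LoggerAdapter. A minimal, self-contained sketch of the adapter variant (the file name and version value are illustrative, not taken from the script):

import logging
import uuid

logging.basicConfig(
    handlers=[logging.FileHandler("newsfeed-mailer.log")],
    level=logging.DEBUG,
    format="[%(asctime)s] %(levelname)s (%(job_id)s @ v%(bot_version)s) : %(message)s",
)

# Wrap the module logger so every record automatically carries the extra fields.
log = logging.LoggerAdapter(
    logging.getLogger(__name__),
    {"job_id": uuid.uuid4(), "bot_version": "0.0.0"},
)
log.info("newsfeed check started")  # job_id and bot_version now appear in the log line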
@@ -214,7 +221,7 @@ def parse_args() -> Options:
# -------------------- Data processing / CSV parsing --------------------

# folder where scraped data is stored
DATA_STORE = Path("~/matricula-newsfeed-scraper").expanduser()
DATA_STORE = Path(APP_DIR, "scraper-data")


def fetch_newsfeed(*, last_n_days: int) -> Path:
@@ -437,6 +444,10 @@ def send_mail(
f"Found {len(matches)} matches for keywords {keywords} in: {file.absolute()}"
)

if len(matches) == 0:
logger.debug("No matches found. Aborting.")
exit(0)

# build message
# history = History(LOG_FILE)
subject = Subject(num_matches=len(matches))
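For illustration, step 2 of the workflow (matching keyword substrings against headlines and previews) could look roughly like the sketch below. The field names headline and preview and the helper find_matches are assumptions made for this example, not the script's actual API:

from typing import Iterable

def find_matches(articles: Iterable[dict], keywords: list[str]) -> list[dict]:
    """Return all articles whose headline or preview contains any keyword (case-insensitive)."""
    matches = []
    for article in articles:
        text = f"{article.get('headline', '')} {article.get('preview', '')}".lower()
        if any(keyword.lower() in text for keyword in keywords):
            matches.append(article)
    return matches

# Example: only the first article matches the keyword "parish".
articles = [
    {"headline": "New parish registers online", "preview": "Records from several dioceses ..."},
    {"headline": "Scheduled maintenance", "preview": "The site will be unavailable ..."},
]
print(find_matches(articles, ["parish"]))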
59 changes: 59 additions & 0 deletions examples/newsfeed-mailer/install-cronjob.sh
@@ -0,0 +1,59 @@
#!/bin/bash

# ! ATTENTION !
# =============
#
# This file automatically adds a cron job to the user's crontab.
#
# Please fill in the following variables with your data.
#

# sender email address, e.g. your@email.com
FROM=
# your email's password
PASSWORD=
# to whom the email should be sent, e.g. to@me.com
TO=
# smtp server, e.g. smtp.gmail.com
SMTP_SERVER=
# smtp port, e.g. 465
SMTP_PORT=
# space separated keywords, e.g. "keyword1 keyword2"
KEYWORDS=
# schedule in hours, e.g. 24 (how far back to look for news; passed to --schedule of check-newsfeed.py)
SCHEDULE=
# cron schedule, e.g. "0 16 * * *" (every day at 16:00)
CRON=

# -------------------------------------------- #
# END OF USER VARIABLES | DO NOT CHANGE BELOW #
# -------------------------------------------- #

# check variables are set
if [ -z "$FROM" ] || [ -z "$PASSWORD" ] || [ -z "$TO" ] || [ -z "$SMTP_SERVER" ] || [ -z "$SMTP_PORT" ] || [ -z "$KEYWORDS" ] || [ -z "$SCHEDULE" ] || [ -z "$CRON" ]; then
echo "Error: Please fill in the variables in the script."
exit 1
fi

# check that the installer is run from the directory containing check-newsfeed.py
SCRIPT_PATH="$PWD/check-newsfeed.py"
if [ ! -f "$SCRIPT_PATH" ]; then
echo "Error: Script not found at $SCRIPT_PATH"
exit 1
fi

CMD="$SCRIPT_PATH --from $FROM --password $PASSWORD --smtp-server $SMTP_SERVER -p $SMTP_PORT --to $TO --schedule $SCHEDULE -k $KEYWORDS"
CRONJOB="$CRON $CMD"
COMMENT="# cronjob for https://github.com/lsg551/matricula-online-scraper/tree/main/examples/newsfeed-mailer"
COMMENT2="# see $SCRIPT_PATH for more details"
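# With the example values above and CRON="0 16 * * *", the resulting crontab line would look
# roughly like this (the path is illustrative; it is derived from $PWD at install time):
#   0 16 * * * /path/to/examples/newsfeed-mailer/check-newsfeed.py --from your@email.com --password ... --smtp-server smtp.gmail.com -p 465 --to to@me.com --schedule 24 -k keyword1 keyword2
# Note: cron executes $SCRIPT_PATH directly, so check-newsfeed.py must be executable and start with a shebang.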

# add the cronjob to the user's crontab (keeping any existing entries)
TMP_FILE="$(mktemp)"
crontab -l 2>/dev/null > "$TMP_FILE" # an empty file is fine if no crontab exists yet
echo "$COMMENT" >> "$TMP_FILE"
echo "$COMMENT2" >> "$TMP_FILE"
echo "$CRONJOB" >> "$TMP_FILE"
crontab "$TMP_FILE"
rm "$TMP_FILE"

echo "Cronjob added successfully: $CRONJOB"

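Once the installer has finished, the entry can be verified or removed with the standard crontab tools, and the mailer's own log (written to ~/.matricula-online-scraper/matricula-newsfeed-mailer.log, per check-newsfeed.py above) shows whether the scheduled runs succeed:

crontab -l | grep "check-newsfeed.py"   # confirm the job and its comment lines were added
tail -f ~/.matricula-online-scraper/matricula-newsfeed-mailer.log   # follow the job's log output
crontab -e   # edit or remove the entry again by hand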