Skip to content

Commit

Permalink
Merge branch 'master' into clearer-queue
Browse files Browse the repository at this point in the history
# Conflicts:
#	webtool/views/api_tool.py
#	webtool/views/views_dataset.py
  • Loading branch information
stijn-uva committed Oct 9, 2023
2 parents cbb81e3 + fc4d35c commit 69f60da
Show file tree
Hide file tree
Showing 307 changed files with 15,788 additions and 3,805 deletions.
8 changes: 8 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# 4CAT Version: Update with latest release tag or 'latest'
# https://hub.docker.com/repository/docker/digitalmethodsinitiative/4cat/tags?page=1&ordering=last_updated
DOCKER_TAG=stable
POSTGRES_TAG=latest

# Database setup
POSTGRES_USER=fourcat
Expand All @@ -9,6 +10,7 @@ POSTGRES_DB=fourcat
POSTGRES_HOST_AUTH_METHOD=trust
# POSTGRES_HOST should correspond with the database container name set in docker-compose.yml
POSTGRES_HOST=db
POSTGRES_PORT=5432 # Docker postgres image uses port 5432

# Server information
# SERVER_NAME is only used on first run; afterwards it can be set in the frontend
Expand All @@ -23,3 +25,9 @@ PUBLIC_API_PORT=4444

# Telegram apparently needs its own port
TELEGRAM_PORT=443

# Docker Volume Names
DOCKER_DB_VOL=4cat_4cat_db
DOCKER_DATA_VOL=4cat_4cat_data
DOCKER_CONFIG_VOL=4cat_4cat_share
DOCKER_LOGS_VOL=4cat_4cat_logs
14 changes: 9 additions & 5 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
---
name: Bug report
about: Create a report to help us improve
about: Report a specific issue where something is not working.
title: ''
labels: ''
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.
A clear and concise description of what the bug is. What are you trying to do and what is not working?

**To Reproduce**
Steps to reproduce the behavior:
Expand All @@ -21,8 +21,12 @@ Steps to reproduce the behavior:
A clear and concise description of what you expected to happen.

**4CAT Environment**
- How are you accessing 4CAT? [e.g. 4cat.oilab.nl or your own server/desktop]
- If accessing via your own server/desktop, what is the environment and are you using Docker?
- How are you accessing 4CAT? If possible, include the 4CAT web interface URL. Otherwise, explain whether you are running 4CAT on your own device or accessing it remotely.
- What version of 4CAT are you using? Find the version here:
- In the latest versions of 4CAT, the version number is visible at the bottom of the interface
- If you don't see the version there, that in itself is useful information
- If you are using [Zeeschuimer](https://github.com/digitalmethodsinitiative/zeeschuimer), include its version number as well. Find it at the top of the interface.
- If accessing via your own server/desktop, what is the environment (e.g. the operating system) and are you using Docker?

**Screenshots, links to datasets, and any additional context**
If your 4CAT is available online (e.g. 4cat.oilab.nl) and it is related to a specific dataset, please post the link.
If your 4CAT is available online and your issue is related to a specific dataset, please post the link.
25 changes: 23 additions & 2 deletions .github/workflows/docker_pr_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,26 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Run docker-compose up
run: docker-compose -f docker-compose_build.yml up -d
- name: Run docker compose up
run: docker compose -f docker-compose_build.yml up -d
- name: Wait and check log
run: |
test_case=" INFO at api.py:65: Local API listening for requests at backend:4444"
sleep 30 && var=$(docker exec 4cat_backend tail -n 1 logs/backend_4cat.log)
echo "::group::Backend test"
if [ "$(echo "$var" | tr "|" "\n" | sed -n '2p')" = "$test_case" ]; then
echo "Backend running as expected"
else
echo "::error::Backend failed to start"
echo "Test:$test_case"
echo "Log :$(echo "$var" | tr "|" "\n" | sed -n '2p')"
exit 1
fi
echo "::endgroup::"
- name: Print log on failure
if: failure()
run: |
docker cp 4cat_backend:/usr/src/app/logs/backend_4cat.log ./backend_4cat.log
echo "::group::Backend logs"
cat backend_4cat.log
echo "::endgroup::"
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

# actual files that are part of 4CAT but should not be included
config.py
module_config.bin
.current-version
deploy.sh
module_cache.pb
Expand Down
2 changes: 1 addition & 1 deletion .zenodo.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"license": "MPL-2.0",
"title": "4CAT Capture and Analysis Toolkit",
"upload_type": "software",
"version": "v1.33",
"version": "v1.36",
"keywords": [
"webmining",
"scraping",
Expand Down
2 changes: 1 addition & 1 deletion 4cat-daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
# we can only import this here, because the version check above needs to run
# first: it may detect that the user needs to migrate before the config
# manager can be run properly
import common.config_manager as config
from common.config_manager import config
from common.lib.helpers import call_api
# ---------------------------------------------
# Check validity of configuration file
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
4CAT: Capture and Analysis Toolkit
Copyright (c) 2018-2020, Open Intelligence Lab, https://oilab.eu
Copyright (c) 2018-2023, Open Intelligence Lab, https://oilab.eu

This Source Code Form, except for third-party libraries included and
listed in the LICENSE-3DPARTY file, is subject to the terms of the
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ actively and can be used to collect data with 4CAT:
The following platforms are supported through other tools, with which you can
collect data to import into 4CAT for analysis:

* Instagram, TikTok, 9gag, Imgur, LinkedIn, Parler (via
* Instagram, TikTok, 9gag, Imgur, LinkedIn, Parler, Douyin and Twitter (via
[Zeeschuimer](https://github.com/digitalmethodsinitiative/zeeschuimer))
* Facebook and Instagram (via [CrowdTangle](https://www.crowdtangle.com) exports)

Expand Down Expand Up @@ -66,6 +66,9 @@ Please check our
[issues](https://github.com/digitalmethodsinitiative/4cat/issues) and create
one if you experience any problems (pull requests are also very welcome).

### Upgrading 4CAT
Instructions on upgrading 4CAT from previous versions [can be found in our wiki](https://github.com/digitalmethodsinitiative/4cat/wiki/Upgrading-4CAT).

## Modules
4CAT is a modular tool and easy to extend. The following two folders in the
repository are of interest for this:
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
1.33
1.37

This file should not be modified. It is used by 4CAT to determine whether it
needs to run migration scripts to e.g. update the database structure to a more
Expand Down
9 changes: 7 additions & 2 deletions backend/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from backend.lib.manager import WorkerManager
from common.lib.logger import Logger

import common.config_manager as config
from common.config_manager import config

def run(as_daemon=True):
pidfile = Path(config.get('PATH_ROOT'), config.get('PATH_LOCKFILE'), "4cat.pid")
Expand Down Expand Up @@ -54,13 +54,18 @@ def run(as_daemon=True):
log = Logger(output=not as_daemon)

log.info("4CAT Backend started, logger initialised")
db = Database(logger=log, appname="main")
db = Database(logger=log, appname="main",
dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, port=config.DB_PORT)
queue = JobQueue(logger=log, database=db)

# clean up after ourselves
db.commit()
queue.release_all()

# ensure database consistency for settings table
config.with_db(db)
config.ensure_database()

# make it happen
# this is blocking until the back-end is shut down
WorkerManager(logger=log, database=db, queue=queue, as_daemon=as_daemon)
Expand Down
71 changes: 33 additions & 38 deletions backend/database.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@

-- 4CAT settings table
CREATE TABLE IF NOT EXISTS settings (
name TEXT UNIQUE PRIMARY KEY,
value TEXT DEFAULT '{}'
name TEXT DEFAULT '' NOT NULL,
value TEXT DEFAULT '{}' NOT NULL,
tag TEXT DEFAULT '' NOT NULL
);

CREATE UNIQUE INDEX IF NOT EXISTS unique_setting
ON settings (
name, tag
);

-- jobs table
CREATE TABLE IF NOT EXISTS jobs (
id SERIAL PRIMARY KEY,
Expand Down Expand Up @@ -37,8 +43,8 @@ CREATE TABLE IF NOT EXISTS datasets (
id SERIAL PRIMARY KEY,
key text,
type text DEFAULT 'search',
key_parent text DEFAULT '',
owner VARCHAR DEFAULT 'anonymous',
key_parent text DEFAULT '' NOT NULL,
creator VARCHAR DEFAULT 'anonymous',
query text,
job integer DEFAULT 0,
parameters text,
Expand All @@ -54,6 +60,15 @@ CREATE TABLE IF NOT EXISTS datasets (
annotation_fields text DEFAULT ''
);

CREATE TABLE datasets_owners (
"name" text DEFAULT 'anonymous'::text,
key text NOT NULL,
role TEXT DEFAULT 'owner'
);

CREATE UNIQUE INDEX datasets_owners_user_key_idx ON datasets_owners("name" text_ops,key text_ops);


-- annotations
CREATE TABLE IF NOT EXISTS annotations (
key text UNIQUE PRIMARY KEY,
Expand All @@ -73,12 +88,13 @@ CREATE TABLE IF NOT EXISTS metrics (
CREATE TABLE IF NOT EXISTS users (
name TEXT UNIQUE PRIMARY KEY,
password TEXT,
is_admin BOOLEAN DEFAULT FALSE,
register_token TEXT DEFAULT '',
timestamp_created INTEGER DEFAULT 0,
timestamp_token INTEGER DEFAULT 0,
timestamp_seen INTEGER DEFAULT 0,
userdata TEXT DEFAULT '{}',
is_deactivated BOOLEAN DEFAULT FALSE
is_deactivated BOOLEAN DEFAULT FALSE,
tags JSONB DEFAULT '[]'
);

INSERT INTO users
Expand Down Expand Up @@ -136,35 +152,14 @@ CREATE FUNCTION count_estimate(query text) RETURNS bigint AS $$
END;
$$ LANGUAGE plpgsql VOLATILE STRICT;


-- fourcat settings insert default settings
-- TODO SHOULD BE ABLE TO REMOVE; all these should have corresponding values in common/lib/config_definitions given defaults
INSERT INTO settings
(name, value)
Values
('4cat.datasources', '["bitchute", "custom", "douban", "customimport", "reddit", "telegram", "twitterv2"]'),
('4cat.name', '"4CAT"'),
('4cat.name_long', '"4CAT: Capture and Analysis Toolkit"'),
('4cat.github_url', '"https://github.com/digitalmethodsinitiative/4cat"'),
('4cat.phone_home_url', '"https://ping.4cat.nl"'),
('path.versionfile', '".git-checked-out"'),
('expire.timeout', '0'),
('expire.allow_optout', 'true'),
('expire.datasources', '{"tumblr": {"timeout": 259200, "allow_optout": false}}'),
('logging.slack.level', '"WARNING"'),
('logging.slack.webhook', 'null'),
('mail.admin_email', 'null'),
('mail.ssl', 'false'),
('mail.username', 'null'),
('mail.password', 'null'),
('mail.noreply', '"noreply@localhost"'),
('fourchan.image_interval', '3600'),
('explorer.max_posts', '100000'),
('flask.flask_app', '"webtool/fourcat"'),
('flask.secret_key', concat('"', substr(md5(random()::text), 0, 25), '"')),
('flask.https', 'false'),
('flask.server_name', '"localhost"'),
('flask.autologin.name', '"Automatic login"'),
('flask.autologin.hostnames', '["localhost"]'),
('flask.autologin.api', '["localhost"]')
ON CONFLICT DO NOTHING;
-- default admin privileges
INSERT INTO settings (name, value, tag) VALUES
('privileges.admin.can_view_status', 'true', 'admin'),
('privileges.admin.can_manage_users', 'true', 'admin'),
('privileges.admin.can_manage_settings', 'true', 'admin'),
('privileges.admin.can_manage_notifications', 'true', 'admin'),
('privileges.admin.can_manage_tags', 'true', 'admin'),
('privileges.admin.can_restart', 'true', 'admin'),
('privileges.admin.can_manipulate_all_datasets', 'true', 'admin'),
('privileges.can_view_all_datasets', 'true', 'admin'),
('privileges.can_view_private_datasets', 'true', 'admin');
2 changes: 0 additions & 2 deletions backend/lib/database_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import pymysql.connections as mysqlconnections
import pymysql

import common.config_manager as config

class MySQLDatabase:
"""
Simple database handler for MySQL connections
Expand Down
12 changes: 10 additions & 2 deletions backend/lib/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,14 @@ def __init__(self, queue, database, logger, as_daemon=True):
if hasattr(worker, "ensure_job"):
self.queue.add_job(jobtype=worker_name, **worker.ensure_job)

self.log.info('4CAT Started')
self.log.info("4CAT Started")

# flush module collector log buffer
# the logger is not available when this initialises
# but it is now!
if all_modules.log_buffer:
self.log.warning(all_modules.log_buffer)
all_modules.log_buffer = ""

# it's time
self.loop()
Expand Down Expand Up @@ -87,14 +94,15 @@ def delegate(self):
# worker slots, start a new worker to run it
if len(self.worker_pool[jobtype]) < worker_class.max_workers:
try:
self.log.debug("Starting new worker for job %s" % jobtype)
job.claim()
worker = worker_class(logger=self.log, manager=self, job=job, modules=all_modules)
worker.start()
self.worker_pool[jobtype].append(worker)
except JobClaimedException:
# it's fine
pass
else:
self.log.error("Unknown job type: %s" % jobtype)

time.sleep(1)

Expand Down
2 changes: 1 addition & 1 deletion backend/abstract/preset.py → backend/lib/preset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Queue a series of processors at once via a preset
"""
import abc
from backend.abstract.processor import BasicProcessor
from backend.lib.processor import BasicProcessor

from common.lib.dataset import DataSet

Expand Down
Loading

0 comments on commit 69f60da

Please sign in to comment.