Commit
add cdn nginx cache and logs --skip-tests
OriHoch committed Apr 29, 2024
1 parent c4d8bfb commit 158eb45
Showing 2 changed files with 176 additions and 23 deletions.
139 changes: 134 additions & 5 deletions cwm_worker_operator/deployments_manager.py
@@ -82,23 +82,94 @@
# Pulled Feb 26, 2024
image: nginx@sha256:c26ae7472d624ba1fafd296e73cecc4f93f853088e6a9c13c0d52f6ca5865107
volumeMounts: {volume_mounts_json}
- name: fluentbit
# Pulled Apr 29, 2024
image: cr.fluentbit.io/fluent/fluent-bit:3.0.2@sha256:ed0214b0b0c6bff7474c739d9c8c2e128d378b053769c2b12da06296be883898
args: ["-c", "/fluent-bit/etc/fluent-bit.conf"]
volumeMounts:
- name: fluentbit-config
mountPath: /fluent-bit/etc/cwmparsers.conf
subPath: cwmparsers.conf
- name: fluentbit-config
mountPath: /fluent-bit/etc/fluent-bit.conf
subPath: fluent-bit.conf
- name: access-logs
mountPath: /var/log/nginx/cwm-access-logs
volumes: {volumes_json}
''').strip()
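The `{volumes_json}` and `{volume_mounts_json}` placeholders in this pod-spec template are presumably filled by serializing the Python lists that `deploy_cdn` builds further down in this diff; since JSON is valid YAML, the serialized lists can be inlined as-is. A minimal sketch of that substitution, where `POD_TEMPLATE` stands in for the real template variable (the actual call site is outside this diff):

import json

# illustrative only: POD_TEMPLATE is a stand-in name, and these lists mirror
# the ones built in deploy_cdn below
volumes = [{'name': 'access-logs', 'emptyDir': {}}]
volume_mounts = [{'name': 'access-logs', 'mountPath': '/var/log/nginx/cwm-access-logs'}]
pod_spec_yaml = POD_TEMPLATE.format(
    volumes_json=json.dumps(volumes),
    volume_mounts_json=json.dumps(volume_mounts),
)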
NGINX_INCLUDES_CONFIGMAP_TEMPLATE = dedent('''
apiVersion: v1
kind: ConfigMap
metadata:
name: nginx-includes
data:
cache_location.conf: |
proxy_cache minio;
# Buffering is required to enable caching
proxy_buffering on;
# Sets the number and size of the buffers used for reading a response from the
# proxied server, for a single connection.
proxy_buffers 8 16k;
# Sets the size of the buffer used for reading the first part of the response
# received from the proxied server. This part usually contains a small response
# header.
proxy_buffer_size 16k;
# When buffering of responses from the proxied server is enabled, limits the
# total size of buffers that can be busy sending a response to the client while
# the response is not yet fully read. In the meantime, the rest of the buffers
# can be used for reading the response and, if needed, buffering part of the
# response to a temporary file.
proxy_busy_buffers_size 32k;
proxy_cache_valid 200 1m;
# The following lines are required to fix handling of HEAD requests by minio
proxy_cache_convert_head off;
proxy_cache_key "$request_method$request_uri$is_args$args";
proxy_cache_methods GET HEAD;
# when caching is enabled some headers are not passed, so we need to pass them explicitly
proxy_set_header If-Match $http_if_match;
proxy_set_header Range $http_range;
add_header X-Cache-Status $upstream_cache_status;
''').strip()
NGINX_HOST_BUCKET_HTTP_CONFIGMAP_TEMPLATE = dedent('''
apiVersion: v1
kind: ConfigMap
metadata:
name: {name}
data:
conf: |
default_conf: |
proxy_cache_path /var/cache/nginx/minio/cache levels=1:2 keys_zone=minio:10m max_size=1g inactive=1m use_temp_path=on;
proxy_temp_path /var/cache/nginx/minio/temp;
log_format json escape=json '{{'
'"bytes_sent": "$bytes_sent", '
'"request_length": "$request_length", '
'"request": "$request", '
'"status": "$status", '
'"server_name": "$server_name", '
'"scheme": "$scheme", '
'"https": "$https", '
'"hostname": "$hostname", '
'"host": "$host", '
'"upstream_cache_status": "$upstream_cache_status", '
'"request_time": "$request_time"'
'}}';
conf: |
server {{
listen 80;
server_name {server_name};
location /{bucket_name} {{
proxy_pass http://cwm-minio.minio-tenant-main.svc.cluster.local;
include /etc/nginx/includes/cache_location.conf;
}}
access_log syslog:server=unix:/var/log/nginx/cwm-access-logs/syslog.sock json;
}}
default_conf: ""
''').strip()
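Because the ConfigMap body doubles as a Python format string, nginx's literal braces are escaped as `{{`/`}}` and only `{name}`, `{server_name}` and `{bucket_name}` are substituted. A minimal rendering sketch with made-up values (the real ones come from `deployment_config['minio']['nginx']['hostnames']` in `deploy_cdn` below):

# a minimal sketch; all three values are illustrative
configmap_yaml = NGINX_HOST_BUCKET_HTTP_CONFIGMAP_TEMPLATE.format(
    name='nginx-host-bucket-http-example',
    server_name='example.com',
    bucket_name='example-bucket',
)
# like the other configmaps below, this can be piped to: kubectl -n <namespace> apply -f -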
NGINX_HOST_BUCKET_HTTPS_CONFIGMAP_TEMPLATE = dedent('''
apiVersion: v1
@@ -120,12 +191,35 @@
ssl_prefer_server_ciphers on;
location /{bucket_name} {{
proxy_pass http://cwm-minio.minio-tenant-main.svc.cluster.local;
include /etc/nginx/includes/cache_location.conf;
}}
access_log syslog:server=unix:/var/log/nginx/cwm-access-logs/syslog.sock json;
}}
fullchain: "{fullchain}"
key: "{privkey}"
chain: "{chain}"
''').strip()
FLUENT_BIT_CONFIGMAP_TEMPLATE = dedent('''
apiVersion: v1
kind: ConfigMap
metadata:
name: {name}
data:
fluent-bit.conf: |
[SERVICE]
Parsers_File parsers.conf
Parsers_File cwmparsers.conf
[INPUT]
Name syslog
Path /var/log/nginx/cwm-access-logs/syslog.sock
Unix_Perm 0666
[OUTPUT]
Name kafka
Brokers {kafka_brokers}
Topics {kafka_topic}
''').strip()
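Taken together: nginx writes the JSON `log_format` lines to the shared unix socket, the Fluent Bit syslog input reads them, and the kafka output ships them to the `{kafka_topic}` topic on `{kafka_brokers}`. The streamer then sees each line as a string under the record's `message` key, next to the syslog `ident` (which is what the `startswith('nginx-')` check in kafka_streamer.py keys on). A sketch of one decoded access-log payload, with field names taken from the `log_format json` block above and values invented:

# illustrative payload; field names match the nginx log_format above, values are invented
access_log_message = {
    'bytes_sent': '512',
    'request_length': '230',
    'request': 'GET /example-bucket/some-object HTTP/1.1',
    'status': '200',
    'server_name': 'example.com',
    'scheme': 'http',
    'https': '',
    'hostname': 'cdn-nginx-pod',
    'host': 'example.com',
    'upstream_cache_status': 'HIT',
    'request_time': '0.001',
}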


def kubectl_create(obj, namespace_name='default'):
@@ -343,9 +437,44 @@ def deploy_cdn(self, deployment_config, dry_run=False):
out.append(f"create namespace: {namespace_name}")
else:
subprocess.check_call(['kubectl', 'create', 'namespace', namespace_name])
update_hash = []
volumes = []
volume_mounts = []
from .kafka_streamer import MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC
configmap_input = FLUENT_BIT_CONFIGMAP_TEMPLATE.format(
name='fluentbit-config',
kafka_brokers='minio-audit-kafka-bootstrap.strimzi.svc.cluster.local:9092',
kafka_topic=MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC
)
if dry_run:
out.append(f"create configmap: fluentbit-config")
out.append(configmap_input)
else:
subprocess.run([
'kubectl', '-n', namespace_name, 'apply', '-f', '-'
], input=configmap_input.encode())
update_hash = [
configmap_input
]
configmap_input = NGINX_INCLUDES_CONFIGMAP_TEMPLATE.format()
if dry_run:
out.append(f"create configmap: nginx-includes")
out.append(configmap_input)
else:
subprocess.run([
'kubectl', '-n', namespace_name, 'apply', '-f', '-'
], input=configmap_input.encode())
update_hash.append(configmap_input)
volumes = [
{'name': 'access-logs', 'emptyDir': {}},
{'name': 'fluentbit-config', 'configMap': {'name': 'fluentbit-config'}},
{'name': 'nginx-includes', 'configMap': {'name': 'nginx-includes'}},
{'name': 'nginx-cache', 'emptyDir': {}},
{'name': 'nginx-cache-temp', 'emptyDir': {}},
]
volume_mounts = [
{"name": "access-logs", "mountPath": "/var/log/nginx/cwm-access-logs"},
{'name': 'nginx-includes', 'mountPath': '/etc/nginx/includes'},
{'name': 'nginx-cache', 'mountPath': '/var/cache/nginx/minio/cache'},
{'name': 'nginx-cache-temp', 'mountPath': '/var/cache/nginx/minio/temp'},
]
for i, hostname in enumerate(deployment_config['minio']['nginx']['hostnames']):
id_ = hostname['id']
name = hostname['name']
60 changes: 42 additions & 18 deletions cwm_worker_operator/kafka_streamer.py
@@ -4,6 +4,7 @@
"""
import os
import json
import functools
import subprocess
from textwrap import dedent

@@ -33,34 +34,56 @@ def get_request_type(name):
return 'misc'


def process_minio_tenant_main_audit_logs(data, agg_data):
def process_minio_tenant_main_audit_logs_update_agg_data(agg_data, namespace_name, request_type, tx, rx):
if namespace_name not in agg_data:
logs.debug(f"process_minio_tenant_main_audit_logs: {namespace_name}", 10)
agg_data[namespace_name] = DEPLOYMENT_API_METRICS_BASE_DATA.copy()
agg_data[namespace_name][f'bytes_in'] += int(rx)
agg_data[namespace_name][f'bytes_out'] += int(tx)
agg_data[namespace_name][f'num_requests_{request_type}'] += 1


def process_minio_tenant_main_audit_logs(data, agg_data, domains_config):
data_api = data.get('api', {})
bucket = data_api.get('bucket') or None
bucket = data_api.get('bucket')
if bucket:
namespace_name = common.get_namespace_name_from_bucket_name(bucket)
if namespace_name:
if namespace_name not in agg_data:
logs.debug(f"process_minio_tenant_main_audit_logs: {namespace_name}", 8)
agg_data[namespace_name] = DEPLOYMENT_API_METRICS_BASE_DATA.copy()
logs.debug('process_minio_tenant_main_audit_logs', 10, data_api=data_api)
tx = data_api.get('tx') or 0
rx = data_api.get('rx') or 0
agg_data[namespace_name][f'bytes_in'] += rx
agg_data[namespace_name][f'bytes_out'] += tx
request_type = get_request_type(data_api.get('name'))
agg_data[namespace_name][f'num_requests_{request_type}'] += 1
process_minio_tenant_main_audit_logs_update_agg_data(agg_data, namespace_name, request_type, tx, rx)
logs.debug('process_minio_tenant_main_audit_logs (minio)', 10, data_api=data_api)
elif data.get('message') and (data.get('ident') or '').startswith('nginx-'):
message = json.loads(data['message'])
host = message.get('host')
upstream_cache_status = message.get('upstream_cache_status')
if host and upstream_cache_status == 'HIT':
try:
worker_id = domains_config.get_cwm_api_volume_config(hostname=host).id
except:
worker_id = None
if worker_id:
namespace_name = common.get_namespace_name_from_worker_id(worker_id)
if namespace_name:
request = message.get('request') or ''
request_type = 'out' if request.startswith('GET ') else 'misc'
tx = message.get('bytes_sent') or 0
rx = message.get('request_length') or 0
process_minio_tenant_main_audit_logs_update_agg_data(agg_data, namespace_name, request_type, tx, rx)
logs.debug('process_minio_tenant_main_audit_logs (cdn)', 10, message=message)
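Only cache hits are aggregated in this branch, presumably because misses are proxied through to MinIO and already counted by the audit-log path above. A usage sketch with an invented record and a `domains_config` stub (the real one resolves the hostname to a worker id via `get_cwm_api_volume_config`):

import json

# all values invented; the ident only needs to satisfy the startswith('nginx-') check
record = {
    'ident': 'nginx-cdn',
    'message': json.dumps({
        'host': 'example.com',
        'upstream_cache_status': 'HIT',
        'request': 'GET /example-bucket/some-object HTTP/1.1',
        'bytes_sent': '512',
        'request_length': '230',
    }),
}
agg_data = {}
process_minio_tenant_main_audit_logs(record, agg_data, domains_config)
# for the resolved namespace, agg_data now has bytes_out += 512 (bytes_sent),
# bytes_in += 230 (request_length), and num_requests_out += 1 (GET counts as 'out')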


def commit_minio_tenant_main_audit_logs(domains_config, agg_data):
logs.debug(f"commit_minio_tenant_main_audit_logs: {agg_data}", 8)
logs.debug(f"commit_minio_tenant_main_audit_logs: {agg_data}", 10)
for namespace_name, data in agg_data.items():
domains_config.update_deployment_api_metrics(namespace_name, data)
domains_config.set_deployment_last_action(namespace_name)


def process_data(topic, data, agg_data):
def process_data(topic, data, agg_data, domains_config):
if topic == MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC:
process_minio_tenant_main_audit_logs(data, agg_data)
process_minio_tenant_main_audit_logs(data, agg_data, domains_config)
else:
raise NotImplementedError(f"topic {topic} is not supported")

@@ -81,7 +104,7 @@ def delete_records(topic, latest_partition_offset):
]
if len(partitions) > 0:
offset_json = json.dumps({'partitions': partitions, 'version': 1})
logs.debug(f"Deleting records: {offset_json}", 8)
logs.debug(f"Deleting records: {offset_json}", 10)
subprocess.check_call([
'kubectl', 'exec', '-n', config.KAFKA_STREAMER_POD_NAMESPACE, config.KAFKA_STREAMER_POD_NAME, '--', 'bash', '-c', dedent(f'''
TMPFILE=$(mktemp) &&\
@@ -96,7 +119,7 @@ def run_single_iteration(domains_config: DomainsConfig, topic, daemon, no_kafka_
start_time = common.now()
assert topic, "topic is required"
assert config.KAFKA_STREAMER_BOOTSTRAP_SERVERS
logs.debug(f"running iteration for topic: {topic}", 8)
logs.debug(f"running iteration for topic: {topic}", 10)
consumer = Consumer({
'bootstrap.servers': config.KAFKA_STREAMER_BOOTSTRAP_SERVERS,
'group.id': config.KAFKA_STREAMER_OPERATOR_GROUP_ID,
@@ -106,20 +129,21 @@
latest_partition_offset = {}
try:
agg_data = {}
commit_ = functools.partial(commit, topic, consumer, domains_config, agg_data, no_kafka_commit=no_kafka_commit)
while (common.now() - start_time).total_seconds() < config.KAFKA_STREAMER_POLL_TIME_SECONDS and not daemon.terminate_requested:
msg = consumer.poll(timeout=config.KAFKA_STREAMER_CONSUMER_POLL_TIMEOUT_SECONDS)
if msg is None:
# logs.debug("Waiting for messages...", 10)
pass
logs.debug("Waiting for messages...", 10)
commit_()
elif msg.error():
raise Exception(f"Message ERROR: {msg.error()}")
else:
offset = msg.offset()
partition = msg.partition()
latest_partition_offset[partition] = offset
data = json.loads(msg.value())
process_data(topic, data, agg_data)
commit(topic, consumer, domains_config, agg_data, no_kafka_commit=no_kafka_commit)
process_data(topic, data, agg_data, domains_config)
commit_()
except KeyboardInterrupt:
pass
finally:
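The `functools` import added at the top of this file backs the new `commit_` helper above: `functools.partial` binds the recurring arguments once, so the idle-poll and post-message call sites stay identical. A sketch of the equivalence:

import functools

# commit() is the existing helper in this module; the arguments are bound as above
commit_ = functools.partial(commit, topic, consumer, domains_config, agg_data,
                            no_kafka_commit=no_kafka_commit)
commit_()  # same as: commit(topic, consumer, domains_config, agg_data, no_kafka_commit=no_kafka_commit)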
