Skip to content

Commit

Permalink
feature: 数据清理二期-订阅实例表(closed TencentBlueKing#1896)
Browse files Browse the repository at this point in the history
  • Loading branch information
neko12583 committed Nov 23, 2023
1 parent bbbd26a commit 1f7038d
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 0 deletions.
4 changes: 4 additions & 0 deletions apps/backend/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,7 @@ def _get_member__alias_map(cls) -> Dict[Enum, str]:
class ProxyConfigFile(enum.EnhanceEnum):
V1 = ["agent.conf", "btsvr.conf", "transit.conf", "opts.conf", "plugin_info.json", "data.conf", "dataflow.conf"]
V2 = ["gse_agent.conf", "gse_data_proxy.conf", "gse_file_proxy.conf"]


DEFAULT_ALIVE_TIME = 30
DEFAULT_CLEAN_RECORD_LIMIT = 5000
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-节点管理(BlueKing-BK-NODEMAN) available.
Copyright (C) 2017-2022 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at https://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
from datetime import timedelta
from typing import Dict, Union

from celery.schedules import crontab
from celery.task import periodic_task
from django.core.cache import cache
from django.db import transaction
from django.db.models import Q
from django.utils import timezone

from apps.backend import constants
from common.log import logger
from apps.node_man import models


@periodic_task(
queue="default",
options={"queue": "default"},
run_every=crontab(minute="*/5"),
)
def clean_subscription_instance_record_data():
clean_subscription_data_map: Dict[str, Union[int, str, bool]] = (
models.GlobalSettings.get_config(models.GlobalSettings.KeyEnum.CLEAN_SUBSCRIPTION_DATA_MAP.value) or {}
)

# 清理订阅数据开关
enable_clean_subscription_data: bool = clean_subscription_data_map.get("enable_clean_subscription_data", True)
if not enable_clean_subscription_data:
logger.info("clean_subscription_data is not enable, delete subscription data will be skipped")
return

logger.info(
f"periodic_task -> clean_subscription_instance_record_data, start to clean subscription instance record data,"
f" clean rule: {models.GlobalSettings.KeyEnum.CLEAN_SUBSCRIPTION_DATA_MAP.value}"
f" -> [{clean_subscription_data_map}]"
)

limit: int = clean_subscription_data_map.get("limit", constants.DEFAULT_CLEAN_RECORD_LIMIT)
alive_days: int = clean_subscription_data_map.get("alive_days", constants.DEFAULT_ALIVE_TIME)
alive_days_to_datetime = timezone.now() - timedelta(days=alive_days)

# 有数据保留天数限制,策略类型任务 id 数据长期有效
policy_subscription_ids = cache.get("policy_subscription_ids")
if not policy_subscription_ids:
policy_subscription_ids = models.Subscription.objects.filter(
category=models.Subscription.CategoryType.POLICY
).values_list("id", flat=True)

# 本地测试使用 Q 对象查询时间比 exclude 与 filter 少 10%
need_clean_instance_records = models.SubscriptionInstanceRecord.objects.filter(
~Q(subscription_id__in=policy_subscription_ids) | Q(create_time__lt=alive_days_to_datetime) | Q(is_latest=False)
)[:limit]

# 需要去重,但使用 limit ,values_list 之后不能使用 distinct
need_clean_subscription_task_ids = set(need_clean_instance_records.values_list("task_id", flat=True))
need_clean_subscription_task = models.SubscriptionTask.objects.filter(
id__in=need_clean_subscription_task_ids
)

need_clean_pipeline_tree_ids = need_clean_subscription_task.values_list("pipeline_id", flat=True)
need_clean_pipeline_tree = models.PipelineTree.objects.filter(
id__in=need_clean_pipeline_tree_ids,
)

# 批量删除
with transaction.atomic():
need_clean_instance_records.delete()
need_clean_subscription_task.delete()
need_clean_pipeline_tree.delete()

cache.set("policy_subscription_ids", policy_subscription_ids, 60 * 15)

0 comments on commit 1f7038d

Please sign in to comment.