Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
Signed-off-by: Tyler Gu <jiaweig3@illinois.edu>
  • Loading branch information
tylergu committed Jul 25, 2023
1 parent 23393fc commit ab0d17f
Show file tree
Hide file tree
Showing 122 changed files with 7,159 additions and 39 deletions.
17 changes: 11 additions & 6 deletions acto/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def __init__(self, context: dict, input_model: InputModel, deploy: Deploy, runne

self.curr_trial = 0

def run(self, mode: str = InputModel.NORMAL):
def run(self, errors: List[RunResult], mode: str = InputModel.NORMAL):
logger = get_thread_logger(with_prefix=True)

self.input_model.set_worker_id(self.worker_id)
Expand Down Expand Up @@ -267,6 +267,7 @@ def run(self, mode: str = InputModel.NORMAL):
delete_operator_pod(apiclient, self.context['namespace'])
save_result(trial_dir, trial_err, num_tests, trial_elapsed)
self.curr_trial = self.curr_trial + 1
errors.append(trial_err)

if self.input_model.is_empty():
logger.info('Test finished')
Expand Down Expand Up @@ -674,6 +675,7 @@ def __init__(self,
if operator_config.k8s_fields is not None:
module = importlib.import_module(operator_config.k8s_fields)
if hasattr(module,'BLACKBOX') and actoConfig.mode == 'blackbox':
applied_custom_k8s_fields = True
for k8s_field in module.BLACKBOX:
self.input_model.apply_k8s_schema(k8s_field)
elif hasattr(module,'WHITEBOX') and actoConfig.mode == 'whitebox':
Expand Down Expand Up @@ -814,7 +816,8 @@ def __learn(self, context_file, helper_crd, analysis_only=False):
with open(context_file, 'w') as context_fout:
json.dump(self.context, context_fout, cls=ContextEncoder, indent=4, sort_keys=True)

def run(self, modes: list = ['normal', 'overspecified', 'copiedover']):
def run(self, modes: list = ['normal', 'overspecified', 'copiedover']) -> List[RunResult]:
# TODO: return the alarms here
logger = get_thread_logger(with_prefix=True)

# Build an archive to be preloaded
Expand All @@ -829,6 +832,7 @@ def run(self, modes: list = ['normal', 'overspecified', 'copiedover']):

start_time = time.time()

errors: List[RunResult] = []
runners: List[TrialRunner] = []
for i in range(self.num_workers):
runner = TrialRunner(self.context, self.input_model, self.deploy, self.runner_type,
Expand All @@ -841,7 +845,7 @@ def run(self, modes: list = ['normal', 'overspecified', 'copiedover']):
if 'normal' in modes:
threads = []
for runner in runners:
t = threading.Thread(target=runner.run, args=([]))
t = threading.Thread(target=runner.run, args=([errors]))
t.start()
threads.append(t)

Expand All @@ -853,7 +857,7 @@ def run(self, modes: list = ['normal', 'overspecified', 'copiedover']):
if 'overspecified' in modes:
threads = []
for runner in runners:
t = threading.Thread(target=runner.run, args=([InputModel.OVERSPECIFIED]))
t = threading.Thread(target=runner.run, args=([errors, InputModel.OVERSPECIFIED]))
t.start()
threads.append(t)

Expand All @@ -865,7 +869,7 @@ def run(self, modes: list = ['normal', 'overspecified', 'copiedover']):
if 'copiedover' in modes:
threads = []
for runner in runners:
t = threading.Thread(target=runner.run, args=([InputModel.COPIED_OVER]))
t = threading.Thread(target=runner.run, args=([errors, InputModel.COPIED_OVER]))
t.start()
threads.append(t)

Expand All @@ -877,7 +881,7 @@ def run(self, modes: list = ['normal', 'overspecified', 'copiedover']):
if InputModel.ADDITIONAL_SEMANTIC in modes:
threads = []
for runner in runners:
t = threading.Thread(target=runner.run, args=([InputModel.ADDITIONAL_SEMANTIC]))
t = threading.Thread(target=runner.run, args=([errors, InputModel.ADDITIONAL_SEMANTIC]))
t.start()
threads.append(t)

Expand All @@ -904,3 +908,4 @@ def run(self, modes: list = ['normal', 'overspecified', 'copiedover']):
json.dump(testrun_info, info_file, cls=ActoEncoder, indent=4)

logger.info('All tests finished')
return errors
60 changes: 60 additions & 0 deletions acto/post_process/count_invalid_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import json

from acto.common import InvalidInputResult
from .post_process import PostProcessor
from acto.utils.config import OperatorConfig

class InvalidTestCounter(PostProcessor):
    """Post-processor that counts how many distinct test cases in a test run
    were rejected as invalid, judged from the operator log.

    Prints four figures: distinct test cases seen, distinct test cases flagged
    invalid via the operator log, and the same two counts with repetition
    across trials.
    """

    def __init__(self, testrun_dir: str, config: OperatorConfig):
        super().__init__(testrun_dir, config)
        # NOTE(review): these accumulators are never populated by post_process
        # below — kept for interface compatibility, but currently unused.
        self._invalid_tests = []
        self._valid_tests = []

    def post_process(self):
        """Walk every step of every trial and print the totals described on the class."""
        test_case_set = set()          # distinct field+testcase signatures seen
        invalid_test_case_set = set()  # distinct signatures flagged invalid in the log
        total_count = 0                # steps counted, with repetition across trials
        invalid_count = 0
        for trial in self.trial_to_steps:
            for step in self.trial_to_steps[trial]:
                runtime_result = step.runtime_result
                testcase_signature = runtime_result.testcase_signature

                # Skip revert steps, steps without a signature, and k8s-schema tests.
                if runtime_result.revert:  # idiomatic truthiness instead of `== True`
                    continue
                if testcase_signature is None:
                    continue
                if 'k8s' in testcase_signature['testcase']:
                    continue

                # One key per (field, testcase) pair, shared by both sets below.
                signature_key = testcase_signature['field'] + testcase_signature['testcase']
                test_case_set.add(signature_key)
                total_count += 1
                if isinstance(runtime_result.input_result, InvalidInputResult):
                    # Deliberately not counted: input-level rejections happen before
                    # the operator runs. This branch must stay, so a step invalid at
                    # both levels is not also counted by the elif below.
                    pass
                elif isinstance(runtime_result.log_result, InvalidInputResult):
                    invalid_test_case_set.add(signature_key)
                    invalid_count += 1

        print('Total number of tests: ', len(test_case_set))
        print('Total number of invalid tests: ', len(invalid_test_case_set))
        print('Total count: ', total_count)
        print('Invalid count: ', invalid_count)



if __name__ == '__main__':
    import argparse

    # Command-line entry point: load the operator config and run the counter
    # over the given test-run directory.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--config', type=str, required=True)
    arg_parser.add_argument('--testrun-dir', type=str, required=True)
    cli_args = arg_parser.parse_args()

    with open(cli_args.config, 'r') as config_file:
        operator_config = OperatorConfig(**json.load(config_file))
    counter = InvalidTestCounter(testrun_dir=cli_args.testrun_dir, config=operator_config)
    counter.post_process()
15 changes: 9 additions & 6 deletions acto/post_process/post_diff_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ def compare_system_equality(curr_system_state: Dict,
obj['data'][key] = json.loads(data)
except:
pass

if len(curr_system_state['secret']) != len(prev_system_state['secret']):
logger.debug(f"failed attempt recovering to seed state - secret count mismatch")
return RecoveryResult(delta='Secret number changed', from_=prev_system_state, to_=curr_system_state)

# remove custom resource from both states
curr_system_state.pop('custom_resource_spec', None)
Expand Down Expand Up @@ -264,8 +268,8 @@ def __init__(self, context: Dict, deploy: Deploy, workdir: str, cluster: base.Ku
self._cluster = cluster
self._worker_id = worker_id
self._cluster_name = f"acto-cluster-{worker_id}"
self._kubeconfig = os.path.join(os.path.expanduser('~'), '.kube', self._cluster_name)
self._context_name = cluster.get_context_name(f"acto-cluster-{worker_id}")
self._kubeconfig = os.path.join(os.path.expanduser('~'), '.kube', self._context_name)
self._generation = 0

def run_cr(self, cr, trial, gen):
Expand Down Expand Up @@ -304,8 +308,8 @@ def __init__(self, workqueue: multiprocessing.Queue, context: Dict, deploy: Depl
self._cluster = cluster
self._worker_id = worker_id
self._cluster_name = f"acto-cluster-{worker_id}"
self._kubeconfig = os.path.join(os.path.expanduser('~'), '.kube', self._cluster_name)
self._context_name = cluster.get_context_name(f"acto-cluster-{worker_id}")
self._kubeconfig = os.path.join(os.path.expanduser('~'), '.kube', self._context_name)
self._images_archive = os.path.join(workdir, 'images.tar')

def run(self):
Expand Down Expand Up @@ -360,15 +364,15 @@ def run(self):

class PostDiffTest(PostProcessor):

def __init__(self, testrun_dir: str, config: OperatorConfig):
def __init__(self, testrun_dir: str, config: OperatorConfig, ignore_invalid: bool = False):
super().__init__(testrun_dir, config)
logger = get_thread_logger(with_prefix=True)

self.all_inputs = []
for trial, steps in self.trial_to_steps.items():
for step in steps:
invalid, _ = step.runtime_result.is_invalid()
if invalid:
if invalid and not ignore_invalid:
continue
self.all_inputs.append({
'trial': trial,
Expand Down Expand Up @@ -511,8 +515,7 @@ def check_diff_test_step(diff_test_result: Dict,
trial_dir = original_result.trial_dir
gen = original_result.gen

invalid, _ = original_result.runtime_result.is_invalid()
if isinstance(original_result.runtime_result.health_result, ErrorResult) or invalid:
if isinstance(original_result.runtime_result.health_result, ErrorResult):
return

original_operator_log = original_result.operator_log
Expand Down
55 changes: 31 additions & 24 deletions acto/reproduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@

from acto.engine import Acto
from acto.input.valuegenerator import extract_schema_with_value_generator

from acto.schema import BaseSchema, OpaqueSchema
from acto.input.testplan import TestGroup, TreeNode
from acto.post_process.post_diff_test import PostDiffTest

from acto.input.value_with_schema import ValueWithSchema
from acto.input import TestCase
Expand Down Expand Up @@ -123,9 +121,6 @@ def next_test(self) -> list:
def apply_k8s_schema(self, k8s_field):
pass

def apply_custom_field(self, custom_field: CustomField):
pass


def repro_precondition(v):
return True
Expand All @@ -139,8 +134,17 @@ def repro_setup(v):
return None


def reproduce(workdir_path: str, reproduce_dir: str, operator_config: OperatorConfig, **kwargs):

def reproduce(workdir_path: str, reproduce_dir: str, operator_config: OperatorConfig, **kwargs) -> bool:
os.makedirs(workdir_path, exist_ok=True)
# Setting up log infra
logging.basicConfig(
filename=os.path.join(workdir_path, 'test.log'),
level=logging.DEBUG,
filemode='w',
format='%(asctime)s %(levelname)-7s, %(name)s, %(filename)-9s:%(lineno)d, %(message)s')
logging.getLogger("kubernetes").setLevel(logging.ERROR)
logging.getLogger("sh").setLevel(logging.ERROR)

with open(operator_config, 'r') as config_file:
config = OperatorConfig(**json.load(config_file))
context_cache = os.path.join(os.path.dirname(config.seed_custom_resource), 'context.json')
Expand All @@ -158,15 +162,30 @@ def reproduce(workdir_path: str, reproduce_dir: str, operator_config: OperatorCo
num_cases=1,
dryrun=False,
analysis_only=False,
is_reproduce=is_reproduce,
is_reproduce=True,
input_model=input_model,
apply_testcase_f=apply_testcase_f,
reproduce_dir=reproduce_dir)

acto.run(modes=['normal'])
errors = acto.run(modes=['normal'])
if any(x is not None for x in errors):
return True
else:
return False

def reproduce_postdiff(workdir_path: str, operator_config: OperatorConfig, **kwargs) -> bool:
with open(operator_config, 'r') as config_file:
config = OperatorConfig(**json.load(config_file))
post_diff_test_dir = os.path.join(workdir_path, 'post_diff_test')
logs = glob(workdir_path + '/*/operator-*.log')
for log in logs:
open(log, 'w').close()
p = PostDiffTest(testrun_dir=workdir_path, config=config, ignore_invalid=True)
p.post_process(post_diff_test_dir, num_workers=1)
p.check(post_diff_test_dir, num_workers=1)

return len(glob(os.path.join(post_diff_test_dir, 'compare-results-*.json'))) > 0

# TODO add main function
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Automatic, Continuous Testing for k8s/openshift Operators')
Expand All @@ -188,23 +207,11 @@ def reproduce(workdir_path: str, reproduce_dir: str, operator_config: OperatorCo
args = parser.parse_args()

workdir_path = 'testrun-%s' % datetime.now().strftime('%Y-%m-%d-%H-%M')
os.makedirs(workdir_path, exist_ok=True)
# Setting up log infra
logging.basicConfig(
filename=os.path.join(workdir_path, 'test.log'),
level=logging.DEBUG,
filemode='w',
format='%(asctime)s %(levelname)-7s, %(name)s, %(filename)-9s:%(lineno)d, %(message)s')
logging.getLogger("kubernetes").setLevel(logging.ERROR)
logging.getLogger("sh").setLevel(logging.ERROR)

logger = get_thread_logger(with_prefix=False)

is_reproduce = True
start_time = datetime.now()
reproduce(workdir_path=workdir_path,
reproduce_dir=args.reproduce_dir,
operator_config=args.config,
cluster_runtime=args.cluster_runtime)
end_time = datetime.now()
logger.info('Acto finished in %s', end_time - start_time)
end_time = datetime.now()
3 changes: 2 additions & 1 deletion data/cass-operator/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"\\['service_account'\\]\\['cass\\-operator\\-controller\\-manager'\\]\\['secrets'\\]\\[.*\\]\\['name'\\]",
"\\['service_account'\\]\\['default'\\]\\['secrets'\\]\\[.*\\]\\['name'\\]",
"\\['stateful_set'\\]\\['cluster1\\-test\\-cluster\\-default\\-sts'\\]\\['spec'\\]\\['update_strategy'\\]\\['rolling_update'\\]\\['max_unavailable'\\]",
"\\['stateful_set'\\]\\['cluster1\\-test\\-cluster\\-default\\-sts'\\]\\['spec'\\]\\['persistent_volume_claim_retention_policy'\\]"
"\\['stateful_set'\\]\\['cluster1\\-test\\-cluster\\-default\\-sts'\\]\\['spec'\\]\\['persistent_volume_claim_retention_policy'\\]",
"\\['cassandra\\.datastax\\.com/resource\\-hash'\\]"
]
}
5 changes: 4 additions & 1 deletion data/mongodb-community-operator/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,8 @@
"entrypoint": "cmd/manager",
"type": "MongoDBCommunity",
"package": "github.com/mongodb/mongodb-kubernetes-operator/api/v1"
}
},
"diff_ignore_fields": [
"\\['metadata'\\]\\['annotations'\\]\\['agent\\.mongodb\\.com\\\/version'\\]"
]
}
3 changes: 2 additions & 1 deletion data/percona-server-mongodb-operator/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"\\['secret'\\]\\['percona\\-server\\-mongodb\\-operator\\-token-.*'\\]",
"\\['secret'\\]\\[.*\\]\\['data'\\]",
"\\['metadata'\\]\\['annotations'\\]\\['percona\\.com\\\/ssl\\-hash'\\]",
"\\['metadata'\\]\\['annotations'\\]\\['percona\\.com\\\/ssl\\-internal\\-hash'\\]"
"\\['metadata'\\]\\['annotations'\\]\\['percona\\.com\\\/ssl\\-internal\\-hash'\\]",
"\\['metadata'\\]\\['annotations'\\]\\['percona\\.com\\\/last\\-config\\-hash'\\]"
]
}
27 changes: 27 additions & 0 deletions test/cassop-334/mutated-0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
apiVersion: cassandra.datastax.com/v1beta1
kind: CassandraDatacenter
metadata:
name: test-cluster
spec:
clusterName: cluster1
config:
cassandra-yaml:
authenticator: org.apache.cassandra.auth.PasswordAuthenticator
authorizer: org.apache.cassandra.auth.CassandraAuthorizer
role_manager: org.apache.cassandra.auth.CassandraRoleManager
jvm-options:
initial_heap_size: 800M
max_heap_size: 800M
managementApiAuth:
insecure: {}
serverType: cassandra
serverVersion: 3.11.7
size: 3
storageConfig:
cassandraDataVolumeClaimSpec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 3Gi
storageClassName: server-storage
29 changes: 29 additions & 0 deletions test/cassop-334/mutated-1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
apiVersion: cassandra.datastax.com/v1beta1
kind: CassandraDatacenter
metadata:
name: test-cluster
spec:
clusterName: cluster1
config:
cassandra-yaml:
authenticator: org.apache.cassandra.auth.PasswordAuthenticator
authorizer: org.apache.cassandra.auth.CassandraAuthorizer
role_manager: org.apache.cassandra.auth.CassandraRoleManager
jvm-options:
initial_heap_size: 800M
max_heap_size: 800M
managementApiAuth:
insecure: {}
serverType: cassandra
serverVersion: 3.11.7
nodeAffinityLabels:
dc: someLabel1
size: 3
storageConfig:
cassandraDataVolumeClaimSpec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 3Gi
storageClassName: server-storage
Loading

0 comments on commit ab0d17f

Please sign in to comment.