From 5c6149161a99c419d5b090a3a7b21ac9a3c836aa Mon Sep 17 00:00:00 2001 From: Alexander Kiel Date: Thu, 24 Oct 2024 21:42:17 +0200 Subject: [PATCH] [WIP] Fix Negative History Total After Patient Purge --- ...eck-resource-totals-after-patient-purge.sh | 9 + .github/scripts/patient-purge-all.sh | 7 + .github/workflows/build.yml | 65 ++++++ docs/implementation/database.md | 10 +- modules/db/src/blaze/db/api.clj | 3 + .../src/blaze/db/node/tx_indexer/verify.clj | 26 ++- modules/db/test/blaze/db/api_test.clj | 217 +++++++++++++++++- 7 files changed, 311 insertions(+), 26 deletions(-) create mode 100755 .github/scripts/check-resource-totals-after-patient-purge.sh create mode 100755 .github/scripts/patient-purge-all.sh diff --git a/.github/scripts/check-resource-totals-after-patient-purge.sh b/.github/scripts/check-resource-totals-after-patient-purge.sh new file mode 100755 index 000000000..cfeb97d32 --- /dev/null +++ b/.github/scripts/check-resource-totals-after-patient-purge.sh @@ -0,0 +1,9 @@ +#!/bin/bash -e + +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +. "$SCRIPT_DIR/util.sh" + +BASE="http://localhost:8080/fhir" +ACTUAL_TOTALS="$(curl -sH 'Accept: application/fhir+json' "$BASE/\$totals" | jq -r '.parameter[] | [.name, .valueUnsignedInt] | @csv')" + +test "resource totals" "$ACTUAL_TOTALS" "\"Medication\",1016" diff --git a/.github/scripts/patient-purge-all.sh b/.github/scripts/patient-purge-all.sh new file mode 100755 index 000000000..8b5d0caad --- /dev/null +++ b/.github/scripts/patient-purge-all.sh @@ -0,0 +1,7 @@ +#!/bin/bash -e + +BASE="http://localhost:8080/fhir" + +for ID in $(curl -s "$BASE/Patient?_count=10000&_elements=id" | jq -r '.entry[].resource.id'); do + curl -s -XPOST "$BASE/Patient/$ID/\$purge" -o /dev/null +done diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 378d0e184..ab5b7514e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1163,6 +1163,71 @@ jobs: - name: Docker Stats run: docker stats --no-stream + integration-test-patient-purge: + needs: build + runs-on: ubuntu-24.04 + + steps: + - name: Check out Git repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + + - name: Install Blazectl + run: .github/scripts/install-blazectl.sh + + - name: Download Blaze Image + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4 + with: + name: blaze-image + path: /tmp + + - name: Load Blaze Image + run: docker load --input /tmp/blaze.tar + + - name: Run Blaze + run: docker run --name blaze -d -e JAVA_TOOL_OPTIONS=-Xmx2g -e DB_BLOCK_CACHE_SIZE=2048 -e ENABLE_OPERATION_PATIENT_PURGE=true -p 8080:8080 --read-only --tmpfs /tmp:exec -v blaze-data:/app/data blaze:latest + + - name: Wait for Blaze + run: .github/scripts/wait-for-url.sh http://localhost:8080/health + + - name: Docker Logs + run: docker logs blaze + + - name: Check Capability Statement + run: .github/scripts/check-capability-statement.sh + + - name: Check Referential Integrity Enforced + run: .github/scripts/check-referential-integrity-enforced.sh + + - name: Download Synthea Test Data + run: wget https://speicherwolke.uni-leipzig.de/index.php/s/kDsa2ifeMFdqK35/download/synthea-1000.tar + + - name: Create Synthea Test Data Dir + run: mkdir test-data-synthea-1000 + + - name: Unpack Synthea Test Data + run: tar -C test-data-synthea-1000 -xf synthea-1000.tar + + - name: Load Data + run: blazectl --no-progress --server http://localhost:8080/fhir upload test-data-synthea-1000 + + - name: Check Total-Number of Resources are 1099594 + run: .github/scripts/check-total-number-of-resources.sh 1099594 + + - name: Patient Purge all + run: .github/scripts/patient-purge-all.sh + + - name: Download All Resources + run: .github/scripts/download-all-resources.sh + + - name: Download Patient Resources + run: .github/scripts/download-resources.sh Patient + + - name: Check Resource Totals + run: .github/scripts/check-resource-totals-after-patient-purge.sh + + - name: Docker Stats + run: docker stats --no-stream + not-enforcing-referential-integrity-test: needs: build runs-on: ubuntu-24.04 diff --git a/docs/implementation/database.md b/docs/implementation/database.md index ba6f3dd8c..164d5c389 100644 --- a/docs/implementation/database.md +++ b/docs/implementation/database.md @@ -36,7 +36,7 @@ Another data model is the document model used in document stores like [MongoDB][ There are two different sets of indices, ones which depend on the database value point in time `t` and the ones which only depend on the content hash of resource versions. The former indices are used to build up the database values were the latter are used to enable queries based on search parameters. -### Indices depending on t +### Transaction Indices | Name | Key Parts | Value | |-------------------|-----------|-------------------------------------------| @@ -118,7 +118,7 @@ The `TypeStats` index keeps track of the total number of resources, and the numb The `SystemStats` index keeps track of the total number of resources, and the number of changes to all resources at a particular point in time `t`. This statistic is used to populate the total count property in system listings and system history listings in case there are no filters applied. -### Indices not depending on t +### Search Param Indices The indices not depending on `t` directly point to the resource versions by their content hash. @@ -328,6 +328,12 @@ The `delete-history` command is used to delete the history of a resource. | type | yes | string | resource type | | id | yes | string | resource id | +#### Execution + +* get all instance history entries +* add the `t` of the transaction as `purged-at?` to the value of each of the history entries not only in the ResourceAsOf index, but also in the TypeAsOf and SystemAsOf index +* remove the number of history entries purged from the number of changes of `type` and thw whole system + ### Patient Purge The `patient-purge` command is used to remove all current and historical versions for all resources in a patient compartment. diff --git a/modules/db/src/blaze/db/api.clj b/modules/db/src/blaze/db/api.clj index b1a56e162..b4440a57c 100644 --- a/modules/db/src/blaze/db/api.clj +++ b/modules/db/src/blaze/db/api.clj @@ -490,6 +490,9 @@ "Returns a CompletableFuture that will complete with the resource of `resource-handle` or an anomaly in case of errors. + Note: If an deleted resource is pulled, a stub with type, id and meta will be + returned. + Functions applied after the returned future are executed on the common ForkJoinPool." [node-or-db resource-handle] diff --git a/modules/db/src/blaze/db/node/tx_indexer/verify.clj b/modules/db/src/blaze/db/node/tx_indexer/verify.clj index 95604e005..8c83f2c37 100644 --- a/modules/db/src/blaze/db/node/tx_indexer/verify.clj +++ b/modules/db/src/blaze/db/node/tx_indexer/verify.clj @@ -83,8 +83,13 @@ refs))) (def ^:private inc-0 (fnil inc 0)) +(def ^:private dec-0 (fnil dec 0)) (def ^:private minus-0 (fnil - 0)) +(defn- inc-num-changes-and-total [stat] + (-> (update stat :num-changes inc-0) + (update :total inc-0))) + (defmethod verify "create" [db-before t res {:keys [type id hash refs]}] (log/trace (verify-tx-cmd-create-msg type id)) (with-open [_ (prom/timer tx-u/duration-seconds "verify-create")] @@ -92,8 +97,7 @@ (let [tid (codec/tid type)] (-> (update res :entries into (index-entries tid id t hash 1 :create refs)) (update :new-resources conj [type id]) - (update-in [:stats tid :num-changes] inc-0) - (update-in [:stats tid :total] inc-0))))) + (update-in [:stats tid] inc-num-changes-and-total))))) (defn- print-etags [ts] (str/join "," (map (partial format "W/\"%d\"") ts))) @@ -145,6 +149,7 @@ (and (some? old-t) (= if-none-match old-t)) (throw-anom (precondition-version-failed-anomaly type id if-none-match)) + ;; identical update, we will do nothing (= old-hash hash) res @@ -203,7 +208,7 @@ (update :del-resources conj [type id]) (update-in [:stats tid :num-changes] inc-0)) op - (update-in [:stats tid :total] (fnil dec 0))))))) + (update-in [:stats tid :total] dec-0)))))) (def ^:private ^:const ^long delete-history-max 100000) @@ -219,12 +224,9 @@ (defn- purge-entry [tid id t rh] (rts/index-entries tid id (rh/t rh) (rh/hash rh) (rh/num-changes rh) (rh/op rh) t)) -(defn- purge-entries [tid id t instance-history] - (into [] (mapcat (partial purge-entry tid id t)) instance-history)) - -(defn- add-delete-history-entries [entries tid id t instance-history] - (-> (update entries :entries into (purge-entries tid (codec/id-byte-string id) t instance-history)) - (update-in [:stats tid :num-changes] minus-0 (count instance-history)))) +(defn- add-purge-entries [res tid id t resource-handles] + (-> (update res :entries into (mapcat (partial purge-entry tid (codec/id-byte-string id) t)) resource-handles) + (update-in [:stats tid :num-changes] minus-0 (count resource-handles)))) (defmethod verify "delete-history" [db-before t res {:keys [type id]}] @@ -240,7 +242,7 @@ (throw-anom (too-many-history-entries-anom type id)) :else - (add-delete-history-entries res tid id t instance-history))))) + (add-purge-entries res tid id t instance-history))))) ;; like delete-history but also purges the current version (defmethod verify "purge" @@ -257,10 +259,10 @@ (throw-anom (too-many-history-entries-anom type id)) :else - (cond-> (add-delete-history-entries res tid id t instance-history) + (cond-> (add-purge-entries res tid id t instance-history) (not (identical? :delete (rh/op (first instance-history)))) (-> (update :del-resources conj [type id]) - (update-in [:stats tid :total] (fnil dec 0)))))))) + (update-in [:stats tid :total] dec-0))))))) (defmethod verify :default [_db-before _t res _tx-cmd] diff --git a/modules/db/test/blaze/db/api_test.clj b/modules/db/test/blaze/db/api_test.clj index 6ae87a069..e74a748f9 100644 --- a/modules/db/test/blaze/db/api_test.clj +++ b/modules/db/test/blaze/db/api_test.clj @@ -432,16 +432,29 @@ (testing "with identical content" (with-system-data [{:blaze.db/keys [node]} config] - [[[:put {:fhir/type :fhir/Patient :id "0" :gender #fhir/code"female"}]] - [[:put {:fhir/type :fhir/Patient :id "0" :gender #fhir/code"female"}]]] + [[[:create {:fhir/type :fhir/Patient :id "0" :gender #fhir/code"female"}]]] - (testing "versionId is still 1" - (given @(pull-resource (d/db node) "Patient" "0") - :fhir/type := :fhir/Patient - :id := "0" - [:meta :versionId] := #fhir/id"1" - :gender := #fhir/code"female" - [meta :blaze.db/op] := :put))))) + (let [db @(d/transact node [[:put {:fhir/type :fhir/Patient :id "0" :gender #fhir/code"female"}]])] + + (testing "versionId is still 1" + (given @(pull-resource db "Patient" "0") + :fhir/type := :fhir/Patient + :id := "0" + [:meta :versionId] := #fhir/id"1" + :gender := #fhir/code"female" + [meta :blaze.db/op] := :create)) + + (testing "instance history contains only one entry" + (is (= 1 (d/total-num-of-instance-changes db "Patient" "0"))) + (is (= 1 (count (d/instance-history db "Patient" "0"))))) + + (testing "type history contains only one entry" + (is (= 1 (d/total-num-of-type-changes db "Patient"))) + (is (= 1 (count (d/type-history db "Patient"))))) + + (testing "system history contains only one entry" + (is (= 1 (d/total-num-of-system-changes db))) + (is (= 1 (count (d/system-history db))))))))) (testing "Diamond Reference Dependencies" (with-system-data [{:blaze.db/keys [node]} config] @@ -885,6 +898,40 @@ [0 :active] := true [1 :active] := false))))) + (testing "two patients, one with one version and the other with two versions" + (with-system-data [{:blaze.db/keys [node]} config] + [[[:create {:fhir/type :fhir/Patient :id "0"}] + [:create {:fhir/type :fhir/Patient :id "1" :active false}]] + [[:put {:fhir/type :fhir/Patient :id "1" :active true}]]] + + (let [db-after @(d/transact node [[:delete-history "Patient" "0"] + [:delete-history "Patient" "1"]])] + + (testing "the patient 1" + (let [patient @(pull-resource db-after "Patient" "1")] + + (testing "is active" + (given patient + :active := true)) + + (testing "the instance history contains only that patient" + (is (= 1 (d/total-num-of-instance-changes db-after "Patient" "1"))) + (is (= [patient] @(pull-instance-history db-after "Patient" "1")))))) + + (testing "the type history contains two entries" + (is (= 2 (d/total-num-of-type-changes db-after "Patient"))) + (given @(d/pull-many node (d/type-history db-after "Patient")) + count := 2 + [0 :active] := true + [1 :id] := "0")) + + (testing "the system history contains two entries" + (is (= 2 (d/total-num-of-system-changes db-after))) + (given @(d/pull-many node (d/system-history db-after)) + count := 2 + [0 :active] := true + [1 :id] := "0"))))) + (testing "one deleted patient" (with-system-data [{:blaze.db/keys [node]} config] [[[:create {:fhir/type :fhir/Patient :id "0"}]] @@ -1038,7 +1085,34 @@ (is (coll/empty? (d/instance-history db-after "Patient" "0"))) (is (coll/empty? (d/type-history db-after "Patient"))) (is (coll/empty? (d/system-history db-after))) - (is (coll/empty? (d/changes db-after))))))) + (is (coll/empty? (d/changes db-after))))) + + (testing "purging it again" + (let [db-after @(d/transact node [[:patient-purge "0"]])] + + (testing "the patient is still gone" + (is (nil? (d/resource-handle db-after "Patient" "0"))) + (is (zero? (d/total-num-of-instance-changes db-after "Patient" "0"))) + (is (zero? (d/total-num-of-type-changes db-after "Patient"))) + (is (zero? (d/total-num-of-system-changes db-after))) + (is (coll/empty? (d/instance-history db-after "Patient" "0"))) + (is (coll/empty? (d/type-history db-after "Patient"))) + (is (coll/empty? (d/system-history db-after))) + (is (coll/empty? (d/changes db-after))))))) + + (with-system-data [{:blaze.db/keys [node]} config] + [[[:create {:fhir/type :fhir/Patient :id "0"}]]] + + (testing "purging a non-exiting patient" + (let [db-after @(d/transact node [[:patient-purge "1"]])] + + (testing "the existing patient is fine" + (given (d/resource-handle db-after "Patient" "0") + :id := "0" + :op := :create)) + + (testing "the purged patient doesn't exist" + (is (nil? (d/resource-handle db-after "Patient" "1")))))))) (testing "one deleted patient" (with-system-data [{:blaze.db/keys [node]} config] @@ -1099,6 +1173,29 @@ (is (= 1 (count (d/system-history db-after)))) (is (coll/empty? (d/changes db-after)))))))) + (testing "three patients created in different transactions" + (with-system-data [{:blaze.db/keys [node]} config] + [[[:create {:fhir/type :fhir/Patient :id "0"}]] + [[:create {:fhir/type :fhir/Patient :id "1"}]] + [[:create {:fhir/type :fhir/Patient :id "2"}]]] + + (testing "purging the middle one" + (let [db-after @(d/transact node [[:patient-purge "1"]])] + + (testing "the type history contains the two remaining patients" + (is (= 2 (d/total-num-of-type-changes db-after "Patient"))) + (given @(d/pull-many node (d/type-history db-after "Patient")) + count := 2 + [0 :id] := "2" + [1 :id] := "0")) + + (testing "the system history contains the two remaining patients" + (is (= 2 (d/total-num-of-system-changes db-after))) + (given @(d/pull-many node (d/system-history db-after)) + count := 2 + [0 :id] := "2" + [1 :id] := "0")))))) + (testing "one patient with one observation" (with-system-data [{:blaze.db/keys [node]} config] [[[:create {:fhir/type :fhir/Patient :id "0"}] @@ -1122,6 +1219,37 @@ (is (coll/empty? (d/system-history db-after))) (is (coll/empty? (d/changes db-after))))))) + (testing "one patient with one observation and one encounter" + (with-system-data [{:blaze.db/keys [node]} config] + [[[:create {:fhir/type :fhir/Patient :id "0"}] + [:create {:fhir/type :fhir/Encounter :id "0" + :subject #fhir/Reference{:reference "Patient/0"}}] + [:create {:fhir/type :fhir/Observation :id "0" + :subject #fhir/Reference{:reference "Patient/0"} + :encounter #fhir/Reference{:reference "Encounter/0"}}]]] + + (let [db-after @(d/transact node [[:patient-purge "0"]])] + + (testing "the patient, the observation and the encounter are gone" + (is (nil? (d/resource-handle db-after "Patient" "0"))) + (is (nil? (d/resource-handle db-after "Encounter" "0"))) + (is (nil? (d/resource-handle db-after "Observation" "0"))) + (is (zero? (d/total-num-of-instance-changes db-after "Patient" "0"))) + (is (zero? (d/total-num-of-instance-changes db-after "Encounter" "0"))) + (is (zero? (d/total-num-of-instance-changes db-after "Observation" "0"))) + (is (zero? (d/total-num-of-type-changes db-after "Patient"))) + (is (zero? (d/total-num-of-type-changes db-after "Encounter"))) + (is (zero? (d/total-num-of-type-changes db-after "Observation"))) + (is (zero? (d/total-num-of-system-changes db-after))) + (is (coll/empty? (d/instance-history db-after "Patient" "0"))) + (is (coll/empty? (d/instance-history db-after "Encounter" "0"))) + (is (coll/empty? (d/instance-history db-after "Observation" "0"))) + (is (coll/empty? (d/type-history db-after "Patient"))) + (is (coll/empty? (d/type-history db-after "Encounter"))) + (is (coll/empty? (d/type-history db-after "Observation"))) + (is (coll/empty? (d/system-history db-after))) + (is (coll/empty? (d/changes db-after))))))) + (testing "one patient with one MedicationAdministration and Medication" (with-system-data [{:blaze.db/keys [node]} config] [[[:put {:fhir/type :fhir/Patient :id "0"}] @@ -1132,7 +1260,7 @@ (let [db-after @(d/transact node [[:patient-purge "0"]])] - (testing "the Medication says" + (testing "the Medication stays" (is (nil? (d/resource-handle db-after "Patient" "0"))) (is (nil? (d/resource-handle db-after "MedicationAdministration" "0"))) (is (= "0" (:id (d/resource-handle db-after "Medication" "0")))) @@ -1152,6 +1280,48 @@ (is (= 1 (count (d/system-history db-after)))) (is (coll/empty? (d/changes db-after))))))) + (testing "purge one patient and delete another in the same transaction" + (with-system-data [{:blaze.db/keys [node]} config] + [[[:create {:fhir/type :fhir/Patient :id "0"}]] + [[:create {:fhir/type :fhir/Patient :id "1"}]] + [[:put {:fhir/type :fhir/Patient :id "0" :active true}]]] + + (let [db-after @(d/transact node [[:patient-purge "0"] + [:delete "Patient" "1"]])] + + (testing "both patients are gone" + (is (coll/empty? (d/type-list db-after "Patient")))) + + (testing "patient history contains two entries from patient 1" + (is (= 2 (d/total-num-of-type-changes db-after "Patient"))) + (is (= ["1" "1"] (mapv :id (d/type-history db-after "Patient"))))) + + (testing "system history contains two entries from patient 1" + (is (= 2 (d/total-num-of-system-changes db-after))) + (is (= ["1" "1"] (mapv :id (d/system-history db-after)))))))) + + (testing "purge, create and purge a patient" + (with-system-data [{:blaze.db/keys [node]} config] + [[[:create {:fhir/type :fhir/Patient :id "0"}]] + [[:patient-purge "0"]]] + + (let [db-after @(d/transact node [[:create {:fhir/type :fhir/Patient :id "0"}]])] + + (testing "the patient exists" + (is (= ["0"] (mapv :id (d/type-list db-after "Patient"))))) + + (testing "instance history contains one entry" + (is (= 1 (d/total-num-of-instance-changes db-after "Patient" "0"))) + (is (= ["0"] (mapv :id (d/instance-history db-after "Patient" "0"))))) + + (testing "type history contains one entry" + (is (= 1 (d/total-num-of-type-changes db-after "Patient"))) + (is (= ["0"] (mapv :id (d/type-history db-after "Patient"))))) + + (testing "system history contains one entry" + (is (= 1 (d/total-num-of-system-changes db-after))) + (is (= ["0"] (mapv :id (d/system-history db-after)))))))) + (testing "one patient linked by another patient" (with-system-data [{:blaze.db/keys [node]} config] [[[:put {:fhir/type :fhir/Patient :id "0"}] @@ -1162,7 +1332,30 @@ (given-failed-future (d/transact node [[:patient-purge "0"]]) ::anom/category := ::anom/conflict - ::anom/message := "Referential integrity violated. Resource `Patient/0` should be deleted but is referenced from `Patient/1`.")))) + ::anom/message := "Referential integrity violated. Resource `Patient/0` should be deleted but is referenced from `Patient/1`."))) + + (testing "one patient linked by another patient" + (with-system-data [{:blaze.db/keys [node]} config] + [[[:create {:fhir/type :fhir/Patient :id "0"}] + [:create {:fhir/type :fhir/Patient :id "1"}] + [:create {:fhir/type :fhir/Encounter :id "0" + :subject #fhir/Reference{:reference "Patient/0"}}] + [:create {:fhir/type :fhir/Observation :id "1" + :subject #fhir/Reference{:reference "Patient/1"} + :encounter #fhir/Reference{:reference "Encounter/0"}}]]] + + (given-failed-future (d/transact node [[:patient-purge "0"]]) + ::anom/category := ::anom/conflict + ::anom/message := "Referential integrity violated. Resource `Encounter/0` should be deleted but is referenced from `Observation/1`."))) + + (testing "purging an creating the same patient in one transaction fails" + (with-system-data [{:blaze.db/keys [node]} config] + [[[:create {:fhir/type :fhir/Patient :id "0"}]]] + + (given-failed-future (d/transact node [[:patient-purge "0"] + [:create {:fhir/type :fhir/Patient :id "0"}]]) + ::anom/category := ::anom/conflict + ::anom/message := "Duplicate transaction commands `create Patient/0` and `purge Patient/0`.")))) (deftest transact-patient-purge-too-many-test (log/set-min-level! :info)