diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 6575dde..47bbfe9 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,4 +1,41 @@ -### 0.0.0 (Released 2023-5-8) -* Additions: - * Initial set up for RELEASE_Notes.md - +### 0.0.1+6c5010d (Released 2023-12-11) +* Additions: + * [[#f034fe0](https://github.com/nfdi4plants/arc-validate/commit/f034fe0b6ff43dc02c050f5576283ee4ef8aa272)] Add demo notebook(s) (to sln file) + * [[#6c5010d](https://github.com/nfdi4plants/arc-validate/commit/6c5010dcabee7f22a8b4b3a57abc797831aa68af)] Comment out print statements + * [[#48fb9f9](https://github.com/nfdi4plants/arc-validate/commit/48fb9f9ee996d5724185298a597c023eb9b3ac26)] Add release notes to solution file + * [[#3de881c](https://github.com/nfdi4plants/arc-validate/commit/3de881cfb8d988f686eb26b861f740198053aacb)] Copy new metadata graph functionality from script to source file + * [[#61460ed](https://github.com/nfdi4plants/arc-validate/commit/61460ed2544e78e5bd3dac1055e4529259d3ff73)] Add internal FGraph augmentations + * [[#bf4d60f](https://github.com/nfdi4plants/arc-validate/commit/bf4d60ffedc5c0565e87f41d8ddc3076f4242b54)] Add another ontology graph function + * [[#e3b7dc5](https://github.com/nfdi4plants/arc-validate/commit/e3b7dc535e72d8b3fedaae204fa6f0918cf43fa3)] Update Graphoscope dependency + * [[#5aaa8f4](https://github.com/nfdi4plants/arc-validate/commit/5aaa8f4e6f995155abb38a85cdeff57bfe120fc8)] Update prototype: split graph functionality + * [[#64a42f7](https://github.com/nfdi4plants/arc-validate/commit/64a42f7dcc5ab11c7f2bedfe64b8d62e432ed274)] Update vis functions + * [[#78f5686](https://github.com/nfdi4plants/arc-validate/commit/78f56868354a12d80f21e662e1a67922d058e6a1)] (WIP) Update prototype: split function + * [[#ef20f76](https://github.com/nfdi4plants/arc-validate/commit/ef20f76c0a5fd568b39bd795cbc3076ca147467a)] Update working graph + * [[#3329967](https://github.com/nfdi4plants/arc-validate/commit/3329967802290004723cda5d3aed7a6b0a703149)] Update 
CyGraph viz function + * [[#56b0b06](https://github.com/nfdi4plants/arc-validate/commit/56b0b06c93bfbf1484455e4cdc36b0c593d70b00)] Raise F#Aux dependency + * [[#b8e5c15](https://github.com/nfdi4plants/arc-validate/commit/b8e5c1544c7e06903442d0eb8ef9483314e1104b)] Update working graph and change its file directory + * [[#9ee242d](https://github.com/nfdi4plants/arc-validate/commit/9ee242d1de406212f14a7676f16f489da351b884)] Update ARCGraph Viz functionality + * [[#dc5ada4](https://github.com/nfdi4plants/arc-validate/commit/dc5ada40994f0366112ee60e4815938f590f22b9)] Update prototype: first finish graph constructor + * [[#2c3d809](https://github.com/nfdi4plants/arc-validate/commit/2c3d809df59bf907ee3c89b16acdf0466f838b23)] Add new cases to ARCRelation + * [[#3f6340d](https://github.com/nfdi4plants/arc-validate/commit/3f6340d55f7b5fbb6969b187e3c22493a68ede79)] Add script files to sln file + * [[#12d6529](https://github.com/nfdi4plants/arc-validate/commit/12d652906240171f2272cf376be7aa059c081822)] Update prototype: finalize missing group terms + * [[#bb81ada](https://github.com/nfdi4plants/arc-validate/commit/bb81adaaa485899b0bd6d0e2b3285002ec04d89d)] Update coloring in working graph + * [[#a58c944](https://github.com/nfdi4plants/arc-validate/commit/a58c94476098a77ee682e895abcda156fc548dc7)] Update prototype: addMissingTermsInGroup + * [[#f26a9b5](https://github.com/nfdi4plants/arc-validate/commit/f26a9b580d027c9ce37c2e1d0f54c386bbfe305b)] Update working graph's visuals + * [[#9604978](https://github.com/nfdi4plants/arc-validate/commit/96049785322a89a84655a7a71bc890dc9cea9ca3)] Prototype: work around graph error(s) + * [[#5618b40](https://github.com/nfdi4plants/arc-validate/commit/5618b403856b5a3e91820686ebdd74bf11e06129)] Prototype: Finalize flowchart graph diagram + * [[#f9a2837](https://github.com/nfdi4plants/arc-validate/commit/f9a283779ea0761d093a804c07b9b51167631144)] Update prototype: working graph + * 
[[#fa881fa](https://github.com/nfdi4plants/arc-validate/commit/fa881fa055b2a6bb8e9e944c88da38a73fe736f3)] Add flowchart for prototype workflow + * [[#4dbcf11](https://github.com/nfdi4plants/arc-validate/commit/4dbcf1191123940f1cd66cc90fc9a7f18bde12cc)] Update prototype: more branching + * [[#1581357](https://github.com/nfdi4plants/arc-validate/commit/1581357b3d76c72e7b07e7aef639451339738c94)] Restructure playgrounds & prototypes in sln + * [[#2577f60](https://github.com/nfdi4plants/arc-validate/commit/2577f60bbb659a4e5f3bf00712703940a616d8b7)] Update prototype: TF deconstruction, relations +* Bugfixes: + * [[#6cadf19](https://github.com/nfdi4plants/arc-validate/commit/6cadf19467518f3965237d48afa5c4ca16b732d3)] Fix build project in terms of release notes + * [[#30a9afd](https://github.com/nfdi4plants/arc-validate/commit/30a9afd0c7661d712c5ff1a84ba43e78e20ddd54)] Update prototype: Fix ontology graph creation bugs + * [[#816aa34](https://github.com/nfdi4plants/arc-validate/commit/816aa3407bc83086fa6887d030ea79211169c946)] (WIP) Update prototype: Fix ontology graph bug + * [[#fca6b03](https://github.com/nfdi4plants/arc-validate/commit/fca6b036b7c3c22a3b844a4ca5981ca5d7ad16cd)] (WIP) Work on bugs in prototype + +### 0.0.0 (Released 2023-5-8) +* Additions: + * Initial set up for RELEASE_Notes.md + diff --git a/arc-validate.sln b/arc-validate.sln index 507aaf0..68691b1 100644 --- a/arc-validate.sln +++ b/arc-validate.sln @@ -13,6 +13,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "sln", "sln", "{B3F07465-210 ErrorClassOntology.obo = ErrorClassOntology.obo global.json = global.json README.md = README.md + RELEASE_NOTES.md = RELEASE_NOTES.md TestOntology.obo = TestOntology.obo EndProjectSection EndProject @@ -46,15 +47,15 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "playgrounds", "playgrounds" ProjectSection(SolutionItems) = preProject playgrounds\arc-validation-packages.fsx = playgrounds\arc-validation-packages.fsx playgrounds\argu.fsx = 
playgrounds\argu.fsx - codeGeneratorScripting.fsx = codeGeneratorScripting.fsx - errorClassesStatic.fsx = errorClassesStatic.fsx - expectoPlayground.fsx = expectoPlayground.fsx - getAnnotationTableCvPs.fsx = getAnnotationTableCvPs.fsx + playgrounds\codeGeneratorScripting.fsx = playgrounds\codeGeneratorScripting.fsx + playgrounds\errorClassesStatic.fsx = playgrounds\errorClassesStatic.fsx + playgrounds\expectoPlayground.fsx = playgrounds\expectoPlayground.fsx + playgrounds\getAnnotationTableCvPs.fsx = playgrounds\getAnnotationTableCvPs.fsx playgrounds\github-api.fsx = playgrounds\github-api.fsx - graphModelIOTest.fsx = graphModelIOTest.fsx - graphoscopePlayground.fsx = graphoscopePlayground.fsx - playground.fsx = playground.fsx - prototype.fsx = prototype.fsx + playgrounds\graphModelIOTest.fsx = playgrounds\graphModelIOTest.fsx + playgrounds\graphoscopePlayground.fsx = playgrounds\graphoscopePlayground.fsx + playgrounds\playground.fsx = playgrounds\playground.fsx + playgrounds\workingGraph.md = playgrounds\workingGraph.md EndProjectSection EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "ARCValidationPackages", "src\ARCValidationPackages\ARCValidationPackages.fsproj", "{CF14C74E-20D2-4EC9-B11E-357BFD1244CB}" @@ -67,6 +68,19 @@ Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "ARCExpect", "src\ARCExpect\ EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "ARCExpect.Tests", "tests\ARCExpect.Tests\ARCExpect.Tests.fsproj", "{46EA4BDB-095A-4369-AD7E-07E702CD3E38}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "arcGraph_prototypes", "arcGraph_prototypes", "{02699157-EFC4-4E1A-94CC-B9825E2D1CB8}" + ProjectSection(SolutionItems) = preProject + playgrounds\arcGraph_playgrounds\prototype_v0.1.0.fsx = playgrounds\arcGraph_playgrounds\prototype_v0.1.0.fsx + playgrounds\arcGraph_playgrounds\prototype_v0.1.1.fsx = playgrounds\arcGraph_playgrounds\prototype_v0.1.1.fsx + playgrounds\arcGraph_playgrounds\prototype_v0.2.0.fsx = 
playgrounds\arcGraph_playgrounds\prototype_v0.2.0.fsx + playgrounds\arcGraph_playgrounds\prototype_v0.2.1.fsx = playgrounds\arcGraph_playgrounds\prototype_v0.2.1.fsx + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "demo_notebooks", "demo_notebooks", "{A83F65C9-925E-437C-A457-EF8B9C6B154D}" + ProjectSection(SolutionItems) = preProject + playgrounds\demo_notebooks\demo_ARCGraph.ipynb = playgrounds\demo_notebooks\demo_ARCGraph.ipynb + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -114,6 +128,8 @@ Global {0DBD3705-F884-4D89-BBEC-B5BDC41905FD} = {5C204780-A94B-46A2-B768-B12C7D81DDFC} {33E89EBC-A0FB-405D-9ED8-756B0CADBB28} = {6275F297-567B-421B-B055-4F88B2785765} {46EA4BDB-095A-4369-AD7E-07E702CD3E38} = {5C204780-A94B-46A2-B768-B12C7D81DDFC} + {02699157-EFC4-4E1A-94CC-B9825E2D1CB8} = {7994D526-5B21-4EBD-9CCB-EFA04EE303CF} + {A83F65C9-925E-437C-A457-EF8B9C6B154D} = {B3F07465-2108-4C43-843A-9F503044FDB7} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {FF9C2722-38D9-48C4-8095-4726C56E92DE} diff --git a/build/Build.fs b/build/Build.fs index b0150e6..72a2ba3 100644 --- a/build/Build.fs +++ b/build/Build.fs @@ -41,6 +41,8 @@ let _preReleaseNoDocs = "PreReleaseNoDocs" [setPrereleaseTag; clean; build; runTests; packPrerelease; createPrereleaseTag; publishNugetPrerelease] +let _releaseNotes = ReleaseNotesTasks.updateReleaseNotes + [] let main args = runOrDefault build args diff --git a/ErrorClassOntology.obo b/playgrounds/ErrorClassOntology.obo similarity index 100% rename from ErrorClassOntology.obo rename to playgrounds/ErrorClassOntology.obo diff --git a/TestOntology.obo b/playgrounds/TestOntology.obo similarity index 100% rename from TestOntology.obo rename to playgrounds/TestOntology.obo diff --git a/prototype.fsx b/playgrounds/arcGraph_playgrounds/prototype_v0.1.0.fsx similarity index 75% rename from prototype.fsx 
rename to playgrounds/arcGraph_playgrounds/prototype_v0.1.0.fsx index da46611..6ac03ed 100644 --- a/prototype.fsx +++ b/playgrounds/arcGraph_playgrounds/prototype_v0.1.0.fsx @@ -1,10 +1,10 @@ -#I "src/ArcValidation/bin/Debug/netstandard2.0" -#I "src/ArcValidation/bin/Release/netstandard2.0" -#r "ARCValidation.dll" -//#I "../ARCTokenization/src/ARCTokenization/bin/Debug/netstandard2.0" -//#I "../ARCTokenization/src/ARCTokenization/bin/Release/netstandard2.0" -//#r "ARCTokenization.dll" -//#r "ControlledVocabulary.dll" +#I "../ARCTokenization/src/ARCTokenization/bin/Debug/netstandard2.0" +#I "../ARCTokenization/src/ARCTokenization/bin/Release/netstandard2.0" +#r "ARCTokenization.dll" +#r "ControlledVocabulary.dll" +#I "src/ARCExpect/bin/Debug/netstandard2.0" +#I "src/ARCExpect/bin/Release/netstandard2.0" +#r "ARCExpect.dll" //#r "nuget: ARCTokenization" #r "nuget: Expecto" @@ -25,88 +25,15 @@ open Graphoscope open FsOboParser open Cytoscape.NET -open ArcValidation -open ArcValidation.OboGraph -open ArcValidation.ArcGraph -open ArcValidation.ArcGraph.Visualization +open ARCExpect +open ARCExpect.OboGraph +open ARCExpect.ARCGraph +open ARCExpect.ARCGraph.Visualization open System.Collections.Generic open System.Text.RegularExpressions -//// from internal module copypasted - -//open Impl - -//let performTest test = -// let w = System.Diagnostics.Stopwatch() -// w.Start() -// evalTests Tests.defaultConfig test -// |> Async.RunSynchronously -// |> fun r -> -// w.Stop() -// { -// results = r -// duration = w.Elapsed -// maxMemory = 0L -// memoryLimit = 0L -// timedOut = [] -// } - - - -let paramse = ARCTokenization.Investigation.parseMetadataSheetFromFile @"C:\Repos\git.nfdi4plants.org\ArcPrototype\isa.investigation.xlsx" - -//paramse |> List.map (fun p -> p.ToString() |> String.contains "CvParam") |> List.reduce (&&) -paramse |> List.iter (fun p -> printfn "%A" <| p.GetType().ToString()) -paramse |> List.iter (fun p -> printfn "%A" <| (p.Value |> 
ParamValue.getValueAsString)) -paramse |> List.iter (fun p -> printfn "%A" <| p.Name) - -//let cvparamse = paramse |> List.map (CvParam.tryCvParam >> Option.get) -//let cvparamse = -// paramse -// |> List.map ( -// fun p -> -// match CvParam.tryCvParam p with -// | Some cvp -> cvp -// | None -> CvParam(p.ID, p.Name, p.RefUri, p.Value, p :?> CvAttributeCollection) -// ) -let cvparamse = paramse |> List.map (Param.tryCvParam >> Option.get) - -//let fromCvParamList cvpList = -// cvpList -// |> List.mapi ( -// fun i cvp -> -// (i,CvBase.getCvName cvp), cvp -// ) -// |> FGraph.createFromNodes - -//let invesContentGraph = fromCvParamList cvparamse - -let obo = ARCTokenization.Terms.InvestigationMetadata.ontology - -//let tans = cvparamse |> List.map CvParam.getCvAccession - -//let assTerms = tans |> List.choose (fun tan -> obo.Terms |> List.tryFind (fun term -> term.Id = tan)) -//assTerms |> List.fold (fun acc y -> acc && Option.isSome y) true - -//let assTermsRelships = assTerms |> List.collect (fun x -> OboOntology.getRelatedTerms x obo) - -//toRelation "part_of" + toRelation "has_a" + toRelation "follows" -//toRelation "part_of" ||| toRelation "has_a" ||| toRelation "follows" - -//let assTermsRels = assTermsRelships |> List.map (fun (o1,rs,o2) -> o1, toRelation rs, o2) - -//invesContentGraph.Keys |> Seq.head -//invesContentGraph.Values |> Seq.head - -let ontoGraph = ontologyToFGraph obo - -//ontoGraph |> printGraph (fun x -> x.Name) - -//ontoGraphToFullCyGraph ontoGraph |> CyGraph.show - - // OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO // Helper functions for ISA graph construction // OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO @@ -149,7 +76,7 @@ let equalsFollows onto cvp1 cvp2 = //OboTerm.toCvTerm (ontoGraph.Values |> Seq.head |> fun (_,t,_) -> t) -//cvparamse.[1].Attributes |> Dictionary.item "Row" +//cvparamse.[1].Attributes |> Dictionary.item "Row" //cvparamse[7]["Column"] //(createEmptyFollowsCvParam ontoGraph cvparamse[8])["Column"] @@ -899,184 +826,3 @@ module 
Expect = -// !!!!!!!!!!SEHR GUT!!!!!!!!!!!! -module ArcExpect = -// alternativ: Expect.ARC.isNonEmpty usw. - - // davon werden wir vllt. 10 Stück oder so brauchen - let hasMetadataSectionKey (arcValidateContext : ARCValidateContext) testName key = - match Dictionary.tryFind key arcValidateContext.Tokens with - | Some value -> - try - getMetadataSectionKey value |> ignore - ARCValidateContext.addTestCondition testName true arcValidateContext - with - | _ -> - ARCValidateContext.addTestCondition testName false arcValidateContext - failtestf "%s" (createErrorStack arcValidateContext.Filepath) - | None -> - ARCValidateContext.addTestCondition testName false arcValidateContext - failtestf "%s" (createErrorStack arcValidateContext.Filepath) - - /// - let hasValues (arcValidateContext : ARCValidateContext) testName key = - match Dictionary.tryFind key arcValidateContext.Tokens with - | Some value -> - let mdsk = getMetadataSectionKey value - let row = (mdsk :?> CvParam).GetAttribute(Address.row) |> Param.getValueAsInt - let col = ((mdsk :?> CvParam).GetAttribute(Address.column) |> Param.getValueAsInt) + 1 - let sheet = (mdsk :?> CvParam).GetAttribute(Address.worksheet) |> Param.getValueAsString - //let message = Message.Create(invPath, XLSXFileKind, row, col, sheet) - value // hier muss das filtern noch raus, das soll bereits vorher passieren - |> List.filter (fun ip -> Param.getValueAsString ip <> (Terms.StructuralTerms.metadataSectionKey |> CvTerm.getName)) - |> fun res -> - match res with - | [] -> - ARCValidateContext.addTestCondition testName false arcValidateContext - failtestf "%s" (createErrorStackWithCell invPath sheet row col) - | _ -> ARCValidateContext.addTestCondition testName true arcValidateContext - | None -> - ARCValidateContext.addTestCondition testName false arcValidateContext - failtestf "%s" (createErrorStack arcValidateContext.Filepath) - - let hasAllMetadataSectionKeys (arcValidateContext : ARCValidateContext) testName keyList = - keyList - |> 
List.iter (hasMetadataSectionKey arcValidateContext testName) - - - -let tl = - testSequenced ( - testList "Critical" [ - let myArcContext = ARCValidateContext.create invDict (Dictionary()) invPath - let areAllMetadataSectionKeysPresentTest = - ARCTest.Create( - error = Error.MissingEntity.MissingMetadataKey.name, - position = InvestigationMetadata.name, - arcValidateContext = myArcContext, - test = ArcExpect.hasMetadataSectionKey - ) - let hasMetadataSectionKeyTest = - ARCTest.Create( - error = Error.MissingEntity.MissingMetadataKey.name, - position = InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name, - arcValidateContext = myArcContext, - test = ArcExpect.hasMetadataSectionKey - ) - let hasValuesTest = - ARCTest.CreateDependent( - error = Error.MissingEntity.MissingValue.name, - position = InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name, - arcValidateContext = myArcContext, - dependsOnTest = hasMetadataSectionKeyTest.Name, - test = ArcExpect.hasValues - ) - areAllMetadataSectionKeysPresentTest.Test - hasMetadataSectionKeyTest.Test - hasValuesTest.Test - ] - ) - -tl |> performTest - - //testCase $"{Error.MissingEntity.MissingValue.name} test: {InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name}" <| fun () -> - // ArcExpect.isNotEmpty invDict "Investigation Person First Name" - - - - - - - - - - - - - - //let exists - -Error.MissingEntity.MissingValue.name - -testList "Critical" [ - //testCase - testCaseArc - Error.MissingEntity.MissingValue.name - InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name - - ArcExpect.isNotEmpty - //ArcExpect.isNotEmpty invDict "Investigation Person First Name" -] -|> performTest -// !!!!!!!!!!!!!!!!!!!!!!!!! 
- -ArcExpect.isNonEmpty invDict "Investigation Person First Name" - - -// ValidationResult nicht notwendig, stattdessen alles in Expecto-Funktion evaluieren -// MetadataSection aus dem Dictionary raus -// Addresse bekommen in eine schmale Funktion / zu einer Funktion machen -// createErrorStack-Funktion soll CvParam, Pfad und ErrorOntologyTermName als Parameter bekommen und daraus Fehlermeldung zurückgeben -// "wrong format" fehlt noch in der ErrorOntology -// Expectos Expect.blabla Funktionen alle für uns so schreiben, dass es von der Message her passt - -let hasPersonFirstNames = - if Dictionary.containsKey "Investigation Person First Name" invDict then - invDict["Investigation Person First Name"] - |> fun ipl -> - let values = - ipl - |> List.filter (fun ip -> Param.getValueAsString ip <> (Terms.StructuralTerms.metadataSectionKey |> CvTerm.getName)) - let check = List.isEmpty values |> not - if check then - Success - else - let mdsk = getMetadataSectionKey ipl - let row = (mdsk :?> CvParam).GetAttribute(Address.row) |> Param.getValueAsInt - let col = ((mdsk :?> CvParam).GetAttribute(Address.column) |> Param.getValueAsInt) + 1 - let sheet = (mdsk :?> CvParam).GetAttribute(Address.worksheet) |> Param.getValueAsString - let message = Message.Create(invPath, XLSXFileKind, row, col, sheet) - Error message - else Error (Message.Create(invPath, XLSXFileKind, 0, 0, "")) - |> fun res -> - testCase InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name (fun _ -> - res - |> throwError ( - fun m -> - createErrorStackXlsxFile - m - InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name - Error.MissingEntity.MissingValue.name - ) - ) - - - -let case = - testCase InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name (fun _ -> - hasPersonFirstNames - |> throwError ( - fun m -> - createErrorStackXlsxFile - m - InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name - 
Error.MissingEntity.MissingValue.name - ) - ) - - - - -case |> performTest - -//let ups = inv |> List.choose UserParam.tryUserParam -//let cvpsEmptyVals = inv |> List.choose CvParam.tryCvParam |> List.filter (Param.getValueAsString >> (=) "") - -//let inv = ARCTokenization.Investigation.parseMetadataRowsFromFile @"C:/Repos/gitlab.nfdi4plants.org/ArcPrototype/isa.investigation.xlsx" - -//inv[20] - -//Param.getValueAsTerm (CvParam("1", "2", "3", ParamValue.Value "")) -//Param.getValueAsString (CvParam("1", "2", "3", ParamValue.Value "")) -//Param.getValueAsString (CvParam("1", "2", "3", ParamValue.CvValue ("1", "Pimmel", "3"))) -//Param.getValueAsTerm (CvParam("1", "2", "3", ParamValue.CvValue ("1", "Pimmel", "3"))) \ No newline at end of file diff --git a/playgrounds/arcGraph_playgrounds/prototype_v0.1.1.fsx b/playgrounds/arcGraph_playgrounds/prototype_v0.1.1.fsx new file mode 100644 index 0000000..ec59053 --- /dev/null +++ b/playgrounds/arcGraph_playgrounds/prototype_v0.1.1.fsx @@ -0,0 +1,217 @@ +#I "../ARCTokenization/src/ARCTokenization/bin/Debug/netstandard2.0" +#I "../ARCTokenization/src/ARCTokenization/bin/Release/netstandard2.0" +#r "ARCTokenization.dll" +#r "ControlledVocabulary.dll" +#I "src/ARCExpect/bin/Debug/netstandard2.0" +#I "src/ARCExpect/bin/Release/netstandard2.0" +#r "ARCExpect.dll" + +//#r "nuget: ARCTokenization" +#r "nuget: Expecto" +#r "nuget: FSharpAux, 1.1.0" +#r "nuget: Graphoscope" +#r "nuget: Cytoscape.NET" +#r "nuget: FsOboParser, 0.3.0" +#r "nuget: FsSpreadsheet.ExcelIO, 4.1.0" + + +open Expecto +open ControlledVocabulary +open ARCTokenization +open FSharpAux +//open ArcValidation.OntologyHelperFunctions +//open ArcValidation.ErrorMessage +open Graphoscope +open FsOboParser +open Cytoscape.NET + +open ARCExpect +open ARCExpect.OboGraph +open ARCExpect.ARCGraph +open ARCExpect.ARCGraph.Visualization + +open System.Collections.Generic +open System.Text.RegularExpressions + + +// !!!!!!!!!!SEHR GUT!!!!!!!!!!!! 
+module ArcExpect =
+// alternative: Expect.ARC.isNonEmpty etc.
+    // we will probably need about 10 of these
+    /// Records a passing test condition for `testName` if `key` is present in the context's tokens and `getMetadataSectionKey` succeeds on its values; otherwise records a failing condition and fails the test.
+    let hasMetadataSectionKey (arcValidateContext : ARCValidateContext) testName key =
+        match Dictionary.tryFind key arcValidateContext.Tokens with
+        | Some value ->
+            try
+                getMetadataSectionKey value |> ignore
+                ARCValidateContext.addTestCondition testName true arcValidateContext
+            with
+                | _ ->
+                    ARCValidateContext.addTestCondition testName false arcValidateContext
+                    failtestf "%s" (createErrorStack arcValidateContext.Filepath)
+        | None ->
+            ARCValidateContext.addTestCondition testName false arcValidateContext
+            failtestf "%s" (createErrorStack arcValidateContext.Filepath)
+
+    /// Records a passing test condition for `testName` if the tokens stored under `key` contain at least one param whose string value differs from the metadata section key's name; otherwise records a failing condition and fails the test (with the cell address of the section key when `key` itself is present).
+    let hasValues (arcValidateContext : ARCValidateContext) testName key =
+        match Dictionary.tryFind key arcValidateContext.Tokens with
+        | Some value ->
+            let mdsk = getMetadataSectionKey value
+            let row = (mdsk :?> CvParam).GetAttribute(Address.row) |> Param.getValueAsInt
+            let col = ((mdsk :?> CvParam).GetAttribute(Address.column) |> Param.getValueAsInt) + 1
+            let sheet = (mdsk :?> CvParam).GetAttribute(Address.worksheet) |> Param.getValueAsString
+            //let message = Message.Create(invPath, XLSXFileKind, row, col, sheet)
+            value // the filtering still needs to be moved out of here; it should already happen beforehand
+            |> List.filter (fun ip -> Param.getValueAsString ip <> (Terms.StructuralTerms.metadataSectionKey |> CvTerm.getName))
+            |> fun res ->
+                match res with
+                | [] ->
+                    ARCValidateContext.addTestCondition testName false arcValidateContext
+                    failtestf "%s" (createErrorStackWithCell arcValidateContext.Filepath sheet row col) // use the context's file path like every other branch instead of the script-global `invPath`
+                | _ -> ARCValidateContext.addTestCondition testName true arcValidateContext
+        | None ->
+            ARCValidateContext.addTestCondition testName false arcValidateContext
+            failtestf "%s" (createErrorStack arcValidateContext.Filepath)
+    /// Runs `hasMetadataSectionKey` (with the same context and test name) for every key in `keyList`.
+    let hasAllMetadataSectionKeys (arcValidateContext : ARCValidateContext) testName keyList =
+        keyList
+        |> List.iter (hasMetadataSectionKey arcValidateContext testName)
+
+
+/// Sequenced "Critical" test list built on one shared ARCValidateContext: two metadata-section-key tests and a values test that depends on the second one.
+let tl =
+    testSequenced (
+        testList "Critical" [
+            let myArcContext = ARCValidateContext.create invDict (Dictionary()) invPath
+            let areAllMetadataSectionKeysPresentTest =
+                ARCTest.Create(
+                    error = Error.MissingEntity.MissingMetadataKey.name,
+                    position = InvestigationMetadata.name,
+                    arcValidateContext = myArcContext,
+                    test = ArcExpect.hasMetadataSectionKey
+                )
+            let hasMetadataSectionKeyTest =
+                ARCTest.Create(
+                    error = Error.MissingEntity.MissingMetadataKey.name,
+                    position = InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name,
+                    arcValidateContext = myArcContext,
+                    test = ArcExpect.hasMetadataSectionKey
+                )
+            let hasValuesTest =
+                ARCTest.CreateDependent(
+                    error = Error.MissingEntity.MissingValue.name,
+                    position = InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name,
+                    arcValidateContext = myArcContext,
+                    dependsOnTest = hasMetadataSectionKeyTest.Name,
+                    test = ArcExpect.hasValues
+                )
+            areAllMetadataSectionKeysPresentTest.Test
+            hasMetadataSectionKeyTest.Test
+            hasValuesTest.Test
+        ]
+    )
+
+tl |> performTest
+
+        //testCase $"{Error.MissingEntity.MissingValue.name} test: {InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name}" <| fun () ->
+        //    ArcExpect.isNotEmpty invDict "Investigation Person First Name"
+
+
+
+
+
+
+
+
+
+
+
+
+
+    //let exists
+
+Error.MissingEntity.MissingValue.name
+
+testList "Critical" [
+    //testCase
+    testCaseArc
+        Error.MissingEntity.MissingValue.name
+        InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name
+
+        ArcExpect.isNotEmpty
+            //ArcExpect.isNotEmpty invDict "Investigation Person First Name"
+]
+|> performTest
+// !!!!!!!!!!!!!!!!!!!!!!!!!
+ +ArcExpect.isNonEmpty invDict "Investigation Person First Name" + + +// ValidationResult nicht notwendig, stattdessen alles in Expecto-Funktion evaluieren +// MetadataSection aus dem Dictionary raus +// Addresse bekommen in eine schmale Funktion / zu einer Funktion machen +// createErrorStack-Funktion soll CvParam, Pfad und ErrorOntologyTermName als Parameter bekommen und daraus Fehlermeldung zurückgeben +// "wrong format" fehlt noch in der ErrorOntology +// Expectos Expect.blabla Funktionen alle für uns so schreiben, dass es von der Message her passt + +let hasPersonFirstNames = + if Dictionary.containsKey "Investigation Person First Name" invDict then + invDict["Investigation Person First Name"] + |> fun ipl -> + let values = + ipl + |> List.filter (fun ip -> Param.getValueAsString ip <> (Terms.StructuralTerms.metadataSectionKey |> CvTerm.getName)) + let check = List.isEmpty values |> not + if check then + Success + else + let mdsk = getMetadataSectionKey ipl + let row = (mdsk :?> CvParam).GetAttribute(Address.row) |> Param.getValueAsInt + let col = ((mdsk :?> CvParam).GetAttribute(Address.column) |> Param.getValueAsInt) + 1 + let sheet = (mdsk :?> CvParam).GetAttribute(Address.worksheet) |> Param.getValueAsString + let message = Message.Create(invPath, XLSXFileKind, row, col, sheet) + Error message + else Error (Message.Create(invPath, XLSXFileKind, 0, 0, "")) + |> fun res -> + testCase InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name (fun _ -> + res + |> throwError ( + fun m -> + createErrorStackXlsxFile + m + InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name + Error.MissingEntity.MissingValue.name + ) + ) + + + +let case = + testCase InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name (fun _ -> + hasPersonFirstNames + |> throwError ( + fun m -> + createErrorStackXlsxFile + m + InvestigationMetadata.InvestigationContacts.InvestigationPersonFirstName.name + 
Error.MissingEntity.MissingValue.name + ) + ) + + + + +case |> performTest + +//let ups = inv |> List.choose UserParam.tryUserParam +//let cvpsEmptyVals = inv |> List.choose CvParam.tryCvParam |> List.filter (Param.getValueAsString >> (=) "") + +//let inv = ARCTokenization.Investigation.parseMetadataRowsFromFile @"C:/Repos/gitlab.nfdi4plants.org/ArcPrototype/isa.investigation.xlsx" + +//inv[20] + +//Param.getValueAsTerm (CvParam("1", "2", "3", ParamValue.Value "")) +//Param.getValueAsString (CvParam("1", "2", "3", ParamValue.Value "")) +//Param.getValueAsString (CvParam("1", "2", "3", ParamValue.CvValue ("1", "lemmip", "3"))) +//Param.getValueAsTerm (CvParam("1", "2", "3", ParamValue.CvValue ("1", "lemmip", "3"))) \ No newline at end of file diff --git a/playgrounds/arcGraph_playgrounds/prototype_v0.2.0.fsx b/playgrounds/arcGraph_playgrounds/prototype_v0.2.0.fsx new file mode 100644 index 0000000..aee2a1a --- /dev/null +++ b/playgrounds/arcGraph_playgrounds/prototype_v0.2.0.fsx @@ -0,0 +1,379 @@ +#I "../ARCTokenization/src/ARCTokenization/bin/Debug/netstandard2.0" +#I "../ARCTokenization/src/ARCTokenization/bin/Release/netstandard2.0" +#r "ARCTokenization.dll" +#r "ControlledVocabulary.dll" +#I "src/ARCExpect/bin/Debug/netstandard2.0" +#I "src/ARCExpect/bin/Release/netstandard2.0" +#r "ARCExpect.dll" + +//#r "nuget: ARCTokenization" +#r "nuget: Expecto" +#r "nuget: FSharpAux, 1.1.0" +#r "nuget: Graphoscope" +#r "nuget: Cytoscape.NET" +#r "nuget: FsOboParser, 0.3.0" +#r "nuget: FsSpreadsheet.ExcelIO, 4.1.0" + + +open Expecto +open ControlledVocabulary +open ARCTokenization +open FSharpAux +//open ArcValidation.OntologyHelperFunctions +//open ArcValidation.ErrorMessage +open Graphoscope +open FsOboParser +open Cytoscape.NET + +open ARCExpect +open ARCExpect.OboGraph +open ARCExpect.ARCGraph +open ARCExpect.ARCGraph.Visualization + +open System.Collections.Generic +open System.Text.RegularExpressions + + +//// from internal module copypasted + +//open Impl + +//let 
performTest test = +// let w = System.Diagnostics.Stopwatch() +// w.Start() +// evalTests Tests.defaultConfig test +// |> Async.RunSynchronously +// |> fun r -> +// w.Stop() +// { +// results = r +// duration = w.Elapsed +// maxMemory = 0L +// memoryLimit = 0L +// timedOut = [] +// } + +let paramse = ARCTokenization.Investigation.parseMetadataSheetFromFile @"C:\Repos\git.nfdi4plants.org\ArcPrototype\isa.investigation.xlsx" + +//paramse |> List.map (fun p -> p.ToString() |> String.contains "CvParam") |> List.reduce (&&) +//paramse |> List.iter (fun p -> printfn "%A" <| p.GetType().ToString()) +//paramse |> List.iter (fun p -> printfn "%A" <| (p.Value |> ParamValue.getValueAsString)) +//paramse |> List.iter (fun p -> printfn "%A" <| p.Name) + +//let cvparamse = paramse |> List.map (CvParam.tryCvParam >> Option.get) +//let cvparamse = +// paramse +// |> List.map ( +// fun p -> +// match CvParam.tryCvParam p with +// | Some cvp -> cvp +// | None -> CvParam(p.ID, p.Name, p.RefUri, p.Value, p :?> CvAttributeCollection) +// ) + +let cvparamse = paramse |> List.map (Param.tryCvParam >> Option.get) + +//let fromCvParamList cvpList = +// cvpList +// |> List.mapi ( +// fun i cvp -> +// (i,CvBase.getCvName cvp), cvp +// ) +// |> FGraph.createFromNodes + +//let invesContentGraph = fromCvParamList cvparamse + +let onto = ARCTokenization.Terms.InvestigationMetadata.ontology + +//let tans = cvparamse |> List.map CvParam.getCvAccession + +//let assTerms = tans |> List.choose (fun tan -> obo.Terms |> List.tryFind (fun term -> term.Id = tan)) +//assTerms |> List.fold (fun acc y -> acc && Option.isSome y) true + +//let assTermsRelships = assTerms |> List.collect (fun x -> OboOntology.getRelatedTerms x obo) + +//toRelation "part_of" + toRelation "has_a" + toRelation "follows" +//toRelation "part_of" ||| toRelation "has_a" ||| toRelation "follows" + +//let assTermsRels = assTermsRelships |> List.map (fun (o1,rs,o2) -> o1, toRelation rs, o2) + +//invesContentGraph.Keys |> Seq.head 
+//invesContentGraph.Values |> Seq.head + +//let ontoGraph = ontologyToFGraph onto + +//ontoGraph |> printGraph (fun x -> x.Name) + +//ontoGraphToFullCyGraph ontoGraph |> CyGraph.show + +let ontologyToFGraphByName (onto : OboOntology) = + OboOntology.getRelations onto + |> List.fold ( + fun acc tr -> + match tr with + | Empty st -> FGraph.addNode st.Name st acc + | TargetMissing (rel,st) -> FGraph.addNode st.Name st acc + | Target (rel,st,tt) -> FGraph.addElement st.Name st tt.Name tt (ARCRelation.toARCRelation rel) acc + ) FGraph.empty + +let ontoGraph = ontologyToFGraphByName onto + + +// NEW METADATA GRAPH CREATION FUNCTION(S) + +// input: OboGraph, CvParam list + + +/// Returns the respective Term Source Ref of a given ID (= Term Accession Number). +let getRef id = + String.takeWhile ((<>) ':') id + + +/// Returns all IParams whose terms are not present in the given ontology but occur in the given CvParam list. +let getUnknownTerms (onto : OboOntology) (ips : IParam seq) = + ips + |> Seq.filter ( + fun ip -> + onto.Terms + |> Seq.exists (fun o -> OboTerm.toCvTerm o = Param.getTerm ip) + |> not + ) + +/// Returns all IParams whose terms have the `is_obsolete` tag in the given ontology. +let getObsoleteTerms (onto : OboOntology) (ips : IParam seq) = + ips + |> Seq.filter ( + fun ip -> + onto.Terms + |> Seq.exists (fun o -> o.IsObsolete && OboTerm.toCvTerm o = Param.getTerm ip) + ) + +let obsos = getObsoleteTerms onto paramse + +/// Returns all terms that are present in the given ontology but don't occur in the given CvParam list as CvParams. +let getMissingTerms (onto : OboOntology) (ips : IParam seq) = + onto.Terms + |> Seq.choose ( + fun o -> + if o.IsObsolete then None + else + let cvtObo = OboTerm.toCvTerm o + if not (ips |> Seq.exists (fun e -> Param.getTerm e = cvtObo)) then + Some (CvParam(cvtObo, Value "") :> IParam) + else None + ) + + +/// Representation of the familiarity of a CvParam's CvTerm. 
+/// If the CvTerm is known in, e.g., an ontology, use KnownTerm, else use UnknownTerm. ObsoleteTerm is for deprecated terms (i.e., OboTerm with `is_obsolete` = `true`).
+type TermFamiliarity =
+    | KnownTerm of IParam
+    | UnknownTerm of IParam
+    | ObsoleteTerm of IParam
+    | MisplacedTerm of IParam
+
+
+/// Takes an OboOntology and a seq of IParams and returns every IParam tagged as UnknownTerm, ObsoleteTerm or KnownTerm with respect to the given ontology.
+let markTerms onto ips =
+    let unknownTerms = getUnknownTerms onto ips
+    let obsoleteTerms = getObsoleteTerms onto ips
+    ips
+    |> Seq.map (
+        fun ip ->
+            // unknown takes precedence over obsolete; everything else counts as known
+            if Seq.contains ip unknownTerms then UnknownTerm ip
+            elif Seq.contains ip obsoleteTerms then ObsoleteTerm ip
+            else KnownTerm ip
+    )
+
+/// Takes an OboOntology and a seq of IParams and returns the seq with all ontology terms that are missing in it appended as empty-value CvParams.
+let addMissingTerms onto ips =
+    let missingTerms = getMissingTerms onto ips
+    Seq.append ips missingTerms
+
+/// Groups the given IParams by their name.
+let groupTerms (ips : IParam seq) =
+    ips |> Seq.groupBy (fun ip -> ip.Name) // if erroring: change to `.Accession`
+
+
+let ipsAdded = addMissingTerms onto paramse
+//ipsAdded |> Seq.iter (fun c -> c.Name |> printfn "%s")
+//onto.Terms |> List.filter (fun o -> o.Synonyms.Length > 0)
+
+//let ipsAggregated = aggregateTerms ipsAdded
+//ipsAggregated |> Seq.iter (printfn "%A")
+
+//let cvpsMarked = ipsAggregated |> Seq.map (fun (n,cs) -> n, markTerms onto cs)
+//cvpsMarked |> Seq.iter (fun c -> match c with | KnownTerm x | ObsoleteTerm x -> () | UnknownTerm x -> printfn "%A" x)
+
+type FGraph with
+
+    /// Returns the nodes of a given FGraph as pairs of node key and node data.
+    static member getNodes (graph : FGraph<'Nk,'Nd,'Ed>) =
+        graph
+        |> Seq.map (
+            fun kvp ->
+                let _,nodeData,_ = kvp.Value
+                kvp.Key, nodeData
+        )
+
+
+/// Returns the key of the first node in a structured ontology-FGraph that has no successors, i.e. that does not point to any other node.
+/// Throws if the graph contains no such node.
+let getTopNodeKey (ontoGraph : FGraph) =
+    ontoGraph.Keys
+    |> Seq.find (fun k -> FContext.successors ontoGraph[k] |> Seq.isEmpty)
+
+//ontoGraph[getTopNodeKey ontoGraph] |> fun (p,nd,s) -> nd
+
+///// Creates an intermediate graph with CvParam seq as nodedata.
+//let createIntermediateGraph (ontoGraph : FGraph) cvps =
+//    let topNodeKey = getTopNodeKey ontoGraph
+//    let rec loop inputList currentKey priorTerm outputGraph =
+//        let _,oboTerm,_ = ontoGraph[currentKey]
+//        let cvtObo = OboTerm.toCvTerm oboTerm
+
+/// Checks if a given IParam is a header term in a given ontology-based FGraph, i.e. if its term equals the term of a node that is the target of at least one part_of relation.
+let isHeader (ontoGraph : FGraph) ip =
+    ontoGraph.Keys
+    |> Seq.exists (
+        fun k ->
+            let hasPartOfs =
+                FContext.predecessors ontoGraph[k]
+                // relations are tested as flags elsewhere in this script (see isPartOfHeader), so a combined relation that contains part_of must count as well
+                |> Seq.exists (fun (nk,ed) -> ed.HasFlag ARCRelation.PartOf)
+            hasPartOfs && (ontoGraph[k] |> fun (p,nd,s) -> OboTerm.toCvTerm nd = Param.getTerm ip)
+    )
+
+//isHeader ontoGraph cvparamse[2]
+//isHeader ontoGraph cvparamse[5]
+
+
+/// Checks if a given IParam has a part_of relation to a given header term using an ontology-based FGraph. Both nodes are looked up by name.
+let isPartOfHeader (header : IParam) (ontoGraph : FGraph) (ip : IParam) =
+    ontoGraph[ip.Name] // change to `.Accession` if required
+    |> FContext.successors
+    |> Seq.exists (fun (nk,e) -> nk = header.Name && e.HasFlag ARCRelation.PartOf) // change to `.Accession` if required
+
+/// Checks if the given IParam contains an obsolete term using a given OboOntology. 
+let isObsoleteTerm (onto : OboOntology) (ip : IParam) =
+    let cvTerm = Param.getTerm ip
+    onto.Terms
+    |> Seq.exists (fun oboTerm -> oboTerm.IsObsolete && OboTerm.toCvTerm oboTerm = cvTerm)
+
+/// Takes a seq of grouped IParams (name * IParams) and tags every group according to its TermFamiliarity using a given OboOntology.
+/// The first group is treated as the header group and yields only the header itself as KnownTerm.
+let matchTerms (onto : OboOntology) (gips : (string * IParam seq) seq) =
+    let ontoGraph = ontologyToFGraphByName onto // if time performance is crucial, have this as parameter instead
+    let header = gips |> Seq.head |> snd |> Seq.head
+    printfn $"header: {header.Name}"
+    // picks the tag for a whole group: user params -> unknown, obsolete terms -> obsolete, part_of the header -> known, otherwise misplaced
+    let tagFor (ips : IParam seq) =
+        if Seq.exists (fun ip -> Param.tryUserParam ip |> Option.isSome) ips then UnknownTerm
+        elif Seq.exists (fun ip -> isObsoleteTerm onto ip) ips then ObsoleteTerm
+        elif Seq.exists (fun ip -> isPartOfHeader header ontoGraph ip) ips then KnownTerm
+        else MisplacedTerm
+    gips
+    |> Seq.mapi (
+        fun i (name,ips) ->
+            if i = 0 then
+                name, seq {KnownTerm header}
+            else
+                printfn $"ip: {(ips |> Seq.head).Name}"
+                name, Seq.map (tagFor ips) ips
+    )
+
+let partitionedIps = Seq.groupWhen (isHeader ontoGraph) paramse
+//partitionedIps |> Seq.map Seq.toList |> Seq.toList
+//partitionedIps |> Seq.iter (fun ips -> printfn ""; ips |> Seq.iter (fun ip -> printfn "%s" ip.Name))
+
+let groupedIps = partitionedIps |> Seq.map groupTerms
+//groupedIps|>Seq.iter(fun ips->printfn"";ips|>Seq.iter(fun(ipN,ipEs)->printfn$"{ipN}:";ipEs|>Seq.iter(fun ip->printfn$"\t{ParamValue.getValueAsString ip.Value}")))
+
+// deprecated: (dropped in favor of reworking matchTerms input parameter)
+///// Aggregates groups of IParams together. 
+//let aggregateTerms (groupedIps : (string * IParam seq) seq) =
+//    groupedIps
+//    |> Seq.map snd
+//    |> Seq.concat
+
+//let aggregatedIps = Seq.map aggregateTerms groupedIps
+
+//let matchedIps = aggregatedIps |> Seq.map (matchTerms onto)
+let matchedIps = groupedIps |> Seq.map (matchTerms onto)
+matchedIps |> Seq.head
+let header = paramse.Head
+isHeader ontoGraph header
+let ip = paramse[2]
+isPartOfHeader header ontoGraph ip
+//ontoGraph[ip.Name] |> FContext.predecessors |> Seq.exists (fun (nk,e) -> printfn $"nk: {nk}\nheader: {header.Name}"; nk = header.Name)
+ontoGraph[ip.Name] |> FContext.successors |> Seq.exists (fun (nk,e) -> printfn $"nk: {nk}\nheader: {header.Name}"; nk = header.Name)
+onto.Terms[3]
+//matchTerms onto [header; ip]
+let testHead1 = groupedIps |> Seq.head
+//let testHead1a = aggregatedIps |> Seq.head |> Seq.toList
+let testHead1a = groupedIps |> Seq.head |> Seq.toList
+groupedIps |> Seq.item 3 |> Seq.toList
+matchedIps |> Seq.item 3 |> Seq.toList
+matchedIps |> Seq.last |> Seq.toList
+
+// SSSSSSSSSSSSSSSSSSSSSSSSS
+// altered from ARCGraph.fs:
+
+/// Returns all terms (as node key * OboTerm * ARCRelation) that a given relating function yields for the node of a given IParam in a given ontology graph.
+let getRelatedCvParamsBy relating (ip : IParam) (graph : FGraph) =
+    // the ontology graph of this script is keyed by term name (see ontologyToFGraphByName), so the node is looked up by name, not by accession
+    relating graph[ip.Name]
+    |> Seq.map (fun (id,rel) ->
+        let nodeKey,term = FGraph.findNode id graph
+        nodeKey, term, rel)
+
+/// Returns all related terms (as node key * OboTerm * ARCRelation) of a given IParam by using a given ontology graph.
+let getRelatedCvParams (ip : IParam) (graph : FGraph) =
+    getRelatedCvParamsBy FContext.neighbours ip graph
+
+/// Returns all succeeding terms (as node key * OboTerm * ARCRelation) of a given IParam by using a given ontology graph.
+let getSucceedingCvParams (ip : IParam) (graph : FGraph) =
+    getRelatedCvParamsBy FContext.successors ip graph
+
+/// Returns all preceding terms (as node key * OboTerm * ARCRelation) of a given IParam by using a given ontology graph.
+let getPrecedingCvParams (ip : IParam) (graph : FGraph) =
+    getRelatedCvParamsBy FContext.predecessors ip graph
+
+/// Checks if a given current IParam has a given ARCRelation to a given prior IParam by using a given ontology graph.
+let hasRelationTo onto (relation : ARCRelation) currentIp (priorIp : IParam) =
+    getRelatedCvParams currentIp onto
+    // node keys are term names, so the prior term is matched by name as well
+    |> Seq.exists (fun (id,t,r) -> id = priorIp.Name && r.HasFlag relation)
+
+/// Checks if a given current IParam has a follows relationship to a given prior IParam by using a given ontology graph.
+let hasFollowsTo onto currentIp priorIp =
+    hasRelationTo onto ARCRelation.Follows currentIp priorIp
+
+/// Checks if a given current IParam has a part_of relationship to a given prior IParam by using a given ontology graph.
+let hasPartOfTo onto currentIp priorIp =
+    hasRelationTo onto ARCRelation.PartOf currentIp priorIp
+
+// EEEEEEEEEEEEEEEEEEEEEEEEE
+
+/// Returns the TermFamiliarity's IParam value. 
+let deconstructTf tf = + match tf with + | KnownTerm ip -> ip + | UnknownTerm ip -> ip + | MisplacedTerm ip -> ip + | ObsoleteTerm ip -> ip + +let constructSubgraph (ontoGraph : FGraph) (ips : (string * TermFamiliarity seq) seq) = + let rec loop (section : (string * TermFamiliarity seq) list) (stash : (string * TermFamiliarity seq) list) (header : IParam) (priorParams : string * IParam seq) (graph : FGraph) = + match section with + | (hn,hts) :: t -> + match Seq.head hts with + | UnknownTerm ip -> + FGraph.addElement hn (hts |> Seq.map deconstructTf) (fst priorParams) (snd priorParams) ARCRelation.Unknown graph + |> loop t stash header priorParams + | KnownTerm ip -> + let priorName,priorIps = priorParams + if hasFollowsTo ontoGraph ip (Seq.head priorIps) then + let hips = (hts |> Seq.map deconstructTf) + FGraph.addElement hn hips priorName priorIps ARCRelation.Follows graph + |> loop t stash header (hn, hips) + else + loop t () + loop + + + + diff --git a/playgrounds/arcGraph_playgrounds/prototype_v0.2.1.fsx b/playgrounds/arcGraph_playgrounds/prototype_v0.2.1.fsx new file mode 100644 index 0000000..c7b45da --- /dev/null +++ b/playgrounds/arcGraph_playgrounds/prototype_v0.2.1.fsx @@ -0,0 +1,533 @@ +#I "../../../ARCTokenization/src/ARCTokenization/bin/Debug/netstandard2.0" +#I "../../../ARCTokenization/src/ARCTokenization/bin/Release/netstandard2.0" +#r "ARCTokenization.dll" +#r "ControlledVocabulary.dll" +#I "../../src/ARCExpect/bin/Debug/netstandard2.0" +#I "../../src/ARCExpect/bin/Release/netstandard2.0" +#r "ARCExpect.dll" +#I "../../../../omaus/Graphoscope/src/Graphoscope/bin/Debug/netstandard2.0" +#I "../../../../omaus/Graphoscope/src/Graphoscope/bin/Release/netstandard2.0" +#r "Graphoscope.dll" + +//#r "nuget: ARCTokenization" +#r "nuget: Expecto" +//#r "nuget: FSharpAux, 1.1.0" +#r "nuget: FSharpAux, 2.0.0" +//#r "nuget: Graphoscope" +#r "nuget: Cytoscape.NET" +#r "nuget: FsOboParser, 0.3.0" +#r "nuget: FsSpreadsheet.ExcelIO, 4.1.0" + + +open Expecto +open 
ControlledVocabulary +open ARCTokenization +open FSharpAux +//open ArcValidation.OntologyHelperFunctions +//open ArcValidation.ErrorMessage +open Graphoscope +open FsOboParser +open Cytoscape.NET + +open ARCExpect +open ARCExpect.OboGraph +open ARCExpect.ARCGraph +open ARCExpect.ARCGraph.Visualization + +open System.Collections.Generic +open System.Text.RegularExpressions + + +//// from internal module copypasted + +//open Impl + +//let performTest test = +// let w = System.Diagnostics.Stopwatch() +// w.Start() +// evalTests Tests.defaultConfig test +// |> Async.RunSynchronously +// |> fun r -> +// w.Stop() +// { +// results = r +// duration = w.Elapsed +// maxMemory = 0L +// memoryLimit = 0L +// timedOut = [] +// } + +let paramse = ARCTokenization.Investigation.parseMetadataSheetFromFile @"C:\Repos\git.nfdi4plants.org\ArcPrototype\isa.investigation.xlsx" + +//paramse |> List.map (fun p -> p.ToString() |> String.contains "CvParam") |> List.reduce (&&) +//paramse |> List.iter (fun p -> printfn "%A" <| p.GetType().ToString()) +//paramse |> List.iter (fun p -> printfn "%A" <| (p.Value |> ParamValue.getValueAsString)) +//paramse |> List.iter (fun p -> printfn "%A" <| p.Name) + +//let cvparamse = paramse |> List.map (CvParam.tryCvParam >> Option.get) +//let cvparamse = +// paramse +// |> List.map ( +// fun p -> +// match CvParam.tryCvParam p with +// | Some cvp -> cvp +// | None -> CvParam(p.ID, p.Name, p.RefUri, p.Value, p :?> CvAttributeCollection) +// ) + +//let cvparamse = paramse |> List.map (Param.tryCvParam >> Option.get) + +//let fromCvParamList cvpList = +// cvpList +// |> List.mapi ( +// fun i cvp -> +// (i,CvBase.getCvName cvp), cvp +// ) +// |> FGraph.createFromNodes + +//let invesContentGraph = fromCvParamList cvparamse + +let onto = ARCTokenization.Terms.InvestigationMetadata.ontology + +//let tans = cvparamse |> List.map CvParam.getCvAccession + +//let assTerms = tans |> List.choose (fun tan -> obo.Terms |> List.tryFind (fun term -> term.Id = tan)) 
+//assTerms |> List.fold (fun acc y -> acc && Option.isSome y) true
+
+//let assTermsRelships = assTerms |> List.collect (fun x -> OboOntology.getRelatedTerms x obo)
+
+//toRelation "part_of" + toRelation "has_a" + toRelation "follows"
+//toRelation "part_of" ||| toRelation "has_a" ||| toRelation "follows"
+
+//let assTermsRels = assTermsRelships |> List.map (fun (o1,rs,o2) -> o1, toRelation rs, o2)
+
+//invesContentGraph.Keys |> Seq.head
+//invesContentGraph.Values |> Seq.head
+
+//let ontoGraph = ontologyToFGraph onto
+
+//ontoGraph |> printGraph (fun x -> x.Name)
+
+//ontoGraphToFullCyGraph ontoGraph |> CyGraph.show
+
+/// Folds the relations of the given ontology into an FGraph that uses term names as node keys and the terms as node data.
+/// `Empty` and `TargetMissing` relations only contribute their source term as a node. For a `Target` relation whose edge already exists,
+/// the converted ARCRelation is added (`+`) to the edge's current relation; otherwise source, target and edge are added.
+let ontologyToFGraphByName (onto : OboOntology) =
+    OboOntology.getRelations onto
+    |> List.fold (
+        fun graph termRelation ->
+            match termRelation with
+            | Empty source
+            | TargetMissing (_,source) ->
+                FGraph.addNode source.Name source graph
+            | Target (relation,source,target) ->
+                //printfn $"st: {source.Name}\trelation: {relation}\ttt: {target.Name}"
+                let arcRelation = ARCRelation.toARCRelation relation
+                if FGraph.containsEdge source.Name target.Name graph then
+                    let _, _, currentRelation = FGraph.findEdge source.Name target.Name graph
+                    FGraph.setEdgeData source.Name target.Name (currentRelation + arcRelation) graph
+                else
+                    FGraph.addElement source.Name source target.Name target arcRelation graph
+    ) FGraph.empty
+
+//OboOntology.getRelations onto |> List.take 10
+//OboOntology.getRelations onto |> List.filter (fun tr -> match tr with Target (a,b,c) -> (*a = "follows" &&*) c.Name = "ONTOLOGY SOURCE REFERENCE" | _ -> false)
+
+let ontoGraph = ontologyToFGraphByName onto
+//ontoGraph["ONTOLOGY SOURCE REFERENCE"] |> FContext.predecessors
+//ontoGraph["ONTOLOGY SOURCE REFERENCE"] |> FContext.successors
+
+// NEW METADATA GRAPH CREATION FUNCTION(S)
+
+// input: OboGraph, CvParam list
+
+
+/// Returns the Term Source Ref of a given ID (= Term Accession Number), i.e. everything in front of the first colon.
+let getRef id =
+    id |> String.takeWhile (fun c -> c <> ':')
+
+/// Returns all terms that are present in the given ontology but don't occur in the given CvParam list as CvParams. 
+let getMissingTerms (onto : OboOntology) (ips : IParam seq) =
+    let occurs cvTerm =
+        ips |> Seq.exists (fun ip -> Param.getTerm ip = cvTerm)
+    onto.Terms
+    |> Seq.filter (fun oboTerm -> not oboTerm.IsObsolete)
+    |> Seq.map (fun oboTerm -> OboTerm.toCvTerm oboTerm)
+    |> Seq.filter (fun cvTerm -> not (occurs cvTerm))
+    |> Seq.map (fun cvTerm -> CvParam(cvTerm, Value "") :> IParam)
+
+
+/// Representation of the familiarity of an IParam's term. KnownTerm is for terms that are known in, e.g., an ontology, UnknownTerm for terms that are not.
+/// ObsoleteTerm is for deprecated terms (i.e., OboTerm with `is_obsolete` = `true`). MisplacedTerm is assigned by matchTerms to terms that are neither user params nor obsolete but have no part_of relation to their section's header.
+type TermFamiliarity =
+    | KnownTerm of IParam
+    | UnknownTerm of IParam
+    | ObsoleteTerm of IParam
+    | MisplacedTerm of IParam
+
+/// Takes an OboOntology and a seq of IParams and returns the seq with all ontology terms that are missing in it appended as empty-value CvParams.
+let addMissingTerms onto ips =
+    getMissingTerms onto ips
+    |> Seq.append ips
+
+/// Groups the given IParams by their name.
+let groupTerms (ips : IParam seq) =
+    Seq.groupBy (fun (ip : IParam) -> ip.Name) ips // if erroring: change to `.Accession`
+
+
+let ipsAdded = addMissingTerms onto paramse
+//ipsAdded |> Seq.iter (fun c -> c.Name |> printfn "%s")
+//onto.Terms |> List.filter (fun o -> o.Synonyms.Length > 0)
+
+//let ipsAggregated = aggregateTerms ipsAdded
+//ipsAggregated |> Seq.iter (printfn "%A")
+
+//let cvpsMarked = ipsAggregated |> Seq.map (fun (n,cs) -> n, markTerms onto cs)
+//cvpsMarked |> Seq.iter (fun c -> match c with | KnownTerm x | ObsoleteTerm x -> () | UnknownTerm x -> printfn "%A" x)
+
+/// Returns the key of the first node in a structured ontology-FGraph that has no successors, i.e. that does not point to any other node. 
+let getTopNodeKey (graph : FGraph<_,_,_>) =
+    // Seq.find throws if every node has at least one successor (or the graph has no nodes at all)
+    graph.Keys
+    |> Seq.find (fun k -> FContext.successors graph[k] |> Seq.isEmpty)
+
+/// Returns the nodedata of the node with the given node key in the given graph.
+let getNodeData nodeKey (graph : FGraph<_,_,_>) =
+    graph[nodeKey] |> fun (p,nd,s) -> nd
+
+//ontoGraph[getTopNodeKey ontoGraph] |> fun (p,nd,s) -> nd
+
+///// Creates an intermediate graph with CvParam seq as nodedata.
+//let createIntermediateGraph (ontoGraph : FGraph) cvps =
+//    let topNodeKey = getTopNodeKey ontoGraph
+//    let rec loop inputList currentKey priorTerm outputGraph =
+//        let _,oboTerm,_ = ontoGraph[currentKey]
+//        let cvtObo = OboTerm.toCvTerm oboTerm
+
+/// Checks if a given IParam is a header term in a given ontology-based FGraph, i.e. if its term equals the term of a node that is the target of at least one part_of relation.
+let isHeader (ontoGraph : FGraph) ip =
+    ontoGraph.Keys
+    |> Seq.exists (
+        fun k ->
+            let hasPartOfs =
+                FContext.predecessors ontoGraph[k]
+                // ontologyToFGraphByName merges several relations between the same two terms into one edge,
+                // so the part_of flag has to be tested instead of comparing the whole edge data for equality
+                |> Seq.exists (fun (nk,ed) -> ed.HasFlag ARCRelation.PartOf)
+            hasPartOfs && OboTerm.toCvTerm (getNodeData k ontoGraph) = Param.getTerm ip
+    )
+
+//isHeader ontoGraph cvparamse[2]
+//isHeader ontoGraph cvparamse[5]
+
+let partitionedIps = Seq.groupWhen (isHeader ontoGraph) paramse
+//partitionedIps |> Seq.map Seq.toList |> Seq.toList
+//partitionedIps |> Seq.iter (fun ips -> printfn ""; ips |> Seq.iter (fun ip -> printfn "%s" ip.Name))
+
+
+/// Checks if there are missing terms in a given seq of IParams by using a given ontology-based FGraph and adds them if so. A term is defined as missing if it has a part_of relation to the seq's head term and is not present in the seq's tail. 
+let addMissingTermsInGroup (ontoGraph : FGraph) (ips : IParam seq) =
+    let header = Seq.head ips
+    let ipsTail = Seq.tail ips
+    let isPresent cvTerm =
+        ipsTail |> Seq.exists (fun ip -> Param.getTerm ip = cvTerm)
+    let missingParams =
+        // all non-obsolete terms that are part_of the header ...
+        FContext.predecessors ontoGraph[header.Name]
+        |> Seq.filter (fun (_,rel) -> rel.HasFlag ARCRelation.PartOf)
+        |> Seq.map (fun (key,_) -> ontoGraph[key] |> fun (p,nd,s) -> nd)
+        |> Seq.filter (fun oboTerm -> not oboTerm.IsObsolete)
+        |> Seq.map (fun oboTerm -> OboTerm.toCvTerm oboTerm)
+        // ... that do not occur behind the header become empty-valued CvParams
+        |> Seq.filter (fun cvTerm -> not (isPresent cvTerm))
+        |> Seq.map (fun cvTerm -> CvParam(cvTerm, "") :> IParam)
+    Seq.append ips missingParams
+
+let partitionallyFilledIps = partitionedIps |> Seq.map (addMissingTermsInGroup ontoGraph)
+
+let groupedIps = partitionallyFilledIps |> Seq.map groupTerms
+
+/// Checks if a given IParam has a part_of relation to a given header term using an ontology-based FGraph. Both nodes are looked up by name.
+let isPartOfHeader (header : IParam) (ontoGraph : FGraph) (ip : IParam) =
+    FContext.successors ontoGraph[ip.Name] // change to `.Accession` if required
+    |> Seq.exists (fun (succKey,rel) -> succKey = header.Name && rel.HasFlag ARCRelation.PartOf) // change to `.Accession` if required
+
+/// Checks if the term of the given IParam is tagged as obsolete in a given OboOntology.
+let isObsoleteTerm (onto : OboOntology) (ip : IParam) =
+    let cvTerm = Param.getTerm ip
+    onto.Terms
+    |> Seq.exists (fun oboTerm -> oboTerm.IsObsolete && OboTerm.toCvTerm oboTerm = cvTerm)
+
+/// Returns the IParam wrapped by the given TermFamiliarity, whatever its case.
+let deconstructTf tf =
+    match tf with
+    | KnownTerm ip
+    | UnknownTerm ip
+    | MisplacedTerm ip
+    | ObsoleteTerm ip -> ip
+
+/// Takes a seq of grouped IParams and tags them according to their TermFamiliarity using a given ontology-based FGraph. 
+let matchTerms (ontoGraph : FGraph) (gips : (string * IParam seq) seq) =
+    let header = gips |> Seq.head |> snd |> Seq.head
+    printfn $"header: {header.Name}"
+    // picks the tag for a whole group: user params -> unknown, obsolete terms -> obsolete, part_of the header -> known, otherwise misplaced
+    // NOTE(review): obsolescence is looked up in the script-level `onto` binding, not in the `ontoGraph` argument — confirm this is intended
+    let tagFor (ips : IParam seq) =
+        if Seq.exists (fun ip -> Param.tryUserParam ip |> Option.isSome) ips then UnknownTerm
+        elif Seq.exists (fun ip -> isObsoleteTerm onto ip) ips then ObsoleteTerm
+        elif Seq.exists (fun ip -> isPartOfHeader header ontoGraph ip) ips then KnownTerm
+        else MisplacedTerm
+    gips
+    |> Seq.mapi (
+        fun i (name,ips) ->
+            // the first group is the header group and yields only the header itself
+            if i = 0 then
+                name, seq {KnownTerm header}
+            else
+                printfn $"ip: {(ips |> Seq.head).Name}"
+                name, Seq.map (tagFor ips) ips
+    )
+
+//groupedIps|>Seq.iter(fun ips->printfn"";ips|>Seq.iter(fun(ipN,ipEs)->printfn$"{ipN}:";ipEs|>Seq.iter(fun ip->printfn$"\t{ParamValue.getValueAsString ip.Value}")))
+
+// deprecated: (dropped in favor of reworking matchTerms input parameter)
+///// Aggregates groups of IParams together.
+//let aggregateTerms (groupedIps : (string * IParam seq) seq) =
+//    groupedIps
+//    |> Seq.map snd
+//    |> Seq.concat
+
+//let aggregatedIps = Seq.map aggregateTerms groupedIps
+
+//let matchedIps = aggregatedIps |> Seq.map (matchTerms onto)
+let matchedIps = groupedIps |> Seq.map (matchTerms ontoGraph)
+//matchedIps |> Seq.head
+//let header = paramse.Head
+//isHeader ontoGraph header
+//let ip = paramse[2]
+//isPartOfHeader header ontoGraph ip
+//ontoGraph[ip.Name] |> FContext.predecessors |> Seq.exists (fun (nk,e) -> printfn $"nk: {nk}\nheader: {header.Name}"; nk = header.Name)
+//ontoGraph[ip.Name] |> FContext.successors |> Seq.exists (fun (nk,e) -> printfn $"nk: {nk}\nheader: {header.Name}"; nk = header.Name)
+//onto.Terms[3]
+//matchTerms onto [header; ip]
+//let testHead1 = groupedIps |> Seq.head
+//let testHead1a = aggregatedIps |> Seq.head |> Seq.toList
+//let testHead1a = groupedIps |> Seq.head |> Seq.toList
+//groupedIps |> Seq.item 3 |> Seq.toList
+//matchedIps |> Seq.item 3 |> Seq.toList
+//matchedIps |> Seq.last |> Seq.toList
+
+// 
+// +++++++++++++++++++++++++
+// altered from ARCGraph.fs:
+
+/// Returns all terms (as node key * OboTerm * ARCRelation) that a given relating function yields for the node of a given IParam in a given ontology graph. The node is looked up by the IParam's name.
+let getRelatedIParamsBy relating (ip : IParam) (graph : FGraph) =
+    //relating graph[ip.Accession]
+    printfn $"{graph[ip.Name]}"
+    relating graph[ip.Name]
+    |> Seq.map (fun (id,rel) ->
+        let nodeKey,term = FGraph.findNode id graph
+        nodeKey, term, rel)
+
+/// Returns all related terms (as node key * OboTerm * ARCRelation) of a given IParam by using a given ontology graph.
+let getRelatedIParams (ip : IParam) (graph : FGraph) =
+    graph |> getRelatedIParamsBy FContext.neighbours ip
+
+/// Returns all succeeding terms (as node key * OboTerm * ARCRelation) of a given IParam by using a given ontology graph.
+let getSucceedingCvParams (ip : IParam) (graph : FGraph) =
+    graph |> getRelatedIParamsBy FContext.successors ip
+
+/// Returns all preceding terms (as node key * OboTerm * ARCRelation) of a given IParam by using a given ontology graph.
+let getPrecedingCvParams (ip : IParam) (graph : FGraph) =
+    graph |> getRelatedIParamsBy FContext.predecessors ip
+
+/// Checks if a given current IParam has a given ARCRelation to a given prior IParam by using a given ontology graph. Only the successors of the current IParam's node are considered.
+let hasRelationTo onto (relation : ARCRelation) currentIp (priorIp : IParam) =
+    getSucceedingCvParams currentIp onto
+    //|> Seq.exists (fun (id,t,r) -> id = priorIp.Accession && r.HasFlag relation)
+    |> Seq.exists (fun (succKey,_,succRel) -> succKey = priorIp.Name && succRel.HasFlag relation)
+
+/// Checks if a given current IParam has a follows relationship to a given prior IParam by using a given ontology graph.
+let hasFollowsTo onto currentIp priorIp =
+    hasRelationTo onto ARCRelation.Follows currentIp priorIp
+
+/// Checks if a given current IParam has a part_of relationship to a given prior IParam by using a given ontology graph. 
+let hasPartOfTo onto currentIp priorIp =
+    hasRelationTo onto ARCRelation.PartOf currentIp priorIp
+
+
+//let firstIp = Seq.head matchedIps |> Seq.head |> snd |> Seq.head |> deconstructTf
+//let secondIp = Seq.item 1 matchedIps |> Seq.head |> snd |> Seq.head |> deconstructTf
+//FContext.successors ontoGraph[firstIp.Name]
+//FContext.successors ontoGraph[secondIp.Name]
+//FContext.predecessors ontoGraph[firstIp.Name] |> Seq.toList
+//FContext.predecessors ontoGraph[secondIp.Name] |> Seq.toList
+
+// +++++++++++++++++++++++++
+
+/// Takes an ontology-based FGraph and a seq of termname * matched IParams to create an intermediate subgraph out of it. This subgraph consists of a chain of nodes that have their termname as nodekey and their IParam seq as nodedata. The nodes are ordered by the follows-relationship taken from the ontology-based FGraph.
+/// Returns the subgraph together with the stash of known/obsolete groups that did not follow their prior node (most recently stashed first). The first group of the input is used as the initial prior node.
+let constructIntermediateMetadataSubgraph (ontoGraph : FGraph) (ips : (string * TermFamiliarity seq) seq) =
+    let toIps tfs = Seq.map deconstructTf tfs
+    let rec loop (section : (string * TermFamiliarity seq) list) (stash : (string * TermFamiliarity seq) list) (priorParams : string * IParam seq) (graph : FGraph) =
+        let priorName,priorIps = priorParams
+        match section with
+        | [] ->
+            //match stash with
+            //| [] ->
+            //    graph, stash // if section and stash are empty, return graph and empty stash
+            //| _ ->
+            //    if List.forall (fun (sn,stf) -> match Seq.head stf with MisplacedTerm _ -> true | _ -> false) stash then
+            //        graph, stash // if section is empty and stash only has MisplacedTerms, return graph and stash
+            //    else
+            //        loop stash [] priorParams graph // else take stash as section and continue
+            graph, stash
+        | (name,tfs) :: rest ->
+            // hangs the current group onto the prior node without making it the new prior node
+            let attachToPrior relation =
+                FGraph.addElement name (toIps tfs) priorName priorIps relation graph
+                |> loop rest stash priorParams
+            // appends the current group to the chain if it follows the prior node, otherwise puts it on the stash
+            let chainOrStash ip relation =
+                if hasFollowsTo ontoGraph ip (Seq.head priorIps) then
+                    let currentIps = toIps tfs
+                    FGraph.addElement name currentIps priorName priorIps relation graph
+                    |> loop rest stash (name, currentIps)
+                else
+                    loop rest ((name,tfs) :: stash) priorParams graph
+            match Seq.head tfs with
+            | UnknownTerm _ -> attachToPrior ARCRelation.Unknown
+            | MisplacedTerm _ -> attachToPrior ARCRelation.Misplaced
+            | KnownTerm ip -> chainOrStash ip ARCRelation.Follows
+            | ObsoleteTerm ip -> chainOrStash ip (ARCRelation.Follows + ARCRelation.Obsolete)
+    match Seq.toList ips with
+    | [] -> failwith "The input list was empty."
+    | (headerName,headerTfs) :: rest -> loop rest [] (headerName, toIps headerTfs) FGraph.empty
+
+let subgraphs = Seq.map (constructIntermediateMetadataSubgraph ontoGraph) matchedIps
+//subgraphs |> Seq.toList
+//let subgraph1, subgraph1stash = Seq.head subgraphs
+//Seq.item 1 subgraphs
+//Visualization.isaIntermediateGraphToFullCyGraph subgraph1 |> CyGraph.show
+//Seq.length subgraphs
+//Seq.item 3 subgraphs |> snd
+//Seq.item 3 subgraphs |> fst |> Visualization.isaIntermediateGraphToFullCyGraph |> CyGraph.show
+//Visualization.printGraph string subgraph1
+//let subgraphLengths = Seq.map (fun (sg,st) -> Seq.length st) subgraphs
+//Seq.toList subgraphLengths
+//(Seq.take 5 >> Seq.iter (fst >> Visualization.isaIntermediateGraphToFullCyGraph >> CyGraph.show)) subgraphs
+
+/// Takes a subgraph and adds empty IParams of 
+/// the respective CvTerm to the nodedata if it is shorter than the longest IParam seq of any nodedata so that all IParam seqs have the same amount of items. Ignores the header.
+/// The given graph is updated in place and returned; a graph without nodes is returned unchanged.
+let addEmptyIpsToNodeData (subgraph : FGraph) =
+    // Seq.maxBy and getTopNodeKey both throw on a graph without nodes, and there is nothing to fill in that case
+    if Seq.isEmpty subgraph.Keys then subgraph
+    else
+        let longestChainLength =
+            FGraph.getNodes subgraph
+            |> Seq.map (snd >> Seq.length)
+            |> Seq.max
+        let header = getTopNodeKey subgraph
+        // the node data is replaced while walking over the nodes, so iterate over a snapshot of the keys instead of the live key collection
+        Seq.toArray subgraph.Keys   // .mapNodes would be nicer...
+        |> Array.iter (
+            fun nk ->
+                if nk <> header then
+                    let nd = subgraph[nk] |> fun (p,nd,s) -> nd
+                    let currLength = Seq.length nd
+                    if currLength < longestChainLength then
+                        let emptyIps = Seq.init (longestChainLength - currLength) (fun _ -> CvParam(Seq.head nd |> Param.getTerm, "") :> IParam)
+                        FGraph.setNodeData nk (Seq.append nd emptyIps) subgraph
+                        |> ignore
+        )
+        subgraph
+
+let filledSubgraphs = Seq.map (fst >> addEmptyIpsToNodeData) subgraphs
+//Seq.item 3 subgraphs |> fst |> Visualization.isaIntermediateGraphToFullCyGraph |> CyGraph.show
+//Seq.item 3 filledSubgraphs |> Visualization.isaIntermediateGraphToFullCyGraph |> CyGraph.show
+
+/// Splits a metadata subgraph whose nodedata are IParam seqs into a graph with one node per IParam, keyed by (original node key, index in the seq).
+/// The header node is only represented once, as (header, 0). Every edge of the original graph is re-created per index; edges to the header always point to (header, 0).
+/// Assumes that all non-header nodes carry equally long IParam seqs (see addEmptyIpsToNodeData). A graph without nodes yields an empty graph.
+let splitMetadataSubgraph (subgraph : FGraph) =
+    // getTopNodeKey throws on a graph without nodes, and there is nothing to split in that case
+    if Seq.isEmpty subgraph.Keys then FGraph.empty
+    else
+        let header = getTopNodeKey subgraph
+        printfn $"header: {header}"
+        let newGraph =
+            subgraph.Keys
+            |> Seq.fold (
+                fun g nk ->
+                    if nk = header then
+                        let nd = getNodeData nk subgraph |> Seq.head
+                        FGraph.addNode (nk,0) nd g
+                    else
+                        let nds = getNodeData nk subgraph
+                        nds
+                        |> Seq.foldi (
+                            fun i g2 nd ->
+                                FGraph.addNode (nk,i) nd g2
+                        ) g
+            ) FGraph.empty
+        // edges are added to newGraph while walking over its nodes, so iterate over a snapshot of the keys
+        Seq.toArray newGraph.Keys
+        |> Array.iter (
+            fun (nk,i) ->
+                //printfn $"nk: {nk}, i: {i}"
+                let succs = FContext.successors subgraph[nk]
+                succs
+                |> Seq.iter (
+                    fun (nk2,e) ->
+                        if nk2 = header then
+                            printfn "edge for header"
+                            FGraph.addEdge (nk,i) (nk2,0) e newGraph
+                        else
+                            printfn "edge for non-header"
+                            FGraph.addEdge (nk,i) (nk2,i) e newGraph
+                        |> ignore
+                )
+        )
+        newGraph
+
+let splitSubgraphs = Seq.map splitMetadataSubgraph filledSubgraphs
+
+//splitSubgraphs |> Seq.head |> 
Visualization.isaSplitGraphToFullCyGraph |> CyGraph.show +//splitSubgraphs |> Seq.item 3 |> Visualization.isaSplitGraphToFullCyGraph |> CyGraph.show + +let metadataSubgraphToList (subgraph : FGraph) = + let headerN, headerI = getTopNodeKey subgraph + let chainMaxNo = subgraph.Keys |> Seq.maxBy snd |> snd + Seq.init (chainMaxNo + 1) (fun i -> + subgraph.Keys + |> Seq.choose ( + fun (nk,i2) -> + if nk = headerN then + ((headerN, headerI), getNodeData (nk,0) subgraph) + |> Some + elif i = i2 then + ((nk, i), getNodeData (nk,i) subgraph) + |> Some + else None + ) + ) + + +//metadataSubgraphToList (Seq.item 3 splitSubgraphs) |> Seq.head |> Seq.toList +//metadataSubgraphToList (Seq.item 3 splitSubgraphs) |> Seq.item 2 |> Seq.toList + +let assembleMetadataSubgraphs (ontoGraph : FGraph) (subgraphs : FGraph seq) = + + +// +let constructMetadataGraph (ontoGraph : FGraph) (matchedIps : (string * TermFamiliarity seq) seq seq) = + + + + + diff --git a/codeGeneratorScripting.fsx b/playgrounds/codeGeneratorScripting.fsx similarity index 100% rename from codeGeneratorScripting.fsx rename to playgrounds/codeGeneratorScripting.fsx diff --git a/playgrounds/demo_notebooks/demo_ARCGraph.ipynb b/playgrounds/demo_notebooks/demo_ARCGraph.ipynb new file mode 100644 index 0000000..00f5f45 --- /dev/null +++ b/playgrounds/demo_notebooks/demo_ARCGraph.ipynb @@ -0,0 +1,135 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "#r \"nuget: ARCExpect, 0.0.0-preview0\"\n", + "#r \"nuget: ARCTokenization\"\n", + "#r \"nuget: Graphoscope, 0.6.0-preview.1\"\n", + "#r \"nuget: FSharpAux, 2.0.0\"\n", + "#r \"nuget: Cytoscape.NET.Interactive\"\n", + "\n", + "open ARCTokenization\n", + "open ARCExpect\n", + "open Graphoscope\n", + "open FSharpAux\n", + "open Cytoscape.NET.Interactive" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "// put in the path to your ARC's Investigation file here:\n", + "\n", + "let path = @\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "let iparams = ARCTokenization.Investigation.parseMetadataSheetFromFile path\n", + "\n", + "let onto = ARCTokenization.Terms.InvestigationMetadata.ontology\n", + "\n", + "let ontoGraph = OboGraph.ontologyToFGraphByName onto\n", + "\n", + "let ipsAdded = ARCGraph.addMissingTerms onto iparams\n", + "\n", + "let partitionedIps = Seq.groupWhen (ARCGraph.isHeader ontoGraph) ipsAdded\n", + "\n", + "let partitionallyFilledIps = partitionedIps |> Seq.map (ARCGraph.addMissingTermsInGroup ontoGraph)\n", + "\n", + "let groupedIps = partitionallyFilledIps |> Seq.map ARCGraph.groupTerms\n", + "\n", + "let matchedIps = groupedIps |> Seq.map (ARCGraph.matchTerms onto)\n", + "\n", + "let subgraphs = Seq.map (ARCGraph.constructIntermediateMetadataSubgraph ontoGraph) matchedIps\n", + "\n", + "let filledSubgraphs = Seq.map (fst >> ARCGraph.addEmptyIpsToNodeData) subgraphs\n", + "\n", + "let splitSubgraphs = Seq.map ARCGraph.splitMetadataSubgraph filledSubgraphs\n", + "\n", + "let filledLists = Seq.map ARCGraph.metadataSubgraphToList splitSubgraphs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "let contactsGraph =\n", + " splitSubgraphs\n", + " |> Seq.item 3\n", + " |> ARCGraph.Visualization.isaSplitGraphToFullCyGraph\n", + "\n", + "contactsGraph" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": 
".NET (C#)", + "language": "C#", + "name": ".net-csharp" + }, + "language_info": { + "name": "polyglot-notebook" + }, + "polyglot_notebook": { + "kernelInfo": { + "defaultKernelName": "csharp", + "items": [ + { + "aliases": [], + "name": "csharp" + }, + { + "aliases": [], + "languageName": "fsharp", + "name": "fsharp" + } + ] + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/errorClassesStatic.fsx b/playgrounds/errorClassesStatic.fsx similarity index 100% rename from errorClassesStatic.fsx rename to playgrounds/errorClassesStatic.fsx diff --git a/expectoPlayground.fsx b/playgrounds/expectoPlayground.fsx similarity index 100% rename from expectoPlayground.fsx rename to playgrounds/expectoPlayground.fsx diff --git a/getAnnotationTableCvPs.fsx b/playgrounds/getAnnotationTableCvPs.fsx similarity index 100% rename from getAnnotationTableCvPs.fsx rename to playgrounds/getAnnotationTableCvPs.fsx diff --git a/graphModelIOTest.fsx b/playgrounds/graphModelIOTest.fsx similarity index 100% rename from graphModelIOTest.fsx rename to playgrounds/graphModelIOTest.fsx diff --git a/graphoscopePlayground.fsx b/playgrounds/graphoscopePlayground.fsx similarity index 100% rename from graphoscopePlayground.fsx rename to playgrounds/graphoscopePlayground.fsx diff --git a/playground.fsx b/playgrounds/playground.fsx similarity index 100% rename from playground.fsx rename to playgrounds/playground.fsx diff --git a/playgrounds/workingGraph.md b/playgrounds/workingGraph.md new file mode 100644 index 0000000..aa8cab2 --- /dev/null +++ b/playgrounds/workingGraph.md @@ -0,0 +1,52 @@ +```mermaid + +flowchart + xf[XLSX File] + ips[IParams] + of[OBO File] + fips[filled IParams] + partiparams[partitioned IParams] + fipartips[filled partitioned IParams] + grips[grouped IParams] + onto[Structural Ontology] + og[Ontology Graph] + maps[matched IParams] + arcisgr["ARC Intermediate (Metadata) Subgraph"] + arcsgr["ARC (Metadata) Subgraph"] + arcgr["ARC (Metadata) Graph"] + adjips[adjusted 
IParams] + + D[ ] + D2[ ] + D3[ ] + D4[ ] + D5[ ] + + style D height:0.0000001px,width:0.000001px + style D2 height:0.0000001px,width:0.000001px + style D3 height:0.0000001px,width:0.000001px + style D4 height:0.0000001px,width:0.000001px + style D5 height:0.0000001px,width:0.000001px + style of fill:#EEE,stroke-width:0,color:#777 + + xf --> |parseMetadataSheetFromFile| ips + of --> |OboOntology.parseFromFile| onto + ips & onto --- D + D --> |addMissingTerms| fips + fips & og --- D4 + D4 --> |groupWhen isHeader| partiparams + partiparams & og --- D3 + D3 --> |addMissingTermsInGroup| fipartips + fipartips --> |groupTerms| grips + onto --> |ontologyToGraph| og + grips & og --- D5 + D5 --> |matchTerms| maps + maps & og --- D2 + D2 --> |constructIntermediateMetadataSubgraph| arcisgr + arcisgr --> |splitIntermediateMetadataSubgraph| arcsgr + arcsgr --> |assembleMetadataGraph| arcgr + arcsgr --> |toFlatList| adjips + + linkStyle 1 stroke:#999,color:#777,fill:#EEE + +``` \ No newline at end of file diff --git a/src/ARCExpect/ARCExpect.fsproj b/src/ARCExpect/ARCExpect.fsproj index bda9aa5..5a06b08 100644 --- a/src/ARCExpect/ARCExpect.fsproj +++ b/src/ARCExpect/ARCExpect.fsproj @@ -26,11 +26,11 @@ - + - + diff --git a/src/ARCExpect/ARCGraph.fs b/src/ARCExpect/ARCGraph.fs index 9faf6b0..1fdc547 100644 --- a/src/ARCExpect/ARCGraph.fs +++ b/src/ARCExpect/ARCGraph.fs @@ -14,6 +14,13 @@ open InternalUtils /// Functions for creating and working with ARC FGraphs. module ARCGraph = + /// Representation of the familiarity of an IParam's CvTerm. If the CvTerm is known in, e.g., an ontology, use KnownTerm, else use UnknownTerm. ObsoleteTerm is for deprecated terms (i.e., OboTerm with `is_obsolete` = `true`). MisplacedTerm is for terms that do not belong to the section they occur in (i.e., that have no `part_of` relation to the section's header term). + type TermFamiliarity = + | KnownTerm of IParam + | UnknownTerm of IParam + | ObsoleteTerm of IParam + | MisplacedTerm of IParam + /// Takes a list of CvParams and returns the ArcGraph as an FGraph consisting of Nodes only. 
let fromCvParamListAsNodes cvpList = cvpList @@ -23,224 +30,490 @@ module ARCGraph = ) |> FGraph.createFromNodes - /// Returns all terms (as ID * OboTerm * ArcRelation) of a given CvParam by using a given ontology graph via a given relating function. - let getRelatedCvParamsBy relating (cvp : CvParam) (graph : FGraph) = - relating graph[cvp.Accession] - |> Seq.map (fun (id,rel) -> FGraph.findNode id graph, rel) - |> Seq.map (fun ((id,t),r) -> id, t, r) + ///// Returns all terms (as ID * OboTerm * ArcRelation) of a given CvParam by using a given ontology graph via a given relating function. + //let getRelatedCvParamsBy relating (cvp : CvParam) (graph : FGraph) = + // relating graph[cvp.Accession] + // |> Seq.map (fun (id,rel) -> FGraph.findNode id graph, rel) + // |> Seq.map (fun ((id,t),r) -> id, t, r) + + ///// Returns all related terms (as ID * OboTerm * ArcRelation) of a given CvParam by using a given ontology graph. + //let getRelatedCvParams (cvp : CvParam) (graph : FGraph) = + // getRelatedCvParamsBy FContext.neighbours cvp graph + + ///// Returns all succeeding terms (as ID * OboTerm * ArcRelation) of a given CvParam by using a given ontology graph. + //let getSucceedingCvParams (cvp : CvParam) (graph : FGraph) = + // getRelatedCvParamsBy FContext.successors cvp graph + + ///// Returns all preceding terms (as ID * OboTerm * ArcRelation) of a given CvParam by using a given ontology graph. + //let getPrecedingCvParams (cvp : CvParam) (graph : FGraph) = + // getRelatedCvParamsBy FContext.predecessors cvp graph + + ///// Checks is a given current CvParam has a given ArcRelation to a given prior CvParam by using a given ontology graph. 
+ //let hasRelationTo onto (relation : ARCRelation) currentCvp (priorCvp : CvParam) = + // getRelatedCvParams currentCvp onto + // |> Seq.exists (fun (id,t,r) -> id = priorCvp.Accession && r.HasFlag relation) + + ///// Checks is a given current CvParam has a follows relationship to a given prior CvParam by using a given ontology graph. + //let hasFollowsTo onto currentCvp priorCvp = + // hasRelationTo onto ARCRelation.Follows currentCvp priorCvp + + ///// Checks is a given current CvParam has a part_of relationship to a given prior CvParam by using a given OboOntology. + //let hasPartOfTo onto currentCvp priorCvp = + // hasRelationTo onto ARCRelation.PartOf currentCvp priorCvp + + ///// Checks if 2 given CvParams share the same ARCRelation to the same other term. + //let equalsRelation onto (relation : ARCRelation) cvp1 cvp2 = + // let relTermsCvp1 = getRelatedCvParams cvp1 onto |> Seq.filter (fun (id,t,r) -> r.HasFlag relation) + // let relTermsCvp2 = getRelatedCvParams cvp2 onto |> Seq.filter (fun (id,t,r) -> r.HasFlag relation) + // relTermsCvp1 + // |> Seq.exists ( + // fun (id1,t1,r1) -> + // relTermsCvp2 + // |> Seq.exists ( + // fun (id2,t2,r2) -> + // t1 = t2 && r2.HasFlag relation && r1.HasFlag relation + // ) + // ) + /// Returns all terms (as ID * OboTerm * ArcRelation) of a given CvParam by using a given ontology graph via a given relating function. + let getRelatedIParamsBy relating (ip : IParam) (graph : FGraph) = + //relating graph[ip.Accession] + //printfn $"{graph[ip.Name]}" + relating graph[ip.Name] + |> Seq.map (fun (id,rel) -> FGraph.findNode id graph, rel) + |> Seq.map (fun ((id,t),r) -> id, t, r) + /// Returns all related terms (as ID * OboTerm * ArcRelation) of a given CvParam by using a given ontology graph. 
- let getRelatedCvParams (cvp : CvParam) (graph : FGraph) = - getRelatedCvParamsBy FContext.neighbours cvp graph + let getRelatedIParams (ip : IParam) (graph : FGraph) = + getRelatedIParamsBy FContext.neighbours ip graph /// Returns all succeeding terms (as ID * OboTerm * ArcRelation) of a given CvParam by using a given ontology graph. - let getSucceedingCvParams (cvp : CvParam) (graph : FGraph) = - getRelatedCvParamsBy FContext.successors cvp graph + let getSucceedingCvParams (ip : IParam) (graph : FGraph) = + getRelatedIParamsBy FContext.successors ip graph /// Returns all preceding terms (as ID * OboTerm * ArcRelation) of a given CvParam by using a given ontology graph. - let getPrecedingCvParams (cvp : CvParam) (graph : FGraph) = - getRelatedCvParamsBy FContext.predecessors cvp graph + let getPrecedingCvParams (ip : IParam) (graph : FGraph) = + getRelatedIParamsBy FContext.predecessors ip graph /// Checks is a given current CvParam has a given ArcRelation to a given prior CvParam by using a given ontology graph. - let hasRelationTo onto (relation : ARCRelation) currentCvp (priorCvp : CvParam) = - getRelatedCvParams currentCvp onto - |> Seq.exists (fun (id,t,r) -> id = priorCvp.Accession && r.HasFlag relation) + let hasRelationTo onto (relation : ARCRelation) currentIp (priorIp : IParam) = + getSucceedingCvParams currentIp onto + //|> Seq.exists (fun (id,t,r) -> id = priorIp.Accession && r.HasFlag relation) + |> Seq.exists (fun (id,t,r) -> id = priorIp.Name && r.HasFlag relation) + + /// Checks if a given current IParam has a follows relationship to a given prior IParam by using a given ontology graph. + let hasFollowsTo onto currentIp priorIp = + hasRelationTo onto ARCRelation.Follows currentIp priorIp + + /// Checks if a given current IParam has a part_of relationship to a given prior IParam by using a given ontology graph. 
+ let hasPartOfTo onto currentIp priorIp = + hasRelationTo onto ARCRelation.PartOf currentIp priorIp - /// Checks is a given current CvParam has a follows relationship to a given prior CvParam by using a given ontology graph. - let hasFollowsTo onto currentCvp priorCvp = - hasRelationTo onto ARCRelation.Follows currentCvp priorCvp + ///// Takes an ontology-based FGraph and returns a seq of OboTerms that are endpoints. Endpoints are OboTerms that don't have the given ArcRelation pointing at them. + //let getEndpointsBy (arcRelation : ARCRelation) (onto : FGraph) = + // onto.Values + // |> Seq.map (fun c -> c |> fun (id,t,e) -> t, FContext.predecessors c) + // |> Seq.map (fun (t,p) -> t, p |> Seq.filter (fun (id,r) -> r.HasFlag arcRelation)) + // |> Seq.choose (fun (t,p) -> if Seq.length p = 0 then Some t else None) - /// Checks is a given current CvParam has a part_of relationship to a given prior CvParam by using a given OboOntology. - let hasPartOfTo onto currentCvp priorCvp = - hasRelationTo onto ARCRelation.PartOf currentCvp priorCvp + ///// Takes an ontology-based FGraph and returns a seq of OboTerms that are endpoints. Endpoints are OboTerms that have no part_of relation pointing at them. + //let getPartOfEndpoints (onto : FGraph) = + // getEndpointsBy ARCRelation.PartOf onto + + ///// Takes an OboTerm seq of endpoints (that is, any term without part_of predecessors) and filters a list of CvParams where every CvParam that is an endpoint is excluded. + //let deletePartOfEndpointSectionKeys (ontoEndpoints : OboTerm seq) (cvParams : CvParam list) = + // cvParams + // |> List.filter ( + // fun cvp -> + // ontoEndpoints + // |> Seq.exists ( + // fun t -> + // t.Name = cvp.Name && + // t.Id = cvp.Accession && + // CvParam.getValueAsTerm cvp |> fun cvt -> cvt.Accession = "AGMO:00000001" && cvt.Name = "Metadata Section Key" + // ) + // |> not + // ) - /// Checks if 2 given CvParams share the same ARCRelation to the same other term. 
- let equalsRelation onto (relation : ARCRelation) cvp1 cvp2 = - let relTermsCvp1 = getRelatedCvParams cvp1 onto |> Seq.filter (fun (id,t,r) -> r.HasFlag relation) - let relTermsCvp2 = getRelatedCvParams cvp2 onto |> Seq.filter (fun (id,t,r) -> r.HasFlag relation) - relTermsCvp1 - |> Seq.exists ( - fun (id1,t1,r1) -> - relTermsCvp2 - |> Seq.exists ( - fun (id2,t2,r2) -> - t1 = t2 && r2.HasFlag relation && r1.HasFlag relation - ) + /// Returns all non-obsolete terms of the given ontology that don't occur in the given IParam seq, as empty-value CvParams (upcast to IParam). + let getMissingTerms (onto : OboOntology) (ips : IParam seq) = + onto.Terms + |> Seq.choose ( + fun o -> + if o.IsObsolete then None + else + let cvtObo = OboTerm.toCvTerm o + if not (ips |> Seq.exists (fun e -> Param.getTerm e = cvtObo)) then + Some (CvParam(cvtObo, Value "") :> IParam) + else None ) - /// Takes an ontology-based FGraph and returns a seq of OboTerms that are endpoints. Endpoints are OboTerms that don't have the given ArcRelation pointing at them. - let getEndpointsBy (arcRelation : ARCRelation) (onto : FGraph) = - onto.Values - |> Seq.map (fun c -> c |> fun (id,t,e) -> t, FContext.predecessors c) - |> Seq.map (fun (t,p) -> t, p |> Seq.filter (fun (id,r) -> r.HasFlag arcRelation)) - |> Seq.choose (fun (t,p) -> if Seq.length p = 0 then Some t else None) + /// Takes an OboOntology and a seq of IParams and returns the seq with all OboTerms that are missing in it appended as empty-value IParams. + let addMissingTerms onto ips = + let missingTerms = getMissingTerms onto ips + Seq.append ips missingTerms + + /// Groups the given IParams by their term name, returning a seq of name * IParam seq pairs. + let groupTerms (ips : IParam seq) = + ips |> Seq.groupBy (fun ip -> ip.Name) // if erroring: change to `.Accession` - /// Takes an ontology-based FGraph and returns a seq of OboTerms that are endpoints. Endpoints are OboTerms that have no part_of relation pointing at them. 
- let getPartOfEndpoints (onto : FGraph) = - getEndpointsBy ARCRelation.PartOf onto - - /// Takes an OboTerm seq of endpoints (that is, any term without part_of predecessors) and filters a list of CvParams where every CvParam that is an endpoint is excluded. - let deletePartOfEndpointSectionKeys (ontoEndpoints : OboTerm seq) (cvParams : CvParam list) = - cvParams - |> List.filter ( - fun cvp -> - ontoEndpoints - |> Seq.exists ( - fun t -> - t.Name = cvp.Name && - t.Id = cvp.Accession && - CvParam.getValueAsTerm cvp |> fun cvt -> cvt.Accession = "AGMO:00000001" && cvt.Name = "Metadata Section Key" - ) - |> not - ) + ///// Checks if a given CvParam is the header of a given OboTerm sequence. + //let isHeader (ontoEndpoints : OboTerm seq) (cvp : CvParam) = + // ontoEndpoints + // |> Seq.exists (fun t -> t.Name = cvp.Name && t.Id = cvp.Accession) + // |> not - /// Checks if a given CvParam is the header of a given OboTerm sequence. - let isHeader (ontoEndpoints : OboTerm seq) (cvp : CvParam) = - ontoEndpoints - |> Seq.exists (fun t -> t.Name = cvp.Name && t.Id = cvp.Accession) - |> not + /// Checks if a given IParam is a header term in a given ontology-based FGraph. A header term is a term whose node has at least one predecessor connected via an edge that equals ARCRelation.PartOf (edges carrying combined relations are not matched). + let isHeader (ontoGraph : FGraph) ip = + ontoGraph.Keys + |> Seq.choose ( + fun k -> + let hasPartOfs = + FContext.predecessors ontoGraph[k] + |> Seq.filter (fun (nk,ed) -> ed = ARCRelation.PartOf) + |> Seq.length > 0 + if hasPartOfs then + Some (ontoGraph[k] |> fun (p,nd,s) -> nd) + else None + ) + |> Seq.exists (fun term -> OboTerm.toCvTerm term = Param.getTerm ip) - /// Takes an ontology FGraph and a given CvParam and creates a CvParam based on a CvTerm that has the "Follows" relation to the given CvParam's term. The created CvParam's value is an empty string. 
- let createEmptyPriorFollowsCvParam onto cvp = - getSucceedingCvParams cvp onto - |> Seq.pick ( - fun (id,t,r) -> - if r.HasFlag ARCRelation.Follows then - let rowParam = CvParam(Address.row, ParamValue.Value (Param.getValueAsInt cvp["Row"] - 1)) - let colParam = CvParam(Address.column, ParamValue.Value (Param.getValueAsInt cvp["Column"])) - let wsParam = CvParam(Address.worksheet, ParamValue.Value (Param.getValueAsString cvp["Worksheet"])) - Some (CvParam(OboTerm.toCvTerm t, ParamValue.Value "", [rowParam; colParam; wsParam])) - else None - ) + ///// Takes an ontology FGraph and a given CvParam and creates a CvParam based on a CvTerm that has the "Follows" relation to the given CvParam's term. The created CvParam's value is an empty string. + //let createEmptyPriorFollowsCvParam onto cvp = + // getSucceedingCvParams cvp onto + // |> Seq.pick ( + // fun (id,t,r) -> + // if r.HasFlag ARCRelation.Follows then + // let rowParam = CvParam(Address.row, ParamValue.Value (Param.getValueAsInt cvp["Row"] - 1)) + // let colParam = CvParam(Address.column, ParamValue.Value (Param.getValueAsInt cvp["Column"])) + // let wsParam = CvParam(Address.worksheet, ParamValue.Value (Param.getValueAsString cvp["Worksheet"])) + // Some (CvParam(OboTerm.toCvTerm t, ParamValue.Value "", [rowParam; colParam; wsParam])) + // else None + // ) - /// Takes an ontology FGraph and a given CvParam and creates a CvParam based on the CvTerm that the given CvParam's term is related to via the "Follows" relation. The created CvParam's value is an empty string. 
- let createEmptySubsequentFollowsCvParam onto cvp = - getPrecedingCvParams cvp onto - |> Seq.pick ( - fun (id,t,r) -> - if r.HasFlag ARCRelation.Follows then - let rowParam = CvParam(Address.row, ParamValue.Value (Param.getValueAsInt cvp["Row"] + 1)) - let colParam = CvParam(Address.column, ParamValue.Value (Param.getValueAsInt cvp["Column"])) - let wsParam = CvParam(Address.worksheet, ParamValue.Value (Param.getValueAsString cvp["Worksheet"])) - Some (CvParam(OboTerm.toCvTerm t, ParamValue.Value "", [rowParam; colParam; wsParam])) - else None - ) + ///// Takes an ontology FGraph and a given CvParam and creates a CvParam based on the CvTerm that the given CvParam's term is related to via the "Follows" relation. The created CvParam's value is an empty string. + //let createEmptySubsequentFollowsCvParam onto cvp = + // getPrecedingCvParams cvp onto + // |> Seq.pick ( + // fun (id,t,r) -> + // if r.HasFlag ARCRelation.Follows then + // let rowParam = CvParam(Address.row, ParamValue.Value (Param.getValueAsInt cvp["Row"] + 1)) + // let colParam = CvParam(Address.column, ParamValue.Value (Param.getValueAsInt cvp["Column"])) + // let wsParam = CvParam(Address.worksheet, ParamValue.Value (Param.getValueAsString cvp["Worksheet"])) + // Some (CvParam(OboTerm.toCvTerm t, ParamValue.Value "", [rowParam; colParam; wsParam])) + // else None + // ) - /// Takes an ISA-based ontology in the form of an FGraph and a list of CvParams and creates an FGraph based on a section header's "follows" and "part_of" relations. 
- let constructSubgraph isaOntology (cvParams : CvParam list) = - - printfn "Start constructSubgraph with CvPs: %A" cvParams - - let nextToSectionHeader currentCvp priorCvp = - hasPartOfTo isaOntology currentCvp priorCvp - let follows currentCvp priorCvp = - hasFollowsTo isaOntology currentCvp priorCvp - - let isaGraph = FGraph.empty - - let rec loop (tokens : CvParam list) (stash : CvParam list) (prior : CvParam) parent = - match tokens with - | h :: t -> - match t with - | [] -> - match stash with - | [] -> - printfn "done via empty stash!" - () - | _ -> - //printfn $"case new section header: h: {h.Name}, prior: {prior.Name}" - loop (h :: stash |> List.rev |> List.tail) t (stash |> List.rev |> List.head) parent - | _ -> - match follows h prior with - | true -> - //printfn "tokensList is %A" (tokens |> List.map (fun (cvp : CvParam) -> $"{cvp.Name}: {cvp.Value |> ParamValue.getValueAsString}")) - match nextToSectionHeader h prior with - | true -> - //printfn $"case first term after section header: h: {h.Name}, prior: {prior.Name}" - FGraph.addElement (hash h,h.Name) h (hash prior,prior.Name) prior (ARCRelation.PartOf + ARCRelation.Follows) isaGraph |> ignore - printfn $"case first term after section header: h: {h.Name}, prior: {prior.Name}" - FGraph.addElement (hash h,h.Name) h (hash prior,prior.Name) prior (ARCRelation.PartOf + ARCRelation.Follows) isaGraph |> ignore - loop t (prior :: stash) h h - | false -> - //printfn $"case new term: h: {h.Name}, prior: {prior.Name}" - FGraph.addElement (hash h,h.Name) h (hash parent,parent.Name) parent ARCRelation.Follows isaGraph |> ignore - printfn $"case new term: h: {h.Name}, prior: {prior.Name}" - FGraph.addElement (hash h,h.Name) h (hash parent,parent.Name) parent ARCRelation.Follows isaGraph |> ignore - loop t stash h h - | false -> - match CvParam.equalsTerm (CvParam.getTerm h) prior with - | true -> - //printfn $"case same term: h: {h.Name}, prior: {prior.Name}" - loop t (h :: stash) h parent - | false -> - //printfn 
$"case term missing: h: {h.Name}, prior: {prior.Name}" - let missingTerm = createEmptyPriorFollowsCvParam isaOntology h - loop (missingTerm :: h :: t) stash prior parent - | [] -> - printfn "done via empty tokensList! (should not happen...)" - () - - FGraph.addNode (hash cvParams.Head,cvParams.Head.Name) cvParams.Head isaGraph |> ignore - loop cvParams.Tail [] cvParams.Head cvParams.Head - isaGraph + ///// Takes an ISA-based ontology in the form of an FGraph and a list of CvParams and creates an FGraph based on a section header's "follows" and "part_of" relations. + //let constructSubgraph isaOntology (cvParams : CvParam list) = + + // printfn "Start constructSubgraph with CvPs: %A" cvParams + + // let nextToSectionHeader currentCvp priorCvp = + // hasPartOfTo isaOntology currentCvp priorCvp + // let follows currentCvp priorCvp = + // hasFollowsTo isaOntology currentCvp priorCvp + + // let isaGraph = FGraph.empty + + // let rec loop (tokens : CvParam list) (stash : CvParam list) (prior : CvParam) parent = + // match tokens with + // | h :: t -> + // match t with + // | [] -> + // match stash with + // | [] -> + // printfn "done via empty stash!" 
+ // () + // | _ -> + // //printfn $"case new section header: h: {h.Name}, prior: {prior.Name}" + // loop (h :: stash |> List.rev |> List.tail) t (stash |> List.rev |> List.head) parent + // | _ -> + // match follows h prior with + // | true -> + // //printfn "tokensList is %A" (tokens |> List.map (fun (cvp : CvParam) -> $"{cvp.Name}: {cvp.Value |> ParamValue.getValueAsString}")) + // match nextToSectionHeader h prior with + // | true -> + // //printfn $"case first term after section header: h: {h.Name}, prior: {prior.Name}" + // FGraph.addElement (hash h,h.Name) h (hash prior,prior.Name) prior (ARCRelation.PartOf + ARCRelation.Follows) isaGraph |> ignore + // printfn $"case first term after section header: h: {h.Name}, prior: {prior.Name}" + // FGraph.addElement (hash h,h.Name) h (hash prior,prior.Name) prior (ARCRelation.PartOf + ARCRelation.Follows) isaGraph |> ignore + // loop t (prior :: stash) h h + // | false -> + // //printfn $"case new term: h: {h.Name}, prior: {prior.Name}" + // FGraph.addElement (hash h,h.Name) h (hash parent,parent.Name) parent ARCRelation.Follows isaGraph |> ignore + // printfn $"case new term: h: {h.Name}, prior: {prior.Name}" + // FGraph.addElement (hash h,h.Name) h (hash parent,parent.Name) parent ARCRelation.Follows isaGraph |> ignore + // loop t stash h h + // | false -> + // match CvParam.equalsTerm (CvParam.getTerm h) prior with + // | true -> + // //printfn $"case same term: h: {h.Name}, prior: {prior.Name}" + // loop t (h :: stash) h parent + // | false -> + // //printfn $"case term missing: h: {h.Name}, prior: {prior.Name}" + // let missingTerm = createEmptyPriorFollowsCvParam isaOntology h + // loop (missingTerm :: h :: t) stash prior parent + // | [] -> + // printfn "done via empty tokensList! 
(should not happen...)" + // () + + // FGraph.addNode (hash cvParams.Head,cvParams.Head.Name) cvParams.Head isaGraph |> ignore + // loop cvParams.Tail [] cvParams.Head cvParams.Head + // isaGraph - /// Takes on ISA-based ontology FGraph and a structural FGraph and closes all loose ends (i.e., creating connected nodes to such nodes that should have a Follows ArcRelation and share the same PartOf ArcRelation) of the latter according to the ontology graph. - let completeOpenEnds onto (graph : FGraph<(int * string),CvParam,ARCRelation>) = - - let kvs = List.zip (List.ofSeq graph.Keys) (List.ofSeq graph.Values) - let newGraph = - FGraph.toSeq graph - |> Seq.fold (fun acc (nk1,nd1,nk2,nd2,e) -> FGraph.addElement nk1 nd1 nk2 nd2 e acc) FGraph.empty - - let rec loop (input : ((int * string) * FContext<(int * string),CvParam,ARCRelation>) list) = - //printfn "inputL: %A" input.Length - match input with - | (nk1,c) :: t -> - //printfn "pred: %A" (FContext.predecessors c) - if FContext.predecessors c |> Seq.isEmpty then - //printfn "nk1: %A" nk1 - c - |> fun (p,nd1,s) -> - let newS = createEmptySubsequentFollowsCvParam onto nd1 - //printfn "newS: %A" newS - if equalsRelation onto ARCRelation.PartOf nd1 newS then - //printfn "addEle\n" - let newSnk = hash newS, newS.Name - //printfn "newSnk: %A" newSnk - FGraph.addElement newSnk newS nk1 nd1 ARCRelation.Follows newGraph - |> ignore - let newSnkc = newGraph[newSnk] - let newT = (newSnk, newSnkc) :: t - //printfn "newT: %A" newT - loop newT - else - //printfn "no addEle\n" - loop t - else loop t - | [] -> (*printfn "end";*) () - loop kvs + ///// Takes on ISA-based ontology FGraph and a structural FGraph and closes all loose ends (i.e., creating connected nodes to such nodes that should have a Follows ArcRelation and share the same PartOf ArcRelation) of the latter according to the ontology graph. 
+ //let completeOpenEnds onto (graph : FGraph<(int * string),CvParam,ARCRelation>) = + + // let kvs = List.zip (List.ofSeq graph.Keys) (List.ofSeq graph.Values) + // let newGraph = + // FGraph.toSeq graph + // |> Seq.fold (fun acc (nk1,nd1,nk2,nd2,e) -> FGraph.addElement nk1 nd1 nk2 nd2 e acc) FGraph.empty + + // let rec loop (input : ((int * string) * FContext<(int * string),CvParam,ARCRelation>) list) = + // //printfn "inputL: %A" input.Length + // match input with + // | (nk1,c) :: t -> + // //printfn "pred: %A" (FContext.predecessors c) + // if FContext.predecessors c |> Seq.isEmpty then + // //printfn "nk1: %A" nk1 + // c + // |> fun (p,nd1,s) -> + // let newS = createEmptySubsequentFollowsCvParam onto nd1 + // //printfn "newS: %A" newS + // if equalsRelation onto ARCRelation.PartOf nd1 newS then + // //printfn "addEle\n" + // let newSnk = hash newS, newS.Name + // //printfn "newSnk: %A" newSnk + // FGraph.addElement newSnk newS nk1 nd1 ARCRelation.Follows newGraph + // |> ignore + // let newSnkc = newGraph[newSnk] + // let newT = (newSnk, newSnkc) :: t + // //printfn "newT: %A" newT + // loop newT + // else + // //printfn "no addEle\n" + // loop t + // else loop t + // | [] -> (*printfn "end";*) () + // loop kvs + + // newGraph + + ///// Takes a seq of OboTerms that are part_of endpoints and a list of CvParams and returns the CvParams grouped into lists of sections. + //let groupWhenHeader partOfEndpoints (cvps : CvParam list) = + // cvps + // |> List.groupWhen (isHeader partOfEndpoints) + + ///// Takes an ISA-based ontology FGraph, an XLSX parsing function and a path to an XLSX file and returns a seq of section-based ISA-structured subgraphs. + ///// + ///// `xlsxParsing` can be any of `Investigation.parseMetadataSheetFromFile`, `Study.parseMetadataSheetFromFile`, or `Assay.parseMetadataSheetFromFile`. 
+ //let fromXlsxFile onto (xlsxParsing : string -> IParam list) xlsxPath = + // let endpoints = getPartOfEndpoints onto + // let cvps = + // xlsxParsing xlsxPath + // |> List.choose (Param.tryCvParam) + // |> deletePartOfEndpointSectionKeys endpoints + // |> groupWhenHeader endpoints + // cvps + // |> Seq.map ( + // constructSubgraph onto + // >> completeOpenEnds onto + // ) + + /// Checks if there are missing terms in a given seq of IParams by using a given ontology-based FGraph and adds them if so. A term is defined as missing if it has a part_of relation to the seq's head term and is not present in the seq's tail. + let addMissingTermsInGroup (ontoGraph : FGraph) (ips : IParam seq) = + let header = Seq.head ips + let ipsTail = Seq.tail ips + let headerChildren = + ontoGraph[header.Name] + |> FContext.predecessors + |> Seq.choose ( + fun (n,e) -> + if e.HasFlag ARCRelation.PartOf then + ontoGraph[n] + |> fun (p,nd,s) -> + if nd.IsObsolete then None + else Some (OboTerm.toCvTerm nd) + else None + ) + let missingParams = + headerChildren + |> Seq.choose ( + fun cvt -> + let cond = Seq.exists (fun ip -> Param.getTerm ip = cvt) ipsTail + if cond then None + else Some (CvParam(cvt, "") :> IParam) + ) + Seq.append ips missingParams + + /// Checks if a given IParam has a part_of relation to a given header term using an ontology-based FGraph. + let isPartOfHeader (header : IParam) (ontoGraph : FGraph) (ip : IParam) = + ontoGraph[ip.Name] // change to `.Accession` if required + |> FContext.successors + |> Seq.exists (fun (nk,e) -> nk = header.Name && e.HasFlag ARCRelation.PartOf) // change to `.Accession` if required + /// Checks if the given IParam contains an obsolete term using a given OboOntology. + let isObsoleteTerm (onto : OboOntology) (ip : IParam) = + onto.Terms + |> Seq.exists (fun o -> o.IsObsolete && OboTerm.toCvTerm o = Param.getTerm ip) + + /// Returns the TermFamiliarity's IParam value. 
+ let deconstructTf tf = + match tf with + | KnownTerm ip -> ip + | UnknownTerm ip -> ip + | MisplacedTerm ip -> ip + | ObsoleteTerm ip -> ip + + /// Takes a seq of grouped IParams and tags them according to their TermFamiliarity using a given OboOntology. The first group is treated as the header and yields only its first IParam, tagged as KnownTerm. Every other group is tagged as a whole: UnknownTerm if any of its IParams is a UserParam, else ObsoleteTerm if any is obsolete in the ontology, else KnownTerm if any has a part_of relation to the header, else MisplacedTerm. + let matchTerms (onto : OboOntology) (gips : (string * IParam seq) seq) = + let ontoGraph = OboGraph.ontologyToFGraphByName onto // make this instead a parameter when facing performance issues! + let header = Seq.head gips |> snd |> Seq.head + //printfn $"header: {header.Name}" + gips + |> Seq.mapi ( + fun i (n,ips) -> + if i = 0 then n, seq {KnownTerm header} + else + //printfn $"ip: {(ips |> Seq.head).Name}" + if ips |> Seq.exists (fun ip -> Param.tryUserParam ip |> Option.isSome) then n, ips |> Seq.map UnknownTerm + elif ips |> Seq.exists (fun ip -> isObsoleteTerm onto ip) then n, ips |> Seq.map ObsoleteTerm + elif ips |> Seq.exists (fun ip -> isPartOfHeader header ontoGraph ip) then n, ips |> Seq.map KnownTerm + else n, ips |> Seq.map MisplacedTerm + ) + + /// Takes an ontology-based FGraph and a seq of termname * matched IParams to create an intermediate subgraph out of it. This subgraph consists of a chain of nodes that have their termname as nodekey and their IParam seq as nodedata. The nodes are ordered by the follows-relationship taken from the ontology-based FGraph. 
+ let constructIntermediateMetadataSubgraph (ontoGraph : FGraph) (ips : (string * TermFamiliarity seq) seq) = + let rec loop (section : (string * TermFamiliarity seq) list) (stash : (string * TermFamiliarity seq) list) (priorParams : string * IParam seq) (graph : FGraph) = + //printfn "next round" + match section with + | [] -> + //printfn "section empty" + //match stash with + //| [] -> + // printfn "stash empty" + // graph, stash // if section and stash are empty, return graph and empty stash + //| _ -> + // printfn "stash not empty" + // if List.forall (fun (sn,stf) -> match Seq.head stf with MisplacedTerm _ -> true | _ -> false) stash then + // printfn "only MisplacedTerms" + // graph, stash // if section is empty and stash only has MisplacedTerms, return graph and stash + // else + // printfn "some non-MisplacedTerms" + // loop stash [] priorParams graph // else take stash as section and continue + graph, stash + | (hn,hts) :: t -> + //printfn "section not empty" + match Seq.head hts with + | UnknownTerm ip -> // if UnknownTerm then add with Unknown relation to prior node + //printfn "UnknownTerm" + FGraph.addElement hn (Seq.map deconstructTf hts) (fst priorParams) (snd priorParams) ARCRelation.Unknown graph + |> loop t stash priorParams + | KnownTerm ip -> + //printfn "KnownTerm" + let priorName,priorIps = priorParams + if hasFollowsTo ontoGraph ip (Seq.head priorIps) then // + //printfn "has follows" + let hips = hts |> Seq.map deconstructTf + FGraph.addElement hn hips priorName priorIps ARCRelation.Follows graph + |> loop t stash (hn, hips) + else + //printfn "has no follows" + loop t ((hn,hts) :: stash) priorParams graph + | ObsoleteTerm ip -> + //printfn "ObsoleteTerm" + let priorName,priorIps = priorParams + if hasFollowsTo ontoGraph ip (Seq.head priorIps) then + //printfn "has follows" + let hips = hts |> Seq.map deconstructTf + FGraph.addElement hn hips priorName priorIps (ARCRelation.Follows + ARCRelation.Obsolete) graph + |> loop t stash (hn, hips) + 
else + //printfn "has no follows" + loop t ((hn,hts) :: stash) priorParams graph + | MisplacedTerm ip -> + //printfn "MisplacedTerm" + FGraph.addElement hn (Seq.map deconstructTf hts) (fst priorParams) (snd priorParams) ARCRelation.Misplaced graph + |> loop t stash priorParams + let ipsList = Seq.toList ips + loop ipsList.Tail [] (fst ipsList.Head, (snd >> Seq.map deconstructTf) ipsList.Head) FGraph.empty + + /// Takes a subgraph and adds empty IParams of the respective CvTerm to the nodedata if it is shorter than the longest IParam seq of any nodedata so that all IParam seqs have the same number of items. Ignores the header. Returns the given subgraph after updating its nodedata via FGraph.setNodeData. + let addEmptyIpsToNodeData (subgraph : FGraph) = + let longestChainLength = + FGraph.getNodes subgraph + |> Seq.maxBy (snd >> Seq.length) + |> snd + |> Seq.length + let header = FGraph.getTopNodeKey subgraph + subgraph.Keys // .mapNodes would be nicer... + |> Seq.iter ( + fun nk -> + if nk <> header then + let nd = subgraph[nk] |> fun (p,nd,s) -> nd + let currLength = Seq.length nd + if currLength < longestChainLength then + let emptyIps = Seq.init (longestChainLength - currLength) (fun _ -> CvParam(Seq.head nd |> Param.getTerm, "") :> IParam) + FGraph.setNodeData nk (Seq.append nd emptyIps) subgraph + |> ignore + ) + subgraph + + /// Splits the nodedata of a given intermediate subgraph into single nodes. Nodekey changes from name to name * number (of occurrence), 0-based. 
+ let splitMetadataSubgraph (subgraph : FGraph) = + let header = FGraph.getTopNodeKey subgraph + //printfn $"header: {header}" + let newGraph = + subgraph.Keys + |> Seq.fold ( + fun g nk -> + if nk = header then + let nd = FGraph.getNodeData nk subgraph |> Seq.head + FGraph.addNode (nk,0) nd g + else + let nds = FGraph.getNodeData nk subgraph + nds + |> Seq.foldi ( + fun i g2 nd -> + FGraph.addNode (nk,i) nd g2 + ) g + ) FGraph.empty + newGraph.Keys + |> Seq.iter ( + fun (nk,i) -> + //printfn $"nk: {nk}, i: {i}" + let succs = FContext.successors subgraph[nk] + succs + |> Seq.iter ( + fun (nk2,e) -> + if nk2 = header then + //printfn "edge for header" + FGraph.addEdge (nk,i) (nk2,0) e newGraph + else + //printfn "edge for non-header" + FGraph.addEdge (nk,i) (nk2,i) e newGraph + |> ignore + ) + ) newGraph - /// Takes a seq of OboTerms that are part_of endpoints and a list of CvParams and returns the CvParams grouped into lists of sections. - let groupWhenHeader partOfEndpoints (cvps : CvParam list) = - cvps - |> List.groupWhen (isHeader partOfEndpoints) - - /// Takes an ISA-based ontology FGraph, an XLSX parsing function and a path to an XLSX file and returns a seq of section-based ISA-structured subgraphs. - /// - /// `xlsxParsing` can be any of `Investigation.parseMetadataSheetFromFile`, `Study.parseMetadataSheetFromFile`, or `Assay.parseMetadataSheetFromFile`. - let fromXlsxFile onto (xlsxParsing : string -> IParam list) xlsxPath = - let endpoints = getPartOfEndpoints onto - let cvps = - xlsxParsing xlsxPath - |> List.choose (Param.tryCvParam) - |> deletePartOfEndpointSectionKeys endpoints - |> groupWhenHeader endpoints - cvps - |> Seq.map ( - constructSubgraph onto - >> completeOpenEnds onto + /// Takes a metadata subgraph and returns its content as a sequence of flat sequences in the form of (name * number) * nodedata. The items of each inner sequence are grouped by the number. 
+ let metadataSubgraphToList (subgraph : FGraph) = + let headerN, headerI = FGraph.getTopNodeKey subgraph + let chainMaxNo = subgraph.Keys |> Seq.maxBy snd |> snd + Seq.init (chainMaxNo + 1) (fun i -> + subgraph.Keys + |> Seq.choose ( + fun (nk,i2) -> + if nk = headerN then + ((headerN, headerI), FGraph.getNodeData (nk,0) subgraph) + |> Some + elif i = i2 then + ((nk, i), FGraph.getNodeData (nk,i) subgraph) + |> Some + else None + ) ) @@ -295,10 +568,61 @@ module ARCGraph = graph /// Takes an ISA-based FGraph and returns a CyGraph according to its structure. - let isaGraphToFullCyGraph (graph : FGraph) = + let isaGraphToFullCyGraph (graph : FGraph) = + toFullCyGraph + //(fun (h,n) -> $"{h}, {n}") // when using hash * accession or hash * name + id // when using only accession or name + (fun (d : IParam) -> $"{d.Name}: {d.Value |> ParamValue.getValueAsString}") + (fun e -> + [ + CyParam.label <| e.ToString() + match e with + | ARCRelation.Follows -> CyParam.color "red" + | ARCRelation.PartOf -> CyParam.color "blue" + | x when x = ARCRelation.PartOf + ARCRelation.Follows -> CyParam.color "purple" + | ARCRelation.IsA -> CyParam.color "lightblue" + | ARCRelation.Misplaced -> CyParam.color "pink" + | ARCRelation.Obsolete -> CyParam.color "yellow" + | ARCRelation.Unknown -> CyParam.color "black" + | x when x = ARCRelation.Obsolete + ARCRelation.Follows -> CyParam.color "orange" + | ARCRelation.HasA -> CyParam.color "brown" + | _ -> CyParam.color "white" + ] + ) + graph + |> CyGraph.withLayout(Layout.initBreadthfirst <| Layout.LayoutOptions.Cose()) + + /// Takes an ISA-based FGraph and returns a CyGraph according to its structure. 
+ let isaIntermediateGraphToFullCyGraph (graph : FGraph) = + toFullCyGraph + //(fun (h,n) -> $"{h}, {n}") // when using hash * accession or hash * name + id // when using only accession or name + (fun (d : IParam seq) -> $"""{(Seq.head d).Name}: {(Seq.map (fun (lil : IParam) -> lil.Value |> ParamValue.getValueAsString) d) |> String.concat "; "}""") + (fun e -> + [ + CyParam.label <| e.ToString() + match e with + | ARCRelation.Follows -> CyParam.color "red" + | ARCRelation.PartOf -> CyParam.color "blue" + | x when x = ARCRelation.PartOf + ARCRelation.Follows -> CyParam.color "purple" + | ARCRelation.IsA -> CyParam.color "lightblue" + | ARCRelation.Misplaced -> CyParam.color "pink" + | ARCRelation.Obsolete -> CyParam.color "yellow" + | ARCRelation.Unknown -> CyParam.color "black" + | x when x = ARCRelation.Obsolete + ARCRelation.Follows -> CyParam.color "orange" + | ARCRelation.HasA -> CyParam.color "brown" + | _ -> CyParam.color "white" + ] + ) + graph + |> CyGraph.withLayout(Layout.initBreadthfirst <| Layout.LayoutOptions.Cose()) + + /// Takes an ISA-based FGraph and returns a CyGraph according to its structure. 
+ let isaSplitGraphToFullCyGraph (graph : FGraph) = toFullCyGraph - (fun (h,n) -> $"{h}, {n}") - (fun (d : CvParam) -> $"{d.Name}: {d.Value |> ParamValue.getValueAsString}") + //(fun (h,n) -> $"{h}, {n}") // when using hash * accession or hash * name + (fun (nk,i) -> $"{nk}, {i}") + (fun (d : IParam) -> $"{d.Name}: {d.Value |> ParamValue.getValueAsString}") (fun e -> [ CyParam.label <| e.ToString() @@ -306,6 +630,13 @@ module ARCGraph = | ARCRelation.Follows -> CyParam.color "red" | ARCRelation.PartOf -> CyParam.color "blue" | x when x = ARCRelation.PartOf + ARCRelation.Follows -> CyParam.color "purple" + | ARCRelation.IsA -> CyParam.color "lightblue" + | ARCRelation.Misplaced -> CyParam.color "pink" + | ARCRelation.Obsolete -> CyParam.color "yellow" + | ARCRelation.Unknown -> CyParam.color "black" + | x when x = ARCRelation.Obsolete + ARCRelation.Follows -> CyParam.color "orange" + | ARCRelation.HasA -> CyParam.color "brown" + | _ -> CyParam.color "white" ] ) graph diff --git a/src/ARCExpect/ARCRelation.fs b/src/ARCExpect/ARCRelation.fs index c5b399a..ea4160b 100644 --- a/src/ARCExpect/ARCRelation.fs +++ b/src/ARCExpect/ARCRelation.fs @@ -9,6 +9,8 @@ type ARCRelation = | HasA = 4 | Follows = 8 | Unknown = 16 + | Misplaced = 32 + | Obsolete = 64 /// Functions for working with ARCRelations. 
module ARCRelation = diff --git a/src/ARCExpect/InternalUtils.fs b/src/ARCExpect/InternalUtils.fs index ea91ab1..bc53978 100644 --- a/src/ARCExpect/InternalUtils.fs +++ b/src/ARCExpect/InternalUtils.fs @@ -2,6 +2,7 @@ open System +open Graphoscope //// this is needed to allow ValidatorTests project to access internal modules @@ -61,4 +62,16 @@ module InternalUtils = static member toCvTerm (term : OboTerm) = let ref = String.takeWhile ((<>) ':') term.Id - CvTerm.create(term.Id, term.Name, ref) \ No newline at end of file + CvTerm.create(term.Id, term.Name, ref) + + + type FGraph with + + /// Returns the key of the first node in a structured ontology-FGraph that has no successors (i.e. that does not point to any other node). + static member getTopNodeKey (graph : FGraph<_,_,_>) = + graph.Keys + |> Seq.find (fun k -> FContext.successors graph[k] |> Seq.length = 0) + + /// Returns the nodedata of the given graph by using a given nodekey. + static member getNodeData nodeKey (graph : FGraph<_,_,_>) = + graph[nodeKey] |> fun (p,nd,s) -> nd \ No newline at end of file diff --git a/src/ARCExpect/OboGraph.fs b/src/ARCExpect/OboGraph.fs index 650d7c2..f354622 100644 --- a/src/ARCExpect/OboGraph.fs +++ b/src/ARCExpect/OboGraph.fs @@ -17,7 +17,7 @@ module OboGraph = | TargetMissing (r,t) -> None | Target (r,st,tt) -> Some (toARCRelation r,st,tt) - /// Takes an OboOntology and returns an FGraph with OboTerms as nodes and ARCRelations as Edges. The structure of the graph results from the TermRelations between the ontology's terms. + /// Takes an OboOntology and returns an FGraph with OboTerms as nodes (with their ID as nodekey) and ARCRelations as Edges. The structure of the graph results from the TermRelations between the ontology's terms. 
let ontologyToFGraph onto = OboOntology.getRelations onto |> Seq.choose tryToARCRelation @@ -37,3 +37,20 @@ module OboGraph = fun acc (ar,st,tt) -> FGraph.addElement st.Id st tt.Id tt ar acc ) FGraph.empty + + /// Takes an OboOntology and returns an FGraph with OboTerms as nodes (with their name as nodekey) and ARCRelations as Edges. The structure of the graph results from the TermRelations between the ontology's terms. + let ontologyToFGraphByName (onto : OboOntology) = + OboOntology.getRelations onto + |> List.fold ( + fun acc tr -> + match tr with + | Empty st -> FGraph.addNode st.Name st acc + | TargetMissing (rel,st) -> FGraph.addNode st.Name st acc + | Target (rel,st,tt) -> + //printfn $"st: {st.Name}\trelation: {rel}\ttt: {tt.Name}" + if FGraph.containsEdge st.Name tt.Name acc then + let _, _, oldRel = FGraph.findEdge st.Name tt.Name acc + let newRel = oldRel + ARCRelation.toARCRelation rel + FGraph.setEdgeData st.Name tt.Name newRel acc + else FGraph.addElement st.Name st tt.Name tt (ARCRelation.toARCRelation rel) acc + ) FGraph.empty \ No newline at end of file diff --git a/src/ARCExpect/packages.lock.json b/src/ARCExpect/packages.lock.json index d530fde..c490b33 100644 --- a/src/ARCExpect/packages.lock.json +++ b/src/ARCExpect/packages.lock.json @@ -46,11 +46,12 @@ }, "FSharpAux": { "type": "Direct", - "requested": "[1.1.0, 1.1.0]", - "resolved": "1.1.0", - "contentHash": "lKxo49OYLoI27GVLXJ2sOBGazfUVZ9zowLLPUn6mHYrepaMgEdxxeZpLcQwFIfzicSoi3i09IH114GSIXy/Dgg==", + "requested": "[2.0.0, 2.0.0]", + "resolved": "2.0.0", + "contentHash": "2gFFDzIVheYO/glZmzyRaol/P60SWPzH7zM7QFZzfkR+rB3F0IdyVQpo3h9eOdsnOVTIYJhiywOaBAtsW0ZFzA==", "dependencies": { - "FSharp.Core": "6.0.1" + "FSharp.Core": "6.0.7", + "FSharpAux.Core": "2.0.0" } }, "FsOboParser": { @@ -86,14 +87,16 @@ }, "Graphoscope": { "type": "Direct", - "requested": "[0.2.0, 0.2.0]", - "resolved": "0.2.0", - "contentHash": 
"20mAUZMNyYm2BI2FqPSmcR3Ik/0rYNsRPJExBx6xa24Z6dV/Ow93KKFDaIMPeNRQiqWWXTvAV0MHN1TVPkLUpA==", + "requested": "[0.6.0-preview.1, 0.6.0-preview.1]", + "resolved": "0.6.0-preview.1", + "contentHash": "GBXYdS3y8Yg/0a43/r9JzcFiPs/CbdQF3Ikl20ng/gzxBeKeZv08+m9kEwUIV7MTGuNGQWV1ZS6qoRGt35O6YQ==", "dependencies": { "FSharp.Core": "6.0.7", "FSharp.Data": "6.2.0", + "FSharpAux": "2.0.0", "FSharpAux.Core": "2.0.0", - "FSharpx.Collections": "3.1.0" + "FSharpx.Collections": "3.1.0", + "OptimizedPriorityQueue": "5.1.0" } }, "NETStandard.Library": { @@ -286,6 +289,11 @@ "resolved": "13.0.1", "contentHash": "ppPFpBcvxdsfUonNcvITKqLl3bqxWbDCZIzDWHzjpdAHRFfZe0Dw9HmA0+za13IdyrgJwpkDTDA9fHaxOrt20A==" }, + "OptimizedPriorityQueue": { + "type": "Transitive", + "resolved": "5.1.0", + "contentHash": "0AnX6kAs0GiLrSqrHKe5mpQhJSHTmXeqLNucV2CxGATP/I9UzLMc1N4MtS/z8qB6mcgX61OjHdY3YbOmETGs+A==" + }, "System.Buffers": { "type": "Transitive", "resolved": "4.5.1", diff --git a/src/arc-validate/packages.lock.json b/src/arc-validate/packages.lock.json index e6de3aa..8519f87 100644 --- a/src/arc-validate/packages.lock.json +++ b/src/arc-validate/packages.lock.json @@ -349,10 +349,11 @@ }, "FSharpAux": { "type": "Transitive", - "resolved": "1.1.0", - "contentHash": "lKxo49OYLoI27GVLXJ2sOBGazfUVZ9zowLLPUn6mHYrepaMgEdxxeZpLcQwFIfzicSoi3i09IH114GSIXy/Dgg==", + "resolved": "2.0.0", + "contentHash": "2gFFDzIVheYO/glZmzyRaol/P60SWPzH7zM7QFZzfkR+rB3F0IdyVQpo3h9eOdsnOVTIYJhiywOaBAtsW0ZFzA==", "dependencies": { - "FSharp.Core": "6.0.1" + "FSharp.Core": "6.0.7", + "FSharpAux.Core": "2.0.0" } }, "FSharpAux.Core": { @@ -417,13 +418,15 @@ }, "Graphoscope": { "type": "Transitive", - "resolved": "0.2.0", - "contentHash": "20mAUZMNyYm2BI2FqPSmcR3Ik/0rYNsRPJExBx6xa24Z6dV/Ow93KKFDaIMPeNRQiqWWXTvAV0MHN1TVPkLUpA==", + "resolved": "0.6.0-preview.1", + "contentHash": "GBXYdS3y8Yg/0a43/r9JzcFiPs/CbdQF3Ikl20ng/gzxBeKeZv08+m9kEwUIV7MTGuNGQWV1ZS6qoRGt35O6YQ==", "dependencies": { "FSharp.Core": "6.0.7", "FSharp.Data": "6.2.0", + 
"FSharpAux": "2.0.0", "FSharpAux.Core": "2.0.0", - "FSharpx.Collections": "3.1.0" + "FSharpx.Collections": "3.1.0", + "OptimizedPriorityQueue": "5.1.0" } }, "ISADotNet": { @@ -604,6 +607,11 @@ "resolved": "6.0.0", "contentHash": "XA59hPSbKdExu28TbONlD/BwOkKzjo4u2mkEJIP4wnDz9P65FdM34cVA+2/jztKrKyFmX8jpQW++xOp99mufjw==" }, + "OptimizedPriorityQueue": { + "type": "Transitive", + "resolved": "5.1.0", + "contentHash": "0AnX6kAs0GiLrSqrHKe5mpQhJSHTmXeqLNucV2CxGATP/I9UzLMc1N4MtS/z8qB6mcgX61OjHdY3YbOmETGs+A==" + }, "Semver": { "type": "Transitive", "resolved": "2.3.0", @@ -917,11 +925,11 @@ "Cytoscape.NET": "[0.2.0, 0.2.0]", "Expecto": "[9.0.4, 9.0.4]", "FSharp.Core": "[6.0.7, )", - "FSharpAux": "[1.1.0, 1.1.0]", + "FSharpAux": "[2.0.0, 2.0.0]", "FsOboParser": "[0.3.0, 0.3.0]", "FsSpreadsheet": "[4.1.0, 4.1.0]", "FsSpreadsheet.ExcelIO": "[4.1.0, 4.1.0]", - "Graphoscope": "[0.2.0, 0.2.0]" + "Graphoscope": "[0.6.0-preview.1, 0.6.0-preview.1]" } }, "arcvalidationpackages": {