From 55fa274c33d076b3ad8ba39eaa02f7c9b06291b5 Mon Sep 17 00:00:00 2001 From: Haider Iqbal Date: Thu, 17 Oct 2024 14:03:36 +0100 Subject: [PATCH] Update curie formation logic (#750) * - Update curie formation logic * - Improve code logic * - Improve code logic to handle edge cases --- .../ebi/rdf2json/annotators/ShortFormAnnotator.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java index 1b2d5b1b0..bdb9b8809 100644 --- a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java @@ -36,16 +36,21 @@ public static void annotateShortForms(OntologyGraph graph) { } String shortForm = extractShortForm(graph, ontologyBaseUris, preferredPrefix, c.uri); - String curie = shortForm.replaceFirst("_", ":"); + /* + CURIEs are formed by the following rules: + If there is only one underscore "_" and the characters after the underscore are all digits, then replace the underscore with a colon ":" + If there is only one underscore "_" and the characters after the underscore are not all digits, then keep the CURIE the same as the short form + If there are multiple underscores but only digits after the last underscore, then replace the last underscore with a colon + */ + + String curie = shortForm.replaceFirst("_(\\d+)$", ":$1"); c.properties.addProperty("shortForm", PropertyValueLiteral.fromString(shortForm)); c.properties.addProperty("curie", PropertyValueLiteral.fromString(curie)); } } long endTime3 = System.nanoTime(); logger.info("annotate short forms: {}", ((endTime3 - startTime3) / 1000 / 1000 / 1000)); - - } private static String extractShortForm(OntologyGraph graph, Set ontologyBaseUris, String preferredPrefix,