
Commit da7a2c5

only show images as content creation source when vision model is configured

stoerr committed Jan 17, 2024
1 parent 8cfed57 commit da7a2c5
Showing 8 changed files with 96 additions and 9 deletions.
ContentCreationSelectorsServlet.java

@@ -28,6 +28,7 @@
import com.adobe.granite.ui.components.ds.DataSource;
import com.adobe.granite.ui.components.ds.SimpleDataSource;
import com.adobe.granite.ui.components.ds.ValueMapResource;
import com.composum.ai.backend.base.service.chat.GPTChatCompletionService;
import com.composum.ai.backend.slingbase.ApproximateMarkdownService;
import com.google.gson.Gson;

@@ -51,6 +52,9 @@ public class ContentCreationSelectorsServlet extends SlingSafeMethodsServlet {
@Reference
private ApproximateMarkdownService approximateMarkdownService;

@Reference
private GPTChatCompletionService chatCompletionService;

@Override
protected void doGet(@Nonnull SlingHttpServletRequest request, @Nonnull SlingHttpServletResponse response) throws ServletException, IOException {
Map<String, String> contentSelectors = readPredefinedContentSelectors(request);
@@ -73,7 +77,9 @@ protected void addContentPaths(Resource resource, Map<String, String> contentSelectors
}
List<ApproximateMarkdownService.Link> componentLinks = approximateMarkdownService.getComponentLinks(resource);
for (ApproximateMarkdownService.Link link : componentLinks) {
-            contentSelectors.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")");
+            if (!link.isNeedsVision() || chatCompletionService.isVisionEnabled()) {
+                contentSelectors.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")");
+            }
}
}

GPTChatCompletionService.java

@@ -76,4 +76,9 @@ public interface GPTChatCompletionService {
* (That is currently whether there is an api key either globally or in the gptConfig).
*/
boolean isEnabled(GPTConfiguration gptConfig);

/**
* Returns true if vision is enabled.
*/
boolean isVisionEnabled();
}
GPTChatCompletionServiceImpl.java

@@ -541,6 +541,11 @@ public boolean isEnabled(GPTConfiguration gptConfig) {
);
}

@Override
public boolean isVisionEnabled() {
return imageModel != null && !imageModel.trim().isEmpty();
}

@Nonnull
@Override
public GPTChatMessagesTemplate getTemplate(@Nonnull String templateName) throws GPTException {
ApproximateMarkdownService.java

@@ -78,10 +78,12 @@ void approximateMarkdown(@Nullable Resource resource, @Nonnull PrintWriter out,
class Link {
private final String path;
private final String title;
private final boolean needsVision;

-        public Link(String path, String title) {
+        public Link(String path, String title, boolean needsVision) {
this.path = path;
this.title = title;
this.needsVision = needsVision;
}

public String getPath() {
@@ -92,24 +94,30 @@ public String getTitle() {
return title;
}

public boolean isNeedsVision() {
return needsVision;
}

@Override
public boolean equals(Object object) {
if (this == object) return true;
if (!(object instanceof Link)) return false;
Link link = (Link) object;
-            return Objects.equals(getPath(), link.getPath()) && Objects.equals(getTitle(), link.getTitle());
+            return Objects.equals(getPath(), link.getPath()) && Objects.equals(getTitle(), link.getTitle())
+                    && needsVision == link.needsVision;
}

@Override
public int hashCode() {
-            return Objects.hash(getPath(), getTitle());
+            return Objects.hash(getPath(), getTitle(), needsVision);
}

@Override
public String toString() {
return "Link{" +
"path='" + path + '\'' +
", title='" + title + '\'' +
", needsVision=" + needsVision +
'}';
}
}
ApproximateMarkdownServiceImpl.java

@@ -73,6 +73,10 @@ public class ApproximateMarkdownServiceImpl implements ApproximateMarkdownService
*/
protected final static Pattern IGNORED_NODE_NAMES = Pattern.compile("i18n|renditions|rep:.*|dam:.*|cq:.*");

protected final static Pattern IMAGE_PATTERN = Pattern.compile("\\.(png|jpg|jpeg|gif|svg)(/|$)", Pattern.CASE_INSENSITIVE);

protected final static Pattern VIDEO_PATTERN = Pattern.compile("\\.(mp4|mov)(/|$)", Pattern.CASE_INSENSITIVE);

/**
* A list of attributes that are output (in that ordering) without any label, each on a line for itself.
*/
@@ -377,9 +381,12 @@ protected void collectLinks(@NotNull Resource resource, List<Link> resourceLinks
title = targetResource.getParent().getName();
}
}
-            Link link = new Link(path, title);
-            if (!resourceLinks.contains(link)) {
-                resourceLinks.add(link);
+            boolean needsVision = isNeedsVision(targetResource);
+            if (!VIDEO_PATTERN.matcher(targetResource.getPath()).find()) {
+                Link link = new Link(path, title, needsVision);
+                if (!resourceLinks.contains(link)) {
+                    resourceLinks.add(link);
+                }
}
}
});
@@ -388,6 +395,16 @@
});
}

private static boolean isNeedsVision(Resource targetResource) {
if (IMAGE_PATTERN.matcher(targetResource.getPath()).find()) {
return true;
}
if (targetResource.getValueMap().get("jcr:content/jcr:mimeType", String.class) != null) {
return true;
}
return false;
}

@Override
public String getImageUrl(Resource imageResource) {
if (imageResource == null) {
CreateDialogModel.java

@@ -12,6 +12,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.composum.ai.backend.base.service.chat.GPTChatCompletionService;
import com.composum.ai.backend.slingbase.ApproximateMarkdownService;
import com.composum.pages.commons.model.AbstractModel;
import com.google.gson.Gson;
@@ -25,6 +26,8 @@ public class CreateDialogModel extends AbstractModel {

protected transient ApproximateMarkdownService approximateMarkdownService;

protected transient GPTChatCompletionService chatCompletionService;

public Map<String, String> getPredefinedPrompts() {
return readJsonFile("create/predefinedprompts.json");
}
@@ -34,7 +37,9 @@ public Map<String, String> getContentSelectors() {
results.putAll(readJsonFile("create/contentselectors.json"));
List<ApproximateMarkdownService.Link> componentLinks = getApproximateMarkdownService().getComponentLinks(getResource());
for (ApproximateMarkdownService.Link link : componentLinks) {
-            results.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")");
+            if (!link.isNeedsVision() || getChatCompletionService().isVisionEnabled()) {
+                results.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")");
+            }
}
return results;
}
@@ -46,6 +51,13 @@ protected ApproximateMarkdownService getApproximateMarkdownService() {
return approximateMarkdownService;
}

protected GPTChatCompletionService getChatCompletionService() {
if (chatCompletionService == null) {
chatCompletionService = requireNonNull(context.getService(GPTChatCompletionService.class));
}
return chatCompletionService;
}

public Map<String, String> getTextLengths() {
return readJsonFile("create/textlengths.json");
}
Content creation dialog JavaScript

@@ -372,7 +372,7 @@
generateButtonClicked: function (event) {
event.preventDefault();
this.setLoading(true);
-            this.$response[0].scrollIntoView();
+            this.$el.find('.stop-button')[0].scrollIntoView();

const that = this;

34 changes: 34 additions & 0 deletions featurespecs/7Imagerecognition.md
@@ -21,3 +21,37 @@ java ImageIO
]
}
]

## Implementation remarks

We extend the content creation dialog with vision features: it is possible to select an image as content source and
have the model gpt-4-vision-preview process it. (That model is currently in beta and has some limitations.)
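
For illustration, here is a minimal sketch of the kind of request body the vision model expects. The exact request
building inside GPTChatCompletionServiceImpl is not shown in this commit; the payload below just follows the public
OpenAI chat completions format for image input and uses Gson (already a dependency of this module) for serialization.
The class name, prompt and image URL are made up for the example.

```java
import java.util.List;
import java.util.Map;

import com.google.gson.Gson;

// Illustrative only: builds an OpenAI-style chat completion request that sends an image along with a prompt.
public class VisionRequestSketch {

    public static String buildRequestBody(String prompt, String imageUrl) {
        Map<String, Object> textPart = Map.of("type", "text", "text", prompt);
        Map<String, Object> imagePart = Map.of("type", "image_url",
                "image_url", Map.of("url", imageUrl));
        Map<String, Object> message = Map.of("role", "user",
                "content", List.of(textPart, imagePart));
        Map<String, Object> body = Map.of(
                "model", "gpt-4-vision-preview", // the vision model mentioned above
                "messages", List.of(message),
                "max_tokens", 1000);
        return new Gson().toJson(body);
    }

    public static void main(String[] args) {
        // Hypothetical image URL, just to show the call.
        System.out.println(buildRequestBody("Describe this image.", "https://example.com/sample.png"));
    }
}
```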

Vision has to be optional, since it needs the somewhat pricier GPT-4 models. If it is not switched on, the image
options in the content source selection need to be switched off, and the "describe image" prompt should not be offered.

In GPTChatCompletionServiceImpl the default model is configured, and we also need to configure the vision model there.
If no vision model is configured, vision has to be off.
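
As a sketch of how that configuration could look (the real configuration interface of GPTChatCompletionServiceImpl is
not part of this diff; the names GPTConfigSketch and imageModel() are illustrative assumptions):

```java
import org.osgi.service.metatype.annotations.AttributeDefinition;
import org.osgi.service.metatype.annotations.ObjectClassDefinition;

// Sketch only: an OSGi metatype property for the vision model; if it is left empty, isVisionEnabled() returns false.
@ObjectClassDefinition(name = "Composum AI GPT Chat Completion Service (sketch)")
public @interface GPTConfigSketch {

    @AttributeDefinition(name = "Vision Model",
            description = "Model used to process images, e.g. gpt-4-vision-preview. If empty, vision is disabled.")
    String imageModel() default "";
}
```

The implementation of isVisionEnabled() shown in the diff above then simply checks that imageModel is set and not blank.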

In the case of Composum, the content selectors are read by the method getContentSelectors of the class
com.composum.ai.composum.bundle.model.CreateDialogModel, and the prompts by getPredefinedPrompts.

In the case of AEM, the content selectors are read from the datasource composum-ai/servlets/contentcreationselectors
(ContentCreationSelectorsServlet), and the predefined prompts are read from the datasource
/conf/composum-ai/settings/dialogs/contentcreation/predefinedprompts.

Since there is currently only one vision-related prompt, it is difficult to filter it out, and the predefined prompts
need reworking for language dependence anyway, so we do not filter out that prompt.

The simplest way to implement this is to provide isVisionEnabled in the GPTChatCompletionService, which is true if a
vision model is set.

The GPTChatMessage was extended with imageUrl as an additional attribute to support images.
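
A rough sketch of the extended message class (the actual GPTChatMessage source is not part of this diff; the
constructor signature and the role type are assumptions for illustration):

```java
// Sketch only: a chat message that optionally carries an image URL for vision requests.
public class GPTChatMessageSketch {

    private final String role;     // e.g. "user" or "system"
    private final String content;  // the text part of the message
    private final String imageUrl; // optional image URL (or data URL); null for text-only messages

    public GPTChatMessageSketch(String role, String content, String imageUrl) {
        this.role = role;
        this.content = content;
        this.imageUrl = imageUrl;
    }

    public String getRole() { return role; }
    public String getContent() { return content; }
    public String getImageUrl() { return imageUrl; }

    public boolean hasImage() { return imageUrl != null && !imageUrl.isEmpty(); }
}
```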

## Test resources

Composum: teasers or http://localhost:9090/bin/pages.html/content/ist/composum/home/blog/nodes/restrictions

AEM: teasers and experience fragments when images are present.
http://localhost:4502/editor.html/content/experience-fragments/wknd/us/en/adventures/adventures-2021/master.html
