
Commit da7a2c5

only show images as content creation source when vision model is configured

stoerr committed Jan 17, 2024
1 parent 8cfed57 commit da7a2c5
Showing 8 changed files with 96 additions and 9 deletions.
ContentCreationSelectorsServlet.java

@@ -28,6 +28,7 @@
import com.adobe.granite.ui.components.ds.DataSource;
import com.adobe.granite.ui.components.ds.SimpleDataSource;
import com.adobe.granite.ui.components.ds.ValueMapResource;
import com.composum.ai.backend.base.service.chat.GPTChatCompletionService;
import com.composum.ai.backend.slingbase.ApproximateMarkdownService;
import com.google.gson.Gson;

@@ -51,6 +52,9 @@ public class ContentCreationSelectorsServlet extends SlingSafeMethodsServlet {
@Reference
private ApproximateMarkdownService approximateMarkdownService;

@Reference
private GPTChatCompletionService chatCompletionService;

@Override
protected void doGet(@Nonnull SlingHttpServletRequest request, @Nonnull SlingHttpServletResponse response) throws ServletException, IOException {
Map<String, String> contentSelectors = readPredefinedContentSelectors(request);
@@ -73,7 +77,9 @@ protected void addContentPaths(Resource resource, Map<String, String> contentSelectors
}
List<ApproximateMarkdownService.Link> componentLinks = approximateMarkdownService.getComponentLinks(resource);
for (ApproximateMarkdownService.Link link : componentLinks) {
-            contentSelectors.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")");
+            if (!link.isNeedsVision() || chatCompletionService.isVisionEnabled()) {
+                contentSelectors.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")");
+            }
}
}

GPTChatCompletionService.java

@@ -76,4 +76,9 @@ public interface GPTChatCompletionService {
* (That is currently whether there is an api key either globally or in the gptConfig).
*/
boolean isEnabled(GPTConfiguration gptConfig);

/**
* Returns true if vision is enabled.
*/
boolean isVisionEnabled();
}
GPTChatCompletionServiceImpl.java

@@ -541,6 +541,11 @@ public boolean isEnabled(GPTConfiguration gptConfig) {
);
}

@Override
public boolean isVisionEnabled() {
return imageModel != null && !imageModel.trim().isEmpty();
}

@Nonnull
@Override
public GPTChatMessagesTemplate getTemplate(@Nonnull String templateName) throws GPTException {
ApproximateMarkdownService.java

@@ -78,10 +78,12 @@ void approximateMarkdown(@Nullable Resource resource, @Nonnull PrintWriter out,
class Link {
private final String path;
private final String title;
private final boolean needsVision;

-        public Link(String path, String title) {
+        public Link(String path, String title, boolean needsVision) {
this.path = path;
this.title = title;
this.needsVision = needsVision;
}

public String getPath() {
@@ -92,24 +94,30 @@ public String getTitle() {
return title;
}

public boolean isNeedsVision() {
return needsVision;
}

@Override
public boolean equals(Object object) {
if (this == object) return true;
if (!(object instanceof Link)) return false;
Link link = (Link) object;
-            return Objects.equals(getPath(), link.getPath()) && Objects.equals(getTitle(), link.getTitle());
+            return Objects.equals(getPath(), link.getPath()) && Objects.equals(getTitle(), link.getTitle())
+                    && needsVision == link.needsVision;
}

@Override
public int hashCode() {
-            return Objects.hash(getPath(), getTitle());
+            return Objects.hash(getPath(), getTitle(), needsVision);
}

@Override
public String toString() {
return "Link{" +
"path='" + path + '\'' +
", title='" + title + '\'' +
", needsVision=" + needsVision +
'}';
}
}
ApproximateMarkdownServiceImpl.java

@@ -73,6 +73,10 @@ public class ApproximateMarkdownServiceImpl implements ApproximateMarkdownService
*/
protected final static Pattern IGNORED_NODE_NAMES = Pattern.compile("i18n|renditions|rep:.*|dam:.*|cq:.*");

protected final static Pattern IMAGE_PATTERN = Pattern.compile("\\.(png|jpg|jpeg|gif|svg)(/|$)", Pattern.CASE_INSENSITIVE);

protected final static Pattern VIDEO_PATTERN = Pattern.compile("\\.(mp4|mov)(/|$)", Pattern.CASE_INSENSITIVE);

/**
* A list of attributes that are output (in that ordering) without any label, each on a line for itself.
*/
@@ -377,9 +381,12 @@ protected void collectLinks(@NotNull Resource resource, List<Link> resourceLinks
title = targetResource.getParent().getName();
}
}
-            Link link = new Link(path, title);
-            if (!resourceLinks.contains(link)) {
-                resourceLinks.add(link);
+            boolean needsVision = isNeedsVision(targetResource);
+            if (!VIDEO_PATTERN.matcher(targetResource.getPath()).find()) {
+                Link link = new Link(path, title, needsVision);
+                if (!resourceLinks.contains(link)) {
+                    resourceLinks.add(link);
+                }
}
}
});
@@ -388,6 +395,16 @@
});
}

private static boolean isNeedsVision(Resource targetResource) {
if (IMAGE_PATTERN.matcher(targetResource.getPath()).find()) {
return true;
}
if (targetResource.getValueMap().get("jcr:content/jcr:mimeType", String.class) != null) {
return true;
}
return false;
}

@Override
public String getImageUrl(Resource imageResource) {
if (imageResource == null) {
CreateDialogModel.java

@@ -12,6 +12,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.composum.ai.backend.base.service.chat.GPTChatCompletionService;
import com.composum.ai.backend.slingbase.ApproximateMarkdownService;
import com.composum.pages.commons.model.AbstractModel;
import com.google.gson.Gson;
@@ -25,6 +26,8 @@ public class CreateDialogModel extends AbstractModel {

protected transient ApproximateMarkdownService approximateMarkdownService;

protected transient GPTChatCompletionService chatCompletionService;

public Map<String, String> getPredefinedPrompts() {
return readJsonFile("create/predefinedprompts.json");
}
@@ -34,7 +37,9 @@ public Map<String, String> getContentSelectors() {
results.putAll(readJsonFile("create/contentselectors.json"));
List<ApproximateMarkdownService.Link> componentLinks = getApproximateMarkdownService().getComponentLinks(getResource());
for (ApproximateMarkdownService.Link link : componentLinks) {
-            results.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")");
+            if (!link.isNeedsVision() || getChatCompletionService().isVisionEnabled()) {
+                results.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")");
+            }
}
return results;
}
@@ -46,6 +51,13 @@ protected ApproximateMarkdownService getApproximateMarkdownService() {
return approximateMarkdownService;
}

protected GPTChatCompletionService getChatCompletionService() {
if (chatCompletionService == null) {
chatCompletionService = requireNonNull(context.getService(GPTChatCompletionService.class));
}
return chatCompletionService;
}

public Map<String, String> getTextLengths() {
return readJsonFile("create/textlengths.json");
}
Content creation dialog JavaScript

@@ -372,7 +372,7 @@
generateButtonClicked: function (event) {
event.preventDefault();
this.setLoading(true);
-            this.$response[0].scrollIntoView();
+            this.$el.find('.stop-button')[0].scrollIntoView();

const that = this;

34 changes: 34 additions & 0 deletions featurespecs/7Imagerecognition.md
@@ -21,3 +21,37 @@ java ImageIO
]
}
]

## Implementation remarks

We extend the content creation dialog with vision features: it is possible to select an image as content source and
have the model gpt-4-vision-preview process it. (That model is currently in beta and has some limitations.)
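
For illustration, here is a minimal sketch of the kind of request body the vision model expects. The exact request
building inside GPTChatCompletionServiceImpl is not shown in this commit; the payload below just follows the public
OpenAI chat completions format for image input and uses Gson (already a dependency of this module) for serialization.
The class name, prompt and image URL are made up for the example.

```java
import java.util.List;
import java.util.Map;

import com.google.gson.Gson;

// Illustrative only: builds an OpenAI-style chat completion request that sends an image along with a prompt.
public class VisionRequestSketch {

    public static String buildRequestBody(String prompt, String imageUrl) {
        Map<String, Object> textPart = Map.of("type", "text", "text", prompt);
        Map<String, Object> imagePart = Map.of("type", "image_url",
                "image_url", Map.of("url", imageUrl));
        Map<String, Object> message = Map.of("role", "user",
                "content", List.of(textPart, imagePart));
        Map<String, Object> body = Map.of(
                "model", "gpt-4-vision-preview", // the vision model mentioned above
                "messages", List.of(message),
                "max_tokens", 1000);
        return new Gson().toJson(body);
    }

    public static void main(String[] args) {
        // Hypothetical image URL, just to show the call.
        System.out.println(buildRequestBody("Describe this image.", "https://example.com/sample.png"));
    }
}
```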

Vision has to be optional, since it needs the somewhat pricier GPT-4 models. If it is not switched on, the image
options in the content source selection need to be switched off, and the "describe image" prompt should not be offered.

In GPTChatCompletionServiceImpl the default model is configured, and we also need to configure the vision model there.
If no vision model is configured, vision has to be off.
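
As a sketch of how that configuration could look (the real configuration interface of GPTChatCompletionServiceImpl is
not part of this diff; the names GPTConfigSketch and imageModel() are illustrative assumptions):

```java
import org.osgi.service.metatype.annotations.AttributeDefinition;
import org.osgi.service.metatype.annotations.ObjectClassDefinition;

// Sketch only: an OSGi metatype property for the vision model; if it is left empty, isVisionEnabled() returns false.
@ObjectClassDefinition(name = "Composum AI GPT Chat Completion Service (sketch)")
public @interface GPTConfigSketch {

    @AttributeDefinition(name = "Vision Model",
            description = "Model used to process images, e.g. gpt-4-vision-preview. If empty, vision is disabled.")
    String imageModel() default "";
}
```

The implementation of isVisionEnabled() shown in the diff above then simply checks that imageModel is set and not blank.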

In the case of Composum, the content selectors are read by the method getContentSelectors of the class
com.composum.ai.composum.bundle.model.CreateDialogModel, and the prompts by getPredefinedPrompts.

In the case of AEM, the content selectors are read from the datasource composum-ai/servlets/contentcreationselectors
(ContentCreationSelectorsServlet), and the predefined prompts are read from the datasource
/conf/composum-ai/settings/dialogs/contentcreation/predefinedprompts.

Since there is currently only one vision-related prompt, it is difficult to filter it out, and the predefined prompts
need reworking for language dependence anyway, so we do not filter out that prompt.

The simplest way to implement this is to provide isVisionEnabled in the GPTChatCompletionService, which is true if a
vision model is set.

The GPTChatMessage was extended with imageUrl as an additional attribute to support images.
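
A rough sketch of the extended message class (the actual GPTChatMessage source is not part of this diff; the
constructor signature and the role type are assumptions for illustration):

```java
// Sketch only: a chat message that optionally carries an image URL for vision requests.
public class GPTChatMessageSketch {

    private final String role;     // e.g. "user" or "system"
    private final String content;  // the text part of the message
    private final String imageUrl; // optional image URL (or data URL); null for text-only messages

    public GPTChatMessageSketch(String role, String content, String imageUrl) {
        this.role = role;
        this.content = content;
        this.imageUrl = imageUrl;
    }

    public String getRole() { return role; }
    public String getContent() { return content; }
    public String getImageUrl() { return imageUrl; }

    public boolean hasImage() { return imageUrl != null && !imageUrl.isEmpty(); }
}
```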

## Test resources

Composum: teasers or http://localhost:9090/bin/pages.html/content/ist/composum/home/blog/nodes/restrictions

AEM: teasers and experience fragments when images are present.
http://localhost:4502/editor.html/content/experience-fragments/wknd/us/en/adventures/adventures-2021/master.html
