diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..23a3bbe5c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "maven" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "monthly" diff --git a/.gitignore b/.gitignore index 1c9505005..615538685 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,4 @@ target .cgptdevbench/llmsearch.db .linklint .lycheecache +build.log diff --git a/TODO.txt b/TODO.txt index 89ffcb56a..9134a3f80 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,5 +1,7 @@ # List of minor Todos +Image description from URL? + AEM for content fragments? 
URL as base text , inner links Append button for AEM @@ -31,3 +33,14 @@ ignore: align, fileReference, target, style, element ## Check out Adobe Sensei GenAI https://business.adobe.com/summit/2023/sessions/opening-keynote-gs1.html at 1:20:00 or something + +## Images + +https://github.com/TheoKanning/openai-java/issues/397 +Alternative: https://github.com/namankhurpia/Easy-open-ai +https://mvnrepository.com/artifact/io.github.namankhurpia/easyopenai -> many dependencies :-( + +## + +DTB Chat Completion Gen +https://chat.openai.com/share/c095d1db-4e72-4abe-8794-c1fe9e01fbf7 diff --git a/aem/core/src/main/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePlugin.java b/aem/core/src/main/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePlugin.java index 9eb6ba8ad..b69e744e1 100644 --- a/aem/core/src/main/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePlugin.java +++ b/aem/core/src/main/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePlugin.java @@ -4,8 +4,14 @@ import static com.day.cq.commons.jcr.JcrConstants.JCR_DESCRIPTION; import static com.day.cq.commons.jcr.JcrConstants.JCR_TITLE; +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Base64; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -14,7 +20,9 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import javax.imageio.ImageIO; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.sling.api.SlingHttpServletRequest; import org.apache.sling.api.SlingHttpServletResponse; @@ -57,6 +65,9 @@ public class AemApproximateMarkdownServicePlugin implements ApproximateMarkdownS @Nonnull Resource resource, @Nonnull PrintWriter out, @Nonnull ApproximateMarkdownService service, 
@Nonnull SlingHttpServletRequest request, @Nonnull SlingHttpServletResponse response) { + if (renderDamAssets(resource, out, response)) { + return PluginResult.HANDLED_ALL; + } if (resourceRendersAsComponentMatching(resource, FULLY_IGNORED_TYPES)) { return PluginResult.HANDLED_ALL; } @@ -305,4 +316,80 @@ protected List listModelResources(List list, Resource traver return list; } + /** + * If the resource is a dam:Asset or a dam:AssetContent jcr:content then we return an image link + */ + protected boolean renderDamAssets(Resource resource, PrintWriter out, SlingHttpServletResponse response) { + Resource assetNode = resource; + if (resource.isResourceType("dam:AssetContent")) { + assetNode = resource.getParent(); + } + if (assetNode.isResourceType("dam:Asset")) { + String mimeType = assetNode.getValueMap().get("jcr:content/metadata/dc:format", String.class); + if (StringUtils.startsWith(mimeType, "image/")) { + String name = StringUtils.defaultString(assetNode.getValueMap().get("jcr:content/jcr:title", String.class), assetNode.getName()); + out.println("![" + name + "](" + assetNode.getPath()); + try { + response.addHeader(ApproximateMarkdownService.HEADER_IMAGEPATH, resource.getParent().getPath()); + } catch (RuntimeException e) { + LOG.warn("Unable to set header " + ApproximateMarkdownService.HEADER_IMAGEPATH + " to " + resource.getParent().getPath(), e); + } + return true; + } + } + return false; + } + + /** + * Retrieves the imageURL in a way useable for ChatGPT - usually data:image/jpeg;base64,{base64_image} + */ + @Nullable + @Override + public String getImageUrl(@Nullable Resource imageResource) { + Resource assetNode = imageResource; + if (imageResource.isResourceType("dam:AssetContent")) { + assetNode = imageResource.getParent(); + } + if (assetNode.isResourceType("dam:Asset")) { + String mimeType = assetNode.getValueMap().get("jcr:content/metadata/dc:format", String.class); + Resource originalRendition = 
assetNode.getChild("jcr:content/renditions/original/jcr:content"); + if (StringUtils.startsWith(mimeType, "image/") && originalRendition != null) { + try (InputStream is = originalRendition.adaptTo(InputStream.class)) { + if (is == null) { + LOG.warn("Unable to get InputStream from image resource {}", assetNode.getPath()); + return null; + } + byte[] data = IOUtils.toByteArray(is); + data = resizeToMaxSize(data, mimeType, 512); + return "data:" + mimeType + ";base64," + new String(Base64.getEncoder().encode(data)); + } catch (IOException e) { + LOG.warn("Unable to get InputStream from image resource {}", assetNode.getPath(), e); + } + } + } + return null; + } + + /** + * We resize the image to a maximum width and height of maxSize, keeping the aspect ratio. If it's smaller, it's + * returned as is. It could be of types image/jpeg, image/png or image/gif . + */ + protected byte[] resizeToMaxSize(@Nonnull byte[] imageData, String mimeType, int maxSize) throws IOException { + ByteArrayInputStream inputStream = new ByteArrayInputStream(imageData); + BufferedImage originalImage = ImageIO.read(inputStream); + int width = originalImage.getWidth(); + int height = originalImage.getHeight(); + if (width <= maxSize && height <= maxSize) { + return imageData; + } + double factor = maxSize * 1.0 / (Math.max(width, height) + 1); + int newWidth = (int) (width * factor); + int newHeight = (int) (height * factor); + BufferedImage resizedImage = new BufferedImage(newWidth, newHeight, originalImage.getType()); + resizedImage.createGraphics().drawImage(originalImage, 0, 0, newWidth, newHeight, null); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + ImageIO.write(resizedImage, mimeType.substring("image/".length()), outputStream); + return outputStream.toByteArray(); + } + } diff --git a/aem/core/src/main/java/com/composum/ai/aem/core/impl/ContentCreationSelectorsServlet.java b/aem/core/src/main/java/com/composum/ai/aem/core/impl/ContentCreationSelectorsServlet.java 
package com.composum.ai.aem.core.impl;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import javax.annotation.Nonnull;
import javax.servlet.Servlet;
import javax.servlet.ServletException;

import org.apache.sling.api.SlingHttpServletRequest;
import org.apache.sling.api.SlingHttpServletResponse;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceMetadata;
import org.apache.sling.api.resource.ValueMap;
import org.apache.sling.api.servlets.SlingSafeMethodsServlet;
import org.apache.sling.api.wrappers.ValueMapDecorator;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;

import com.adobe.granite.ui.components.ds.DataSource;
import com.adobe.granite.ui.components.ds.SimpleDataSource;
import com.adobe.granite.ui.components.ds.ValueMapResource;
import com.composum.ai.backend.slingbase.ApproximateMarkdownService;
import com.google.gson.Gson;

/**
 * Servlet that reads the content selectors from a JSON file, adds links in the content and provides that to the dialog.
 * The result is not written to the response but published as a Granite {@link DataSource} request attribute.
 */
@Component(service = Servlet.class,
        property = {
                Constants.SERVICE_DESCRIPTION + "=Composum Pages Content Creation Selectors Servlet",
                "sling.servlet.resourceTypes=composum-ai/servlets/contentcreationselectors",
        })
public class ContentCreationSelectorsServlet extends SlingSafeMethodsServlet {

    private final Gson gson = new Gson();

    /**
     * JCR path to a JSON with the basic content selectors supported by the dialog.
     */
    public static final String PATH_CONTENTSELECTORS = "/conf/composum-ai/settings/dialogs/contentcreation/contentselectors.json";

    @Reference
    private ApproximateMarkdownService approximateMarkdownService;

    /**
     * Builds the list of content selectors - the predefined ones plus the links found for the resource
     * given in the optional request parameter {@code path} - and stores it as {@link DataSource}
     * in the request attribute named {@code DataSource.class.getName()}.
     *
     * @throws IOException if the predefined content selectors cannot be read
     */
    @Override
    protected void doGet(@Nonnull SlingHttpServletRequest request, @Nonnull SlingHttpServletResponse response) throws ServletException, IOException {
        Map<String, String> contentSelectors = readPredefinedContentSelectors(request);
        String path = request.getParameter("path");
        // The parameter is optional; without it only the predefined selectors are offered.
        Resource resource = path != null && !path.trim().isEmpty()
                ? request.getResourceResolver().getResource(path) : null;
        if (resource != null) {
            addContentPaths(resource, contentSelectors);
        }
        DataSource dataSource = transformToDatasource(request, contentSelectors);
        request.setAttribute(DataSource.class.getName(), dataSource);
    }

    /**
     * Adds the links found for the component as additional content selectors, keyed by link path.
     * For a resource below {@code jcr:content} we start from its parent instead of the component itself. That seems
     * more appropriate in AEM - often interesting links are contained one level up, e.g. for text fields in teasers.
     *
     * @param resource         the component the dialog was opened for
     * @param contentSelectors the selectors (value to label) the links are added to; modified in place
     */
    protected void addContentPaths(Resource resource, Map<String, String> contentSelectors) {
        Resource linkSource = resource;
        if (resource.getPath().contains("/jcr:content/") && resource.getParent() != null) {
            linkSource = resource.getParent();
        }
        List<ApproximateMarkdownService.Link> componentLinks = approximateMarkdownService.getComponentLinks(linkSource);
        for (ApproximateMarkdownService.Link link : componentLinks) {
            contentSelectors.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")");
        }
    }

    /**
     * Reads the predefined content selectors from the JSON file at {@link #PATH_CONTENTSELECTORS}.
     *
     * @param request provides the resource resolver used to read the file
     * @return a modifiable map from selector value to its label; empty if the file contains no JSON object
     * @throws IOException if the file does not exist, is not readable as a stream or reading fails
     */
    protected Map<String, String> readPredefinedContentSelectors(SlingHttpServletRequest request) throws IOException {
        Resource resource = request.getResourceResolver().getResource(PATH_CONTENTSELECTORS);
        if (resource == null) {
            throw new IOException("Content selectors not found at " + PATH_CONTENTSELECTORS);
        }
        try (InputStream in = resource.adaptTo(InputStream.class)) {
            if (in == null) {
                // adaptTo returns null if the resource cannot be adapted; try-with-resources tolerates that.
                throw new IOException("Content selectors not readable at " + PATH_CONTENTSELECTORS);
            }
            try (Reader reader = new InputStreamReader(in, StandardCharsets.UTF_8)) {
                @SuppressWarnings("unchecked") // the file is a flat JSON object with string values
                Map<String, String> contentSelectors = gson.fromJson(reader, Map.class);
                // Gson yields null for an empty document.
                return contentSelectors != null ? contentSelectors : new HashMap<>();
            }
        }
    }

    /**
     * Turns the selectors into a datasource with one synthetic resource per entry, carrying the key as
     * property {@code value} and the label as property {@code text}.
     *
     * @param request          provides the resource resolver for the synthetic resources
     * @param contentSelectors map from selector value to its label
     * @return the datasource over the entries in the iteration order of the map
     */
    protected static DataSource transformToDatasource(SlingHttpServletRequest request, Map<String, String> contentSelectors) {
        List<Resource> resourceList = contentSelectors.entrySet().stream()
                .map(entry -> toOptionResource(request, entry.getKey(), entry.getValue()))
                .collect(Collectors.toList());
        return new SimpleDataSource(resourceList.iterator());
    }

    /**
     * Creates the synthetic resource representing one select option.
     */
    private static Resource toOptionResource(SlingHttpServletRequest request, String value, String text) {
        Map<String, Object> values = new HashMap<>();
        values.put("value", value);
        values.put("text", text);
        ValueMap valueMap = new ValueMapDecorator(values);
        return new ValueMapResource(request.getResourceResolver(), new ResourceMetadata(), "nt:unstructured", valueMap);
    }

}
a/aem/ui.apps/src/main/content/jcr_root/apps/composum-ai/components/contentcreation/_cq_dialog/.content.xml +++ b/aem/ui.apps/src/main/content/jcr_root/apps/composum-ai/components/contentcreation/_cq_dialog/.content.xml @@ -69,8 +69,8 @@ granite:class="composum-ai-content-selector"> + sling:resourceType="composum-ai/servlets/contentcreationselectors" + additionalAttribute="17"/> + name="./sourcePlaintext" + granite:class="composum-ai-source-plaintext composum-ai-source-container"> + + + + + + @@ -241,6 +257,7 @@ diff --git a/aem/ui.content/src/main/content/META-INF/vault/filter.xml b/aem/ui.content/src/main/content/META-INF/vault/filter.xml index c83543ba6..09c5dafd5 100644 --- a/aem/ui.content/src/main/content/META-INF/vault/filter.xml +++ b/aem/ui.content/src/main/content/META-INF/vault/filter.xml @@ -3,5 +3,5 @@ - + diff --git a/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/contentselectors.json b/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/contentselectors.json new file mode 100644 index 000000000..d19283a8b --- /dev/null +++ b/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/contentselectors.json @@ -0,0 +1,9 @@ +{ + "widget": "The text field you were editing", + "component": "The component you were editing, including subcomponents", + "page": "Current page text", + "lastoutput": "Current suggestion shown in this dialog (for iterative improvement)", + "url": "Text content of an external URL", + "empty": "No additional content", + "-": "Manually entered source content" +} diff --git a/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/contentselectors/.content.xml b/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/contentselectors/.content.xml deleted file mode 100644 index f72a2925b..000000000 --- 
a/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/contentselectors/.content.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - diff --git a/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/predefinedprompts/.content.xml b/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/predefinedprompts/.content.xml index a3bb2af24..5f955fa84 100644 --- a/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/predefinedprompts/.content.xml +++ b/aem/ui.content/src/main/content/jcr_root/conf/composum-ai/settings/dialogs/contentcreation/predefinedprompts/.content.xml @@ -70,4 +70,7 @@ Do also fix orthographical and grammar errors."> value="Convert the following text from Markdown to HTML: "> + diff --git a/aem/ui.frontend/src/main/webpack/site/AICreate.js b/aem/ui.frontend/src/main/webpack/site/AICreate.js index dc86b75d8..32016c4cf 100644 --- a/aem/ui.frontend/src/main/webpack/site/AICreate.js +++ b/aem/ui.frontend/src/main/webpack/site/AICreate.js @@ -36,7 +36,9 @@ class AICreate { this.runningxhr = undefined; return response.json(); } else { - throw new Error("Unexpected response code " + response.status); + return response.text().then(errMsg => { + throw new Error("Unexpected response code " + response.status + " : " + errMsg); + }); } }) .then(data => { @@ -48,7 +50,7 @@ class AICreate { throw new Error("Bug: No streamid response " + JSON.stringify(data)); } }) - .catch(error => this.processError(error)); + .catch(this.processError.bind(this)); }); } @@ -68,7 +70,8 @@ class AICreate { if (this.debug) console.log("AICreate ajaxError", arguments); debugger; this.runningxhr = undefined; - this.errorCallback(error); + const shortedError = error.toString().substring(0, 400); + this.errorCallback(shortedError); } startEventStream(streamid) { diff --git a/aem/ui.frontend/src/main/webpack/site/ContentCreationDialog.js 
b/aem/ui.frontend/src/main/webpack/site/ContentCreationDialog.js index 0bf9fd35c..ea8bbe270 100644 --- a/aem/ui.frontend/src/main/webpack/site/ContentCreationDialog.js +++ b/aem/ui.frontend/src/main/webpack/site/ContentCreationDialog.js @@ -1,7 +1,7 @@ /** Implementation for the actions of the Content Creation Dialog - button actions, drop down list actions etc. */ import {AICreate} from './AICreate.js'; -import {errorText, findSingleElement, coralSelectValue} from './common.js'; +import {errorText, findSingleElement} from './common.js'; import {DialogHistory} from './DialogHistory.js'; import {HelpPage} from './HelpPage.js'; @@ -120,12 +120,15 @@ class ContentCreationDialog { this.$stopButton = findSingleElement(this.$dialog, '.composum-ai-stop-button'); this.$urlField = findSingleElement(this.$dialog, '.composum-ai-url-field'); this.$urlContainer = this.$urlField.parent(); + this.$imageContainer = findSingleElement(this.$dialog, '.composum-ai-source-image-container'); + this.$image = findSingleElement(this.$imageContainer, '.composum-ai-source-image'); } getDialogStatus() { return { prompt: this.$prompt.val(), source: this.getSourceContent(), + imagepath: this.$image.data('imagepath'), textLength: this.$textLengthSelector.val(), contentSelector: this.$contentSelector.val(), predefinedPrompts: this.$predefinedPromptsSelector.val(), @@ -138,8 +141,8 @@ class ContentCreationDialog { this.$contentSelector.val(status.contentSelector); this.$textLengthSelector.val(status.textLength); this.$prompt.val(status.prompt); - if (status.source) { - this.setSourceContent(status.source); + if (status.source || status.imagepath) { + this.setSourceContent(status.source, status.imagepath); } else { this.setSourceContent(this.oldContent); } @@ -215,6 +218,7 @@ class ContentCreationDialog { if (this.debug) console.log("onContentSelectorChanged", arguments); const key = this.$contentSelector.val(); this.showUrl(false); + this.$image.removeData('imagepath'); switch (key) { case 
'lastoutput': this.setSourceContent(this.getResponse()); @@ -223,13 +227,15 @@ class ContentCreationDialog { this.setSourceContent(this.oldContent); break; case 'component': - this.retrieveValue(this.componentPath, (value) => this.setSourceContent(value)); + this.retrieveValue(this.componentPath, this.setSourceContent.bind(this)); break; case 'page': - this.retrieveValue(this.pagePath(this.componentPath), (value) => this.setSourceContent(value)); + this.retrieveValue(this.pagePath(this.componentPath), this.setSourceContent.bind(this)); break; case 'url': - this.showError(); + this.showError(false); + this.$urlField.val(''); + this.onUrlChanged(); this.showUrl(true); break; case 'empty': @@ -239,7 +245,12 @@ class ContentCreationDialog { this.setSourceContent(''); // waiting for input break; default: - this.showError('Unknown content selector value ' + key); + if (key.startsWith('/content/')) { + this.retrieveValue(key, this.setSourceContent.bind(this)); + } else { + this.showError('Unknown content selector value ' + key); + debugger; + } } } @@ -253,15 +264,17 @@ class ContentCreationDialog { } onUrlChanged(event) { - event.preventDefault(); - event.stopPropagation(); - const url = $(event.target).val(); - if (url) { + if (event && event.preventDefault && event.stopPropagation) { + event.preventDefault(); + event.stopPropagation(); + } + const url = this.$urlField.val(); + if (url && url.trim().length > 0) { console.log('fetching url ', url); $.ajax({ url: Granite.HTTP.externalize(APPROXIMATED_MARKDOWN_SERVLET + (this.isRichtext ? '.html' : '.md') - + '?fromurl=' + url + + '?fromurl=' + url.trim() ), type: "GET", dataType: "text", @@ -284,9 +297,29 @@ class ContentCreationDialog { return rte; } - setSourceContent(value) { - const thevalue = value || ''; - this.isRichtext ? this.$sourceContent.setContent(thevalue) : this.$sourceContent.val(thevalue); + /** Puts the value into the source field. 
If imagepath is set, we instead make the image visible instead of the source textarea / rte */ + setSourceContent(value, imagepath) { + console.log("setSourceContent", arguments); + const $sourceContainer = this.$dialog.find('.composum-ai-source-container'); + $sourceContainer.removeClass('hidden'); + this.$imageContainer.addClass('hidden'); + if (!imagepath) { + const thevalue = value || ''; + this.isRichtext ? this.$sourceContent.setContent(thevalue) : this.$sourceContent.val(thevalue); + this.$image.removeData('imagepath'); + } else { + const $heightReference = $sourceContainer.find('.coral-Form-field'); + const height = $heightReference.height(); + this.$urlContainer.hide(); + $sourceContainer.addClass('hidden'); + this.$imageContainer.removeClass('hidden'); + // const $image = $imageContainer.find('.composum-ai-source-image'); + // this.$image[0].outerHtml = '
{ - callback(data); + success: (data, status, xhr) => { + callback(data, xhr.getResponseHeader('imagepath')); }, error: (xhr, status, error) => { console.error("error loading approximate markdown", xhr, status, error); @@ -342,9 +375,11 @@ class ContentCreationDialog { onGenerateButtonClicked(event) { if (this.debug) console.log("onGenerateButtonClicked", arguments); this.showError(undefined); + let imagepath = this.$image.data('imagepath'); const data = { prompt: this.$prompt.val(), - source: this.getSourceContent(), + source: imagepath ? '' : this.getSourceContent(), + inputImagePath: imagepath, textLength: this.$textLengthSelector.val(), richText: this.isRichtext, configBasePath: this.pagePath(this.componentPath) @@ -352,7 +387,8 @@ class ContentCreationDialog { if (this.debug) console.log("createContent", data); this.setLoading(true); this.createServlet.createContent(data); - this.$dialog.find('.composum-ai-content-suggestion')[0].scrollIntoView(); + findSingleElement(this.$dialog, '.composum-ai-actionbar')[0].scrollIntoView(); + // this also makes content suggestion and loading indicator visible. 
} streamingCallback(text) { @@ -403,6 +439,7 @@ class ContentCreationDialog { findSingleElement(this.$dialog, '.composum-ai-alert').text(errorText(error)); findSingleElement(this.$dialog, '.composum-ai-error-columns') .removeClass('hidden').show()[0].scrollIntoView(); + this.$stopButton debugger; } } diff --git a/aem/ui.frontend/src/main/webpack/site/registerdialogs.js b/aem/ui.frontend/src/main/webpack/site/registerdialogs.js index 14403845c..036afd8bc 100644 --- a/aem/ui.frontend/src/main/webpack/site/registerdialogs.js +++ b/aem/ui.frontend/src/main/webpack/site/registerdialogs.js @@ -85,7 +85,7 @@ try { const dialogId = 'composumAI-create-dialog'; // possibly use editable.path to make it unique $.ajax({ - url: CREATE_DIALOG_URL + "?richtext=" + parameters.isRichtext, + url: CREATE_DIALOG_URL + "?richtext=" + parameters.isRichtext + '&path=' + encodeURIComponent(parameters.componentPath), type: "GET", dataType: "html", success: function (data) { diff --git a/aem/ui.frontend/src/main/webpack/site/styles/composum-ai.scss b/aem/ui.frontend/src/main/webpack/site/styles/composum-ai.scss index 001878509..55b9bd770 100644 --- a/aem/ui.frontend/src/main/webpack/site/styles/composum-ai.scss +++ b/aem/ui.frontend/src/main/webpack/site/styles/composum-ai.scss @@ -37,3 +37,9 @@ .composum-ai-help { max-width: 55em; } + +.composum-ai-source-image { + background-size: contain; + background-position: center; + background-repeat: no-repeat; +} diff --git a/backend/base/pom.xml b/backend/base/pom.xml index 69dcc3ac0..8d55a1af6 100644 --- a/backend/base/pom.xml +++ b/backend/base/pom.xml @@ -22,11 +22,6 @@ - - com.theokanning.openai-gpt3-java - api - - com.knuddels @@ -98,7 +93,7 @@ org.jsoup jsoup - 1.15.4 + 1.17.2 @@ -159,7 +154,6 @@ com.composum.ai.backend.base.* - api*;groupId=com.theokanning.openai-gpt3-java, jtokkit*;groupId=com.knuddels, jsoup*, org.eclipse.mylyn.wikitext*, diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/GPTChatMessage.java 
b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/GPTChatMessage.java index 265a098ab..6302d1158 100644 --- a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/GPTChatMessage.java +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/GPTChatMessage.java @@ -3,9 +3,10 @@ import java.util.Objects; import javax.annotation.Nonnull; +import javax.annotation.Nullable; /** - * A chat message in a dialog with ChatGPT. + * A chat message in a dialog with ChatGPT. Currently limited to at most a text message and an image. * * @see "https://platform.openai.com/docs/guides/chat" */ @@ -13,10 +14,18 @@ public class GPTChatMessage { private final GPTMessageRole role; private final String content; + private final String imageUrl; public GPTChatMessage(@Nonnull GPTMessageRole role, @Nonnull String content) { this.role = role; this.content = content; + this.imageUrl = null; + } + + public GPTChatMessage(@Nonnull GPTMessageRole role, @Nullable String content, @Nullable String imageUrl) { + this.role = role; + this.content = content; + this.imageUrl = imageUrl; } /** @@ -33,6 +42,13 @@ public String getContent() { return content; } + /** + * The URL with the content of the image to be analyzed. + */ + public String getImageUrl() { + return imageUrl; + } + /** * String representation only for debugging. */ @@ -40,7 +56,8 @@ public String getContent() { public String toString() { return "GPTChatMessage{" + "role=" + role + - ", text='" + content + '\'' + + (content != null ? ", text='" + content + '\'' : "") + + (imageUrl != null ? 
", imageUrl='" + imageUrl + '\'' : "") + '}'; } @@ -49,12 +66,13 @@ public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof GPTChatMessage)) return false; GPTChatMessage that = (GPTChatMessage) o; - return getRole() == that.getRole() && Objects.equals(getContent(), that.getContent()); + return getRole() == that.getRole() && Objects.equals(getContent(), that.getContent()) && + Objects.equals(getImageUrl(), that.getImageUrl()); } @Override public int hashCode() { - return Objects.hash(getRole() != null ? getRole().toString() : "", getContent()); + return Objects.hash(getRole() != null ? getRole().toString() : "", getContent(), getImageUrl()); } } diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/GPTChatCompletionServiceImpl.java b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/GPTChatCompletionServiceImpl.java index 8d1ba0d88..1747ab0d7 100644 --- a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/GPTChatCompletionServiceImpl.java +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/GPTChatCompletionServiceImpl.java @@ -7,6 +7,7 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.concurrent.CancellationException; @@ -65,19 +66,18 @@ import com.composum.ai.backend.base.service.chat.GPTCompletionCallback; import com.composum.ai.backend.base.service.chat.GPTConfiguration; import com.composum.ai.backend.base.service.chat.GPTFinishReason; -import com.composum.ai.backend.base.service.chat.GPTMessageRole; -import com.fasterxml.jackson.annotation.JsonInclude; +import com.composum.ai.backend.base.service.chat.impl.chatmodel.ChatCompletionChoice; +import com.composum.ai.backend.base.service.chat.impl.chatmodel.ChatCompletionMessage; +import 
com.composum.ai.backend.base.service.chat.impl.chatmodel.ChatCompletionMessagePart; +import com.composum.ai.backend.base.service.chat.impl.chatmodel.ChatCompletionRequest; +import com.composum.ai.backend.base.service.chat.impl.chatmodel.ChatCompletionResponse; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; import com.knuddels.jtokkit.Encodings; import com.knuddels.jtokkit.api.Encoding; import com.knuddels.jtokkit.api.EncodingRegistry; import com.knuddels.jtokkit.api.EncodingType; -import com.theokanning.openai.completion.chat.ChatCompletionChoice; -import com.theokanning.openai.completion.chat.ChatCompletionChunk; -import com.theokanning.openai.completion.chat.ChatCompletionRequest; -import com.theokanning.openai.completion.chat.ChatMessage; /** * Implements the actual access to the ChatGPT chat API. 
@@ -107,6 +107,8 @@ public class GPTChatCompletionServiceImpl implements GPTChatCompletionService { public static final String OPENAI_API_KEY_SYSPROP = "openai.api.key"; public static final String DEFAULT_MODEL = "gpt-3.5-turbo"; + public static final String DEFAULT_IMAGE_MODEL = "gpt-4-vision-preview"; + private static final int DEFAULTVALUE_CONNECTIONTIMEOUT = 20; private static final int DEFAULTVALUE_REQUESTTIMEOUT = 60; @@ -121,10 +123,11 @@ public class GPTChatCompletionServiceImpl implements GPTChatCompletionService { */ private String apiKey; private String defaultModel; + private String imageModel; private CloseableHttpAsyncClient httpAsyncClient; - private ObjectMapper mapper; + private static final Gson gson = new GsonBuilder().create(); private final AtomicLong requestCounter = new AtomicLong(System.currentTimeMillis()); @@ -166,6 +169,7 @@ public void activate(GPTChatCompletionServiceConfig config, BundleContext bundle RateLimiter hourLimiter = new RateLimiter(dayLimiter, 100, 1, TimeUnit.HOURS); this.limiter = new RateLimiter(hourLimiter, 20, 1, TimeUnit.MINUTES); this.defaultModel = config != null && config.defaultModel() != null && !config.defaultModel().trim().isEmpty() ? config.defaultModel().trim() : DEFAULT_MODEL; + this.imageModel = config != null && config.imageModel() != null && !config.imageModel().trim().isEmpty() ? config.imageModel().trim() : null; this.apiKey = null; this.requestTimeout = config != null && config.requestTimeout() > 0 ? config.requestTimeout() : DEFAULTVALUE_REQUESTTIMEOUT; this.connectionTimeout = config != null && config.connectionTimeout() > 0 ? 
config.connectionTimeout() : DEFAULTVALUE_CONNECTIONTIMEOUT; @@ -200,9 +204,6 @@ public void activate(GPTChatCompletionServiceConfig config, BundleContext bundle .setResponseTimeout(this.requestTimeout, TimeUnit.SECONDS).build()) .build(); this.httpAsyncClient.start(); - mapper = new ObjectMapper(); - mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(r -> { Thread thread = Executors.defaultThreadFactory().newThread(r); @@ -226,9 +227,9 @@ public void deactivate() { } this.apiKey = null; this.defaultModel = null; + this.imageModel = null; this.limiter = null; this.gptLimiter = null; - this.mapper = null; this.bundleContext = null; this.templates.clear(); this.temperature = null; @@ -330,7 +331,11 @@ public void streamingChatCompletion(@Nonnull GPTChatRequest request, @Nonnull GP String jsonRequest = createJsonRequest(request); callback.setRequest(jsonRequest); - LOG.debug("Sending streaming request {} to GPT: {}", id, jsonRequest); + if (LOG.isDebugEnabled()) { + // replace data:image/jpeg;base64,{base64_image} with data:image/jpeg;base64, ... 
+ String shortenedRequest = jsonRequest.replaceAll("data:image/[^;]+;base64,[^\\}]+\\}", "data:image/jpeg;base64,{base64_image}"); + LOG.debug("Sending streaming request {} to GPT: {}", id, shortenedRequest); + } SimpleHttpRequest httpRequest = makeRequest(jsonRequest, request.getConfiguration()); performCallAsync(new CompletableFuture<>(), id, httpRequest, callback, 0, 2000); @@ -358,19 +363,19 @@ protected void handleStreamingEvent(GPTCompletionCallback callback, long id, Str LOG.debug("Response {} from GPT received DONE", id); return; } - ChatCompletionChunk chunk = mapper.readerFor(ChatCompletionChunk.class).readValue(line); + ChatCompletionResponse chunk = gson.fromJson(line, ChatCompletionResponse.class); ChatCompletionChoice choice = chunk.getChoices().get(0); - String content = choice.getMessage().getContent(); + String content = choice.getDelta().getContent(); if (content != null && !content.isEmpty()) { LOG.trace("Response {} from GPT: {}", id, content); callback.onNext(content); } - GPTFinishReason finishReason = GPTFinishReason.fromChatGPT(choice.getFinishReason()); + GPTFinishReason finishReason = ChatCompletionResponse.FinishReason.toGPTFinishReason(choice.getFinishReason()); if (finishReason != null) { LOG.debug("Response {} from GPT finished with reason {}", id, finishReason); callback.onFinish(finishReason); } - } catch (RuntimeException | IOException e) { + } catch (RuntimeException e) { LOG.error("Id {} Cannot deserialize {}", id, line, e); GPTException gptException = new GPTException("Cannot deserialize " + line, e); callback.onError(gptException); @@ -486,27 +491,34 @@ protected long recalculateDelay(String responsebody, long delay) { } protected String createJsonRequest(GPTChatRequest request) throws JsonProcessingException { - List messages = new ArrayList<>(); + List messages = new ArrayList<>(); for (GPTChatMessage message : request.getMessages()) { - String role = message.getRole().toString(); - messages.add(new ChatMessage(role, 
message.getContent())); + messages.add(ChatCompletionMessage.make(message)); } - while (!messages.isEmpty() && StringUtil.isBlank(messages.get(messages.size() - 1).getContent())) { - LOG.debug("Removing empty last message."); // suspicious - likely misusage of the API - messages.remove(messages.size() - 1); + for (Iterator messageIterator = messages.iterator(); messageIterator.hasNext(); ) { + ChatCompletionMessage message = messageIterator.next(); + if (message.isEmpty(null)) { + LOG.debug("Removing empty message {}", message); // suspicious - likely misusage of the API + messageIterator.remove(); + } } - if (!messages.isEmpty() && messages.get(messages.size() - 1).getRole() == GPTMessageRole.ASSISTANT.toString()) { + if (!messages.isEmpty() && messages.get(messages.size() - 1).getRole() == ChatCompletionRequest.Role.ASSISTANT) { LOG.debug("Removing last message because it's an assistant message and that'd be confusing for GPT."); messages.remove(messages.size() - 1); } - ChatCompletionRequest externalRequest = ChatCompletionRequest.builder() - .model(defaultModel) - .messages(messages) - .temperature(temperature) - .maxTokens(request.getMaxTokens()) - .stream(Boolean.TRUE) - .build(); - String jsonRequest = mapper.writeValueAsString(externalRequest); + boolean hasImage = messages.stream().flatMap(m -> m.getContent().stream()) + .anyMatch(m -> m.getType() == ChatCompletionMessagePart.Type.IMAGE_URL); + if (hasImage && imageModel == null) { + LOG.error("No image model configured - defaultModel {} imageModel {}", defaultModel, imageModel); + throw new IllegalArgumentException("Cannot use image as input, no image model configured."); + } + ChatCompletionRequest externalRequest = new ChatCompletionRequest(); + externalRequest.setModel(hasImage ? 
imageModel : defaultModel); + externalRequest.setMessages(messages); + externalRequest.setTemperature(temperature); + externalRequest.setMaxTokens(request.getMaxTokens()); + externalRequest.setStream(Boolean.TRUE); + String jsonRequest = gson.toJson(externalRequest); return jsonRequest; } @@ -611,7 +623,11 @@ public String htmlToMarkdown(String html) { String openAiApiKeyFile(); @AttributeDefinition(name = "Default model to use for the chat completion. The default is " + DEFAULT_MODEL + ". Please consider the varying prices https://openai.com/pricing .", defaultValue = DEFAULT_MODEL) - String defaultModel(); + String defaultModel() default DEFAULT_MODEL; + + @AttributeDefinition(name = "Optional, a model that is used if an image is given as input, e.g. gpt-4-vision-preview. If not given, that is rejected.", + defaultValue = DEFAULT_IMAGE_MODEL) + String imageModel() default DEFAULT_IMAGE_MODEL; @AttributeDefinition(name = "Optional temperature setting that determines variability vs. creativity as a floating point between 0.0 and 1.0", defaultValue = "") String temperature(); @@ -702,7 +718,8 @@ protected Void buildResult() throws IOException { result.completeExceptionally(retryableException); throw retryableException; } - GPTException gptException = new GPTException("Error response from GPT: " + resultBuilder); + GPTException gptException = new GPTException("Error response from GPT (status " + errorStatusCode + + ") : " + resultBuilder); callback.onError(gptException); result.completeExceptionally(gptException); throw gptException; diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverter.java b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverter.java index 63301c48b..d1574acc2 100644 --- a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverter.java +++ 
b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverter.java @@ -13,10 +13,14 @@ import org.jsoup.Jsoup; import org.jsoup.internal.StringUtil; +import org.jsoup.nodes.Comment; +import org.jsoup.nodes.DataNode; import org.jsoup.nodes.Document; +import org.jsoup.nodes.DocumentType; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; +import org.jsoup.nodes.XmlDeclaration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,6 +78,10 @@ private void convertNode(Node node) { } else if (node instanceof Element) { Element element = (Element) node; convertElement(element); + } else if (node instanceof Comment || node instanceof DocumentType || node instanceof XmlDeclaration) { + // no text content + } else if (node instanceof DataNode) { + // not quite sure what to do with this, but this is very likely not text content. } else { throw new UnsupportedOperationException("Unknown node type " + node.getClass()); } diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionChoice.java b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionChoice.java new file mode 100644 index 000000000..051ab479b --- /dev/null +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionChoice.java @@ -0,0 +1,54 @@ +package com.composum.ai.backend.base.service.chat.impl.chatmodel; + +import com.google.gson.annotations.SerializedName; + +public class ChatCompletionChoice { + + @SerializedName("index") + private int index; + + @SerializedName("message") + private ChatCompletionChoiceMessage message; + + @SerializedName("delta") + private ChatCompletionChoiceMessage delta; + + @SerializedName("finish_reason") + private ChatCompletionResponse.FinishReason finishReason; + + // Getters and setters + public int getIndex() { + return index; + } + + public void 
setIndex(int index) { + this.index = index; + } + + public ChatCompletionChoiceMessage getMessage() { + return message; + } + + public void setMessage(ChatCompletionChoiceMessage message) { + this.message = message; + } + + /** + * Alternative to {@link #getMessage()} if it's a response chunk. + */ + public ChatCompletionChoiceMessage getDelta() { + return delta; + } + + public void setDelta(ChatCompletionChoiceMessage delta) { + this.delta = delta; + } + + public ChatCompletionResponse.FinishReason getFinishReason() { + return finishReason; + } + + public void setFinishReason(ChatCompletionResponse.FinishReason finishReason) { + this.finishReason = finishReason; + } +} diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionChoiceMessage.java b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionChoiceMessage.java new file mode 100644 index 000000000..4913a8968 --- /dev/null +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionChoiceMessage.java @@ -0,0 +1,29 @@ +package com.composum.ai.backend.base.service.chat.impl.chatmodel; + +import com.google.gson.annotations.SerializedName; + +public class ChatCompletionChoiceMessage { + + @SerializedName("role") + private ChatCompletionRequest.Role role; + + @SerializedName("content") + private String content; + + // Getters and setters + public ChatCompletionRequest.Role getRole() { + return role; + } + + public void setRole(ChatCompletionRequest.Role role) { + this.role = role; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } +} diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionMessage.java b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionMessage.java new file mode 100644 index 
000000000..720c8328d --- /dev/null +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionMessage.java @@ -0,0 +1,55 @@ +package com.composum.ai.backend.base.service.chat.impl.chatmodel; + +import java.util.Collections; +import java.util.List; + +import com.composum.ai.backend.base.service.chat.GPTChatMessage; +import com.google.gson.annotations.Expose; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; + +public class ChatCompletionMessage { + + @SerializedName("role") + private ChatCompletionRequest.Role role; + + @SerializedName("content") + @JsonAdapter(ChatCompletionMessagePart.ChatCompletionMessagePartListDeSerializer.class) + private List content; + + // Getters and setters + public ChatCompletionRequest.Role getRole() { + return role; + } + + public void setRole(ChatCompletionRequest.Role role) { + this.role = role; + } + + public List getContent() { + return content; + } + + public void setContent(List content) { + this.content = content; + } + + public boolean isEmpty(Void ignoreJustPreventSerialization) { + return content == null || content.isEmpty() || + !content.stream().anyMatch(m -> !m.isEmpty(null)); + } + + public static ChatCompletionMessage make(GPTChatMessage message) { + ChatCompletionMessagePart part; + if (message.getImageUrl() != null && !message.getImageUrl().isEmpty()) { + part = ChatCompletionMessagePart.imageUrl(message.getImageUrl()); + } else { + part = ChatCompletionMessagePart.text(message.getContent()); + } + ChatCompletionRequest.Role role = ChatCompletionRequest.Role.make(message.getRole()); + ChatCompletionMessage result = new ChatCompletionMessage(); + result.setRole(role); + result.setContent(Collections.singletonList(part)); + return result; + } +} diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionMessagePart.java 
b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionMessagePart.java new file mode 100644 index 000000000..b6ec5630b --- /dev/null +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionMessagePart.java @@ -0,0 +1,178 @@ +package com.composum.ai.backend.base.service.chat.impl.chatmodel; + +import java.util.ArrayList; +import java.util.List; + +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; +import com.google.gson.annotations.SerializedName; + +/** + * A text part or image part of a chat completion message. + *

+ *          {
+ *           "type": "text",
+ *           "text": "What’s in this image?"
+ *         }
+ *         or
+ *         {
+ *           "type": "image_url",
+ *           "image_url": {
+ *             "url": "https://www.example.net/somepicture.jpg"
+ *           }
+ *         }
+ * 
+ */ +public class ChatCompletionMessagePart { + + public enum Type { + @SerializedName("text") + TEXT, + @SerializedName("image_url") + IMAGE_URL + } + + @SerializedName("type") + private Type type; + + @SerializedName("text") + private String text; + + @SerializedName("image_url") + private ChatCompletionMessageUrlPart imageUrl; + + // Getters and setters + + public Type getType() { + return type; + } + + public void setType(Type type) { + this.type = type; + } + + public String getText() { + return text; + } + + public void setText(String text) { + this.text = text; + } + + public ChatCompletionMessageUrlPart getImageUrl() { + return imageUrl; + } + + public void setImageUrl(ChatCompletionMessageUrlPart image_url) { + this.imageUrl = image_url; + } + + public boolean isEmpty(Void ignoreJustPreventSerialization) { + return (text == null || text.isEmpty()) && + (imageUrl == null || imageUrl.getUrl() == null || imageUrl.getUrl().isEmpty()); + } + + public static ChatCompletionMessagePart text(String text) { + ChatCompletionMessagePart part = new ChatCompletionMessagePart(); + part.setType(Type.TEXT); + part.setText(text); + return part; + } + + public static ChatCompletionMessagePart imageUrl(String imageUrl) { + ChatCompletionMessagePart part = new ChatCompletionMessagePart(); + part.setType(Type.IMAGE_URL); + ChatCompletionMessageUrlPart urlpart = new ChatCompletionMessageUrlPart(); + urlpart.setUrl(imageUrl); + part.setImageUrl(urlpart); + return part; + } + + /** + * Encodes URL part: { "url": "https://example.com/somepicture.jpg" } + */ + public static class ChatCompletionMessageUrlPart { + + @SerializedName("url") + private String url; + + @SerializedName("detail") + private ImageDetail detail = ImageDetail.LOW; + + // Getters and setters + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public ImageDetail getDetail() { + return detail; + } + + public void setDetail(ImageDetail detail) { + 
this.detail = detail; + } + + } + + public enum ImageDetail { + @SerializedName("low") + LOW, + @SerializedName("high") + HIGH + } + + + public static class ChatCompletionMessagePartListDeSerializer implements JsonDeserializer>, + JsonSerializer> { + + @Override + public List deserialize(JsonElement json, java.lang.reflect.Type typeOfT, JsonDeserializationContext context) throws JsonParseException { + List content = new ArrayList<>(); + + if (json.isJsonArray()) { + for (JsonElement element : json.getAsJsonArray()) { + try { + content.add(context.deserialize(element, ChatCompletionMessagePart.class)); + } catch (RuntimeException e) { + e.printStackTrace(); + throw e; + } + + } + } else if (json.isJsonPrimitive()) { + ChatCompletionMessagePart part = new ChatCompletionMessagePart(); + part.setText(json.getAsString()); + part.setType(Type.TEXT); + content.add(part); + } + + return content; + } + + /** + * To save space: if there is only one element in src that also is a text message, we serialize it as a string, + * otherwise as object list. 
+ */ + @Override + public JsonElement serialize(List src, java.lang.reflect.Type typeOfSrc, JsonSerializationContext context) { + if (src == null || src.isEmpty()) { + return null; + } + if (src.size() == 1 && src.get(0).getType() == Type.TEXT) { + return context.serialize(src.get(0).getText(), String.class); + } + return context.serialize(src); + } + + } + +} diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionRequest.java b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionRequest.java new file mode 100644 index 000000000..49a29e5cc --- /dev/null +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionRequest.java @@ -0,0 +1,87 @@ +package com.composum.ai.backend.base.service.chat.impl.chatmodel; + +import java.util.List; + +import com.composum.ai.backend.base.service.chat.GPTMessageRole; +import com.google.gson.annotations.SerializedName; + +public class ChatCompletionRequest { + + @SerializedName("model") + private String model; + + @SerializedName("messages") + private List messages; + + @SerializedName("max_tokens") + private Integer maxTokens; + + @SerializedName("stream") + private Boolean stream; + + @SerializedName("temperature") + private Double temperature; + + // Getters and setters + public String getModel() { + return model; + } + + public void setModel(String model) { + this.model = model; + } + + public List getMessages() { + return messages; + } + + public void setMessages(List messages) { + this.messages = messages; + } + + public Integer getMaxTokens() { + return maxTokens; + } + + public void setMaxTokens(Integer maxTokens) { + this.maxTokens = maxTokens; + } + + public Boolean isStream() { + return stream; + } + + public void setStream(Boolean stream) { + this.stream = stream; + } + + public Double getTemperature() { + return temperature; + } + + public void setTemperature(Double 
temperature) { + this.temperature = temperature; + } + + public enum Role { + @SerializedName("user") + USER, + @SerializedName("assistant") + ASSISTANT, + @SerializedName("system") + SYSTEM; + + public static Role make(GPTMessageRole role) { + switch (role) { + case USER: + return USER; + case SYSTEM: + return SYSTEM; + case ASSISTANT: + return ASSISTANT; + default: + throw new IllegalArgumentException("Unknown role " + role); + } + } + } +} diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionResponse.java b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionResponse.java new file mode 100644 index 000000000..ce9eb147d --- /dev/null +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionResponse.java @@ -0,0 +1,112 @@ +package com.composum.ai.backend.base.service.chat.impl.chatmodel; + +import java.util.List; + +import com.composum.ai.backend.base.service.chat.GPTFinishReason; +import com.google.gson.annotations.SerializedName; + +public class ChatCompletionResponse { + + @SerializedName("id") + private String id; + + @SerializedName("object") + private String object; + + @SerializedName("created") + private long created; + + @SerializedName("model") + private String model; + + @SerializedName("system_fingerprint") + private String systemFingerprint; + + @SerializedName("choices") + private List choices; + + @SerializedName("usage") + private ChatCompletionUsage usage; + + // Getters and setters + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getObject() { + return object; + } + + public void setObject(String object) { + this.object = object; + } + + public long getCreated() { + return created; + } + + public void setCreated(long created) { + this.created = created; + } + + public String getModel() { + return model; + } + + public void 
setModel(String model) { + this.model = model; + } + + public String getSystemFingerprint() { + return systemFingerprint; + } + + public void setSystemFingerprint(String systemFingerprint) { + this.systemFingerprint = systemFingerprint; + } + + public List getChoices() { + return choices; + } + + public void setChoices(List choices) { + this.choices = choices; + } + + public ChatCompletionUsage getUsage() { + return usage; + } + + public void setUsage(ChatCompletionUsage usage) { + this.usage = usage; + } + + public enum FinishReason { + @SerializedName("stop") + STOP, + @SerializedName("length") + LENGTH, + @SerializedName("content_filter") + CONTENT_FILTER; + + public static GPTFinishReason toGPTFinishReason(FinishReason finishReason) { + if (finishReason == null) { + return null; + } + switch (finishReason) { + case STOP: + return GPTFinishReason.STOP; + case LENGTH: + return GPTFinishReason.LENGTH; + case CONTENT_FILTER: + return GPTFinishReason.CONTENT_FILTER; + default: + throw new IllegalArgumentException("Unknown finish reason: " + finishReason); + } + } + } +} diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionUsage.java b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionUsage.java new file mode 100644 index 000000000..00abed741 --- /dev/null +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionUsage.java @@ -0,0 +1,40 @@ +package com.composum.ai.backend.base.service.chat.impl.chatmodel; + +import com.google.gson.annotations.SerializedName; + +public class ChatCompletionUsage { + + @SerializedName("prompt_tokens") + private int promptTokens; + + @SerializedName("completion_tokens") + private int completionTokens; + + @SerializedName("total_tokens") + private int totalTokens; + + // Getters and setters + public int getPromptTokens() { + return promptTokens; + } + + public void setPromptTokens(int 
promptTokens) { + this.promptTokens = promptTokens; + } + + public int getCompletionTokens() { + return completionTokens; + } + + public void setCompletionTokens(int completionTokens) { + this.completionTokens = completionTokens; + } + + public int getTotalTokens() { + return totalTokens; + } + + public void setTotalTokens(int totalTokens) { + this.totalTokens = totalTokens; + } +} diff --git a/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/AbstractGPTRunner.java b/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/AbstractGPTRunner.java index f51c34ee8..4dddd6ed9 100644 --- a/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/AbstractGPTRunner.java +++ b/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/AbstractGPTRunner.java @@ -46,6 +46,11 @@ public String defaultModel() { return "gpt-3.5-turbo"; } + @Override + public String imageModel() { + return "gpt-4-vision-preview"; + } + @Override public String temperature() { return null; diff --git a/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/RunGPTChatCompletionServiceImgAnalysisImpl.java b/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/RunGPTChatCompletionServiceImgAnalysisImpl.java new file mode 100644 index 000000000..0e18e1c91 --- /dev/null +++ b/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/RunGPTChatCompletionServiceImgAnalysisImpl.java @@ -0,0 +1,73 @@ +package com.composum.ai.backend.base.service.chat.impl; + +import java.io.IOException; +import java.util.Collections; + +import com.composum.ai.backend.base.service.chat.GPTChatMessage; +import com.composum.ai.backend.base.service.chat.GPTChatRequest; +import com.composum.ai.backend.base.service.chat.GPTCompletionCallback; +import com.composum.ai.backend.base.service.chat.GPTFinishReason; +import com.composum.ai.backend.base.service.chat.GPTMessageRole; +import 
com.google.common.io.Resources; + +/** + * Asks ChatGPT about an image. + */ +public class RunGPTChatCompletionServiceImgAnalysisImpl extends AbstractGPTRunner implements GPTCompletionCallback { + + StringBuilder buffer = new StringBuilder(); + private boolean isFinished; + + public static void main(String[] args) throws Exception { + RunGPTChatCompletionServiceImgAnalysisImpl instance = new RunGPTChatCompletionServiceImgAnalysisImpl(); + instance.setup(); + instance.run(); + instance.teardown(); + System.out.println("Done."); + } + + private void run() throws InterruptedException, IOException { + GPTChatRequest request = new GPTChatRequest(); + GPTChatMessage imgMsg = makeImageChatMessage(); + request.addMessages(Collections.singletonList(imgMsg)); + request.addMessage(GPTMessageRole.USER, "Describe the image at great length - at least one paragraph."); + request.setMaxTokens(400); + chatCompletionService.streamingChatCompletion(request, this); + System.out.println("Call returned."); + while (!isFinished) Thread.sleep(1000); + System.out.println("Complete response:"); + System.out.println(buffer); + } + + protected GPTChatMessage makeImageChatMessage() throws IOException { + // GPTChatMessage imgMsg = new GPTChatMessage(GPTMessageRole.USER, null, "https://www.composum.com/assets/pages/composum-pages-edit-view.jpg"); + byte[] imageBytes = Resources.toByteArray(getClass().getResource("/imgtest/imgtest.png")); + String imageUrl = "data:image/png;base64," + java.util.Base64.getEncoder().encodeToString(imageBytes); + GPTChatMessage imgMsg = new GPTChatMessage(GPTMessageRole.USER, null, imageUrl); + return imgMsg; + } + + @Override + public void onFinish(GPTFinishReason finishReason) { + isFinished = true; + System.out.println(); + System.out.println("Finished: " + finishReason); + } + + @Override + public void setLoggingId(String loggingId) { + System.out.println("Logging ID: " + loggingId); + } + + @Override + public void onNext(String item) { + buffer.append(item); + 
System.out.print(item); + } + + @Override + public void onError(Throwable throwable) { + throwable.printStackTrace(System.err); + isFinished = true; + } +} diff --git a/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionModelTest.java b/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionModelTest.java new file mode 100644 index 000000000..8d83b28fe --- /dev/null +++ b/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/chatmodel/ChatCompletionModelTest.java @@ -0,0 +1,97 @@ +package com.composum.ai.backend.base.service.chat.impl.chatmodel; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +public class ChatCompletionModelTest { + + private static final Gson gson = new GsonBuilder().create(); + + private String removeWhitespaceAndNewlines(String str) { + return str.replaceAll("\\s+", ""); + } + + @Test + public void testRequestSerializationDeserialization() { + String originalRequestJson = + "{\n" + + " \"model\": \"gpt-3.5-turbo\",\n" + + " \"messages\": [\n" + + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n" + + " {\"role\": \"assistant\", \"content\": \"Hello!\"},\n" + + " {\"role\": \"user\", \"content\": [\n" + + " {\"type\": \"text\", \"text\": \"What?s in this image?\"},\n" + + " {\"type\": \"image_url\", \"image_url\": {\"url\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\", \"detail\":\"low\"}}\n" + + " ]}\n" + + " ],\n" + + " \"max_tokens\": 300,\n" + + " \"stream\": true\n" + + "}"; + ChatCompletionRequest request = gson.fromJson(originalRequestJson, ChatCompletionRequest.class); + String serializedRequestJson = gson.toJson(request); + assertEquals(removeWhitespaceAndNewlines(originalRequestJson), 
removeWhitespaceAndNewlines(serializedRequestJson)); + } + + @Test + public void testResponseSerializationDeserialization() { + String originalResponseJson = + "{\n" + + " \"id\": \"chatcmpl-123\",\n" + + " \"object\": \"chat.completion\",\n" + + " \"created\": 1677652288,\n" + + " \"model\": \"gpt-3.5-turbo-0613\",\n" + + " \"system_fingerprint\": \"fp_44709d6fcb\",\n" + + " \"choices\": [\n" + + " {\"index\": 0, \"message\": {\"role\": \"assistant\", \"content\": \"Hello there, how may I assist you today?\"}, \"finish_reason\": \"stop\"}\n" + + " ],\n" + + " \"usage\": {\"prompt_tokens\": 9, \"completion_tokens\": 12, \"total_tokens\": 21}\n" + + "}"; + ChatCompletionResponse response = gson.fromJson(originalResponseJson, ChatCompletionResponse.class); + String serializedResponseJson = gson.toJson(response); + assertEquals(removeWhitespaceAndNewlines(originalResponseJson), removeWhitespaceAndNewlines(serializedResponseJson)); + } + + @Test + public void testChunkSerializationDeserialization() { + String originalChunkJson = "{\n" + + " \"id\": \"chatcmpl-8YBAbKcTCwOzh6EklnSCJE2k44NOU\",\n" + + " \"object\": \"chat.completion.chunk\",\n" + + " \"created\": 1703156781,\n" + + " \"model\": \"gpt-3.5-turbo-0613\",\n" + + " \"system_fingerprint\": null,\n" + + " \"choices\": [\n" + + " {\n" + + " \"index\": 0,\n" + + " \"delta\": {\n" + + " \"role\": \"assistant\",\n" + + " \"content\": \"\"\n" + + " },\n" + + " \"logprobs\": null,\n" + + " \"finish_reason\": null\n" + + " }\n" + + " ]\n" + + "}"; + String expectedChunkJson = "{\n" + + " \"id\": \"chatcmpl-8YBAbKcTCwOzh6EklnSCJE2k44NOU\",\n" + + " \"object\": \"chat.completion.chunk\",\n" + + " \"created\": 1703156781,\n" + + " \"model\": \"gpt-3.5-turbo-0613\",\n" + + " \"choices\": [\n" + + " {\n" + + " \"index\": 0,\n" + + " \"delta\": {\n" + + " \"role\": \"assistant\",\n" + + " \"content\": \"\"\n" + + " }\n" + + " }\n" + + " ]\n" + + "}"; // without the null values + ChatCompletionResponse response = 
gson.fromJson(originalChunkJson, ChatCompletionResponse.class); + String serializedChunkJson = gson.toJson(response); + assertEquals(removeWhitespaceAndNewlines(expectedChunkJson), removeWhitespaceAndNewlines(serializedChunkJson)); + } +} diff --git a/backend/base/src/test/resources/imgtest/imgtest.png b/backend/base/src/test/resources/imgtest/imgtest.png new file mode 100644 index 000000000..f73daba2a Binary files /dev/null and b/backend/base/src/test/resources/imgtest/imgtest.png differ diff --git a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/AICreateServlet.java b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/AICreateServlet.java index 390067d68..0ba9e7680 100644 --- a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/AICreateServlet.java +++ b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/AICreateServlet.java @@ -1,24 +1,26 @@ package com.composum.ai.backend.slingbase; -import static org.apache.commons.lang3.StringUtils.isNoneBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; import java.io.IOException; import java.io.PrintWriter; import java.lang.reflect.Type; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.servlet.Servlet; import javax.servlet.ServletException; import javax.servlet.http.HttpServletResponse; +import org.apache.commons.lang3.StringUtils; import org.apache.sling.api.SlingHttpServletRequest; import org.apache.sling.api.SlingHttpServletResponse; import org.apache.sling.api.resource.Resource; @@ -38,6 +40,7 @@ import com.composum.ai.backend.base.service.chat.GPTChatRequest; import com.composum.ai.backend.base.service.chat.GPTConfiguration; import 
com.composum.ai.backend.base.service.chat.GPTContentCreationService; +import com.composum.ai.backend.base.service.chat.GPTMessageRole; import com.google.common.cache.CacheBuilder; import com.google.common.collect.ImmutableMap; import com.google.gson.Gson; @@ -99,6 +102,11 @@ public class AICreateServlet extends SlingAllMethodsServlet { */ public static final String PARAMETER_TEXTLENGTH = "textLength"; + /** + * Parameter to transmit a path to an image instead of a text. + */ + public static final String PARAMETER_INPUT_IMAGE_PATH = "inputImagePath"; + /** * Session contains a map at this key that maps the streamids to the streaming handle. */ @@ -243,11 +251,17 @@ protected void doPost(@NotNull SlingHttpServletRequest request, @NotNull SlingHt String sourcePath = request.getParameter(PARAMETER_SOURCEPATH); String sourceText = request.getParameter(PARAMETER_SOURCE); String configBasePath = request.getParameter(PARAMETER_CONFIGBASEPATH); + String inputImagePath = request.getParameter(PARAMETER_INPUT_IMAGE_PATH); + if ("undefined".equals(inputImagePath) || "null".equals(inputImagePath) || StringUtils.isBlank(inputImagePath)) { + inputImagePath = null; + } GPTConfiguration config = configurationService.getGPTConfiguration(request, configBasePath); String chat = request.getParameter(PARAMETER_CHAT); - if (isNoneBlank(sourcePath, sourceText)) { - LOG.warn("Cannot use both sourcePath and sourceText"); - response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Cannot use both sourcePath and sourceText."); + if (Stream.of(sourcePath, sourceText, inputImagePath).filter(StringUtils::isNotBlank).count() > 1) { + LOG.warn("More than one of sourcePath and sourceText and " + PARAMETER_INPUT_IMAGE_PATH + + " given, only one of them is allowed"); + response.sendError(HttpServletResponse.SC_BAD_REQUEST, "More than one of sourcePath and sourceText and " + PARAMETER_INPUT_IMAGE_PATH + + " given, only one of them is allowed"); return; } boolean richtext = 
Boolean.TRUE.toString().equalsIgnoreCase(request.getParameter(PARAMETER_RICHTEXT)); @@ -284,6 +298,18 @@ protected void doPost(@NotNull SlingHttpServletRequest request, @NotNull SlingHt } } + if (isNotBlank(inputImagePath)) { + Resource resource = request.getResourceResolver().getResource(inputImagePath); + String imageUrl = markdownService.getImageUrl(resource); + if (imageUrl == null) { + LOG.warn("No image found at {}", inputImagePath); + response.sendError(HttpServletResponse.SC_NOT_FOUND, "No image found at " + inputImagePath); + return; + } else { + additionalParameters.addMessages(Collections.singletonList(new GPTChatMessage(GPTMessageRole.USER, null, imageUrl))); + } + } + EventStream callback = new EventStream(); String id = saveStream(callback, request); LOG.info("Starting stream {}", id); diff --git a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/ApproximateMarkdownService.java b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/ApproximateMarkdownService.java index 5d7d24384..8baef0284 100644 --- a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/ApproximateMarkdownService.java +++ b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/ApproximateMarkdownService.java @@ -1,6 +1,8 @@ package com.composum.ai.backend.slingbase; import java.io.PrintWriter; +import java.util.List; +import java.util.Objects; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -14,6 +16,11 @@ */ public interface ApproximateMarkdownService { + /** + * An additional header for the response that tells that the path is actually an image and gives its path. + */ + String HEADER_IMAGEPATH = "imagepath"; + /** * Generates a text formatted with markdown that heuristically represents the text content of a page or resource, mainly for use with the AI. * That is rather heuristically - it cannot faithfully represent the page, but will probably be enough to generate summaries, keywords and so forth. 
@@ -45,4 +52,66 @@ void approximateMarkdown(@Nullable Resource resource, @Nonnull PrintWriter out, */ @Nonnull String getMarkdown(@Nullable String value); + + /** + * Returns a number of links that are saved in the component or siblings of the component that could be used as + * a proposal for the user to be used as source for the AI via markdown generation etc. + * This heuristically collects a number of links that might be interesting. + * + * @param resource the resource to check + * @return a list of links, or an empty list if there are none. + */ + @Nonnull + List getComponentLinks(@Nullable Resource resource); + + /** + * Retrieves the imageURL in a way useable for ChatGPT - usually data:image/jpeg;base64,{base64_image} + */ + @Nullable + String getImageUrl(@Nullable Resource imageResource); + + /** + * A link from a component. + * + * @see #getComponentLinks(Resource) + */ + class Link { + private final String path; + private final String title; + + public Link(String path, String title) { + this.path = path; + this.title = title; + } + + public String getPath() { + return path; + } + + public String getTitle() { + return title; + } + + @Override + public boolean equals(Object object) { + if (this == object) return true; + if (!(object instanceof Link)) return false; + Link link = (Link) object; + return Objects.equals(getPath(), link.getPath()) && Objects.equals(getTitle(), link.getTitle()); + } + + @Override + public int hashCode() { + return Objects.hash(getPath(), getTitle()); + } + + @Override + public String toString() { + return "Link{" + + "path='" + path + '\'' + + ", title='" + title + '\'' + + '}'; + } + } + } diff --git a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/ApproximateMarkdownServicePlugin.java b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/ApproximateMarkdownServicePlugin.java index 6e7aaeab5..2fd0af803 100644 --- 
a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/ApproximateMarkdownServicePlugin.java +++ b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/ApproximateMarkdownServicePlugin.java @@ -4,6 +4,7 @@ import java.util.regex.Pattern; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import org.apache.sling.api.SlingHttpServletRequest; import org.apache.sling.api.SlingHttpServletResponse; @@ -39,6 +40,13 @@ PluginResult maybeHandle(@Nonnull Resource resource, @Nonnull PrintWriter out, @Nonnull ApproximateMarkdownService service, @Nonnull SlingHttpServletRequest request, @Nonnull SlingHttpServletResponse response); + /** + * Retrieves the imageURL in a way useable for ChatGPT - usually data:image/jpeg;base64,{base64_image} + * If the plugin cannot handle this resource, it should return null. + */ + @Nullable + String getImageUrl(@Nullable Resource imageResource); + /** * Returns true when the sling:resourceType or one of the sling:resourceSuperType of the sling:resourceType match the pattern. * Useable to check whether a resource is rendered with a derivation of a certain component. 
diff --git a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/impl/ApproximateMarkdownServiceImpl.java b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/impl/ApproximateMarkdownServiceImpl.java index 3c145f0ea..db26df271 100644 --- a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/impl/ApproximateMarkdownServiceImpl.java +++ b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/impl/ApproximateMarkdownServiceImpl.java @@ -8,7 +8,9 @@ import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; +import java.util.ArrayList; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -21,10 +23,12 @@ import javax.annotation.Nullable; import org.apache.commons.lang3.StringUtils; +import org.apache.jackrabbit.JcrConstants; import org.apache.sling.api.SlingHttpServletRequest; import org.apache.sling.api.SlingHttpServletResponse; import org.apache.sling.api.resource.Resource; import org.apache.sling.api.resource.ResourceUtil; +import org.jetbrains.annotations.NotNull; import org.osgi.service.component.annotations.Activate; import org.osgi.service.component.annotations.Component; import org.osgi.service.component.annotations.Deactivate; @@ -314,6 +318,89 @@ String[] labelledAttributePatternDeny() default {".*:.*", "layout", "backgroundC } + /** + * {@inheritDoc} + * We traverse the attributes of resource and all children and collect everything that starts with /content. + * If there are less than 5 links, we continue with the parent resource until jcr:content is reached. + * The link title will be the jcr:title or title attribute. 
+ */ + @NotNull + @Override + public List<Link> getComponentLinks(@Nullable Resource resource) { + List<Link> resourceLinks = new ArrayList<>(); + if (resource == null) { + return resourceLinks; + } + Resource searchResource = resource; + if (resource.getValueMap().isEmpty()) { // attribute resource, use parent + searchResource = resource.getParent(); + } + while (searchResource != null && resourceLinks.size() < 5 && searchResource.getPath().contains("/jcr:content/")) { + List<Link> resourceLinkCandidates = new ArrayList<>(); + collectLinks(searchResource, resourceLinkCandidates); + Iterator<Link> iterator = resourceLinkCandidates.iterator(); + while (resourceLinks.size() < 5 && iterator.hasNext()) { + Link link = iterator.next(); + if (!resourceLinks.contains(link)) { + resourceLinks.add(link); + } + } + searchResource = searchResource.getParent(); + } + return resourceLinks; + } + + /** + * Collects links from a resource and its children. The link title will be the jcr:title or title attribute. + * + * @param resource the resource to collect links from + * @param resourceLinks the list to store the collected links + */ + protected void collectLinks(@NotNull Resource resource, List<Link> resourceLinks) { + resource.getValueMap().entrySet().stream() + .filter(entry -> entry.getValue() instanceof String) + .filter(entry -> ((String) entry.getValue()).startsWith("/content/")) + .forEach(entry -> { + String path = (String) entry.getValue(); + Resource targetResource = resource.getResourceResolver().getResource(path); + if (targetResource != null) { + if (targetResource.getChild(JcrConstants.JCR_CONTENT) != null) { + targetResource = targetResource.getChild(JcrConstants.JCR_CONTENT); + } + String title = targetResource.getValueMap().get("jcr:title", String.class); + if (title == null) { + title = targetResource.getValueMap().get("title", String.class); + } + if (title == null) { + title = targetResource.getName(); + if (JcrConstants.JCR_CONTENT.equals(title)) { + title =
targetResource.getParent().getName(); + } + } + Link link = new Link(path, title); + if (!resourceLinks.contains(link)) { + resourceLinks.add(link); + } + } + }); + resource.getChildren().forEach(child -> { + collectLinks(child, resourceLinks); + }); + } + + @Override + public String getImageUrl(Resource imageResource) { + if (imageResource == null) { + return null; + } + for (ApproximateMarkdownServicePlugin plugin : plugins) { + String imageUrl = plugin.getImageUrl(imageResource); + if (imageUrl != null) { + return imageUrl; + } + } + return null; + } // debugging code; remove after it works. diff --git a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/impl/HtmlToApproximateMarkdownServicePlugin.java b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/impl/HtmlToApproximateMarkdownServicePlugin.java index 4a0193640..814bb5512 100644 --- a/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/impl/HtmlToApproximateMarkdownServicePlugin.java +++ b/backend/slingbase/src/main/java/com/composum/ai/backend/slingbase/impl/HtmlToApproximateMarkdownServicePlugin.java @@ -100,6 +100,10 @@ public PluginResult maybeHandle( try { String html = renderedAsHTML(resource, request, response); + if (StringUtils.isBlank(html)) { + LOG.debug("No HTML generated for {} with resource type {}", resource.getPath(), resource.getResourceType()); + return PluginResult.NOT_HANDLED; + } String markdown = service.getMarkdown(html); if (StringUtils.isBlank(markdown)) { LOG.debug("No markdown generated for {} with resource type {}", resource.getPath(), resource.getResourceType()); @@ -122,6 +126,12 @@ public PluginResult maybeHandle( return PluginResult.NOT_HANDLED; } + @Nullable + @Override + public String getImageUrl(@Nullable Resource imageResource) { + return null; + } + protected boolean isBecauseOfUnsupportedOperation(Throwable e) { if (e instanceof UnsupportedOperationCalled) { return true; @@ -174,6 +184,9 @@ protected String 
renderedAsHTML(Resource resource, SlingHttpServletRequest reque throw new UnsupportedOperationCalled(); } } + if (writer.toString().contains("Resource dumped by HtmlRenderer")) { + return null; + } return writer.toString(); } diff --git a/backend/slingbase/src/test/java/com/composum/ai/backend/slingbase/impl/ApproximateMarkdownServiceImplTest.java b/backend/slingbase/src/test/java/com/composum/ai/backend/slingbase/impl/ApproximateMarkdownServiceImplTest.java index 85bf405d1..a21c4cd7c 100644 --- a/backend/slingbase/src/test/java/com/composum/ai/backend/slingbase/impl/ApproximateMarkdownServiceImplTest.java +++ b/backend/slingbase/src/test/java/com/composum/ai/backend/slingbase/impl/ApproximateMarkdownServiceImplTest.java @@ -12,6 +12,7 @@ import java.io.StringWriter; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.sling.api.SlingHttpServletRequest; @@ -26,6 +27,7 @@ import org.mockito.Mockito; import com.composum.ai.backend.base.service.chat.GPTChatCompletionService; +import com.composum.ai.backend.slingbase.ApproximateMarkdownService; import com.google.common.collect.ImmutableMap; /** @@ -141,4 +143,29 @@ private Resource createMockResource(String resourceType, Map att return context.create().resource("/content/parent/path/res", props); } + @Test + public void testGetComponentLinks() { + // Setup Mock Resources + Resource rootResource = context.create().resource("/content/parent/path/jcr:content/res", + ImmutableMap.of("link1", "/content/parent/path/res1")); + context.create().resource("/content/parent/path/jcr:content/res/child", + ImmutableMap.of("link2", "/content/parent/path/child1")); + // set up resources for the links, one with a title, one with a jcr:title + context.create().resource("/content/parent/path/res1", + ImmutableMap.of("jcr:title", "res1")); + context.create().resource("/content/parent/path/child1"); + context.create().resource("/content/parent/path/child1/jcr:content", + 
ImmutableMap.of("title", "child1")); + + // Execute Method + List links = service.getComponentLinks(rootResource); + + // Assertions + ec.checkThat(links.size(), is(2)); // Check if two links are returned + ec.checkThat(links.get(0).getPath(), is("/content/parent/path/res1")); // Check first link path + ec.checkThat(links.get(0).getTitle(), is("res1")); // Check first link title + ec.checkThat(links.get(1).getPath(), is("/content/parent/path/child1")); // Check second link path + ec.checkThat(links.get(1).getTitle(), is("child1")); // Check second link title + } + } diff --git a/composum/bundle/src/main/java/com/composum/ai/composum/bundle/AIServlet.java b/composum/bundle/src/main/java/com/composum/ai/composum/bundle/AIServlet.java index cd66cc888..c5f95372a 100644 --- a/composum/bundle/src/main/java/com/composum/ai/composum/bundle/AIServlet.java +++ b/composum/bundle/src/main/java/com/composum/ai/composum/bundle/AIServlet.java @@ -11,6 +11,7 @@ import java.io.PrintWriter; import java.lang.reflect.Type; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.UUID; @@ -18,6 +19,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -26,6 +28,7 @@ import javax.servlet.ServletException; import javax.servlet.http.HttpServletResponse; +import org.apache.commons.lang3.StringUtils; import org.apache.sling.api.SlingHttpServletRequest; import org.apache.sling.api.SlingHttpServletResponse; import org.apache.sling.api.resource.Resource; @@ -47,6 +50,7 @@ import com.composum.ai.backend.base.service.chat.GPTChatRequest; import com.composum.ai.backend.base.service.chat.GPTConfiguration; import com.composum.ai.backend.base.service.chat.GPTContentCreationService; +import com.composum.ai.backend.base.service.chat.GPTMessageRole; import 
com.composum.ai.backend.base.service.chat.GPTTranslationService; import com.composum.ai.backend.slingbase.AIConfigurationService; import com.composum.ai.backend.slingbase.ApproximateMarkdownService; @@ -87,6 +91,11 @@ public class AIServlet extends AbstractServiceServlet { */ public static final String PARAMETER_TEXT = "text"; + /** + * Parameter to transmit a path to an image instead of a text. + */ + public static final String PARAMETER_INPUT_IMAGE_PATH = "inputImagePath"; + /** * Parameter to transmit a prompt on which ChatGPT is to operate - that is, the instructions. * If there is a {@link #PARAMETER_CHAT} given, this is the first prompt *before* the chat - @@ -512,9 +521,12 @@ protected void performOperation(@Nonnull Status status, @Nonnull SlingHttpServle String textLength = request.getParameter("textLength"); String inputPath = request.getParameter("inputPath"); String inputText = request.getParameter("inputText"); + String inputImagePath = request.getParameter(PARAMETER_INPUT_IMAGE_PATH); String chat = request.getParameter(PARAMETER_CHAT); - if (isNoneBlank(inputPath, inputText)) { - status.error("Both inputPath and inputText given, only one of them is allowed"); + if (Stream.of(inputPath, inputText, inputImagePath).filter(StringUtils::isNotBlank).count() > 1) { + status.error("More than one of inputPath and inputText and " + PARAMETER_INPUT_IMAGE_PATH + + " given, only one of them is allowed"); + return; } boolean richtext = Boolean.TRUE.toString().equalsIgnoreCase(request.getParameter(PARAMETER_RICHTEXT)); @@ -545,6 +557,16 @@ protected void performOperation(@Nonnull Status status, @Nonnull SlingHttpServle inputText = markdownService.approximateMarkdown(resource, request, response); } } + if (isNotBlank(inputImagePath)) { + Resource resource = request.getResourceResolver().getResource(inputImagePath); + String imageUrl = markdownService.getImageUrl(resource); + if (imageUrl == null) { + status.error("No image found at " + inputImagePath); + return; + } 
else { + additionalParameters.addMessages(List.of(new GPTChatMessage(GPTMessageRole.USER, null, imageUrl))); + } + } if (status.isValid()) { if (!streaming) { String result; diff --git a/composum/bundle/src/main/java/com/composum/ai/composum/bundle/model/CreateDialogModel.java b/composum/bundle/src/main/java/com/composum/ai/composum/bundle/model/CreateDialogModel.java index 16dfd0ba4..35b10b42d 100644 --- a/composum/bundle/src/main/java/com/composum/ai/composum/bundle/model/CreateDialogModel.java +++ b/composum/bundle/src/main/java/com/composum/ai/composum/bundle/model/CreateDialogModel.java @@ -1,37 +1,62 @@ package com.composum.ai.composum.bundle.model; -import java.io.IOException; +import static java.util.Objects.requireNonNull; + import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.composum.ai.backend.slingbase.ApproximateMarkdownService; import com.composum.pages.commons.model.AbstractModel; -import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonIOException; +import com.google.gson.JsonSyntaxException; public class CreateDialogModel extends AbstractModel { private static final Logger LOG = LoggerFactory.getLogger(CreateDialogModel.class); + protected transient ApproximateMarkdownService approximateMarkdownService; + public Map getPredefinedPrompts() { return readJsonFile("create/predefinedprompts.json"); } public Map getContentSelectors() { - return readJsonFile("create/contentselectors.json"); + Map results = new LinkedHashMap<>(); + results.putAll(readJsonFile("create/contentselectors.json")); + List componentLinks = getApproximateMarkdownService().getComponentLinks(getResource()); + for (ApproximateMarkdownService.Link link : componentLinks) { + 
results.put(link.getPath(), link.getTitle() + " (" + link.getPath() + ")"); + } + return results; + } + + protected ApproximateMarkdownService getApproximateMarkdownService() { + if (approximateMarkdownService == null) { + approximateMarkdownService = requireNonNull(context.getService(ApproximateMarkdownService.class)); + } + return approximateMarkdownService; } public Map getTextLengths() { return readJsonFile("create/textlengths.json"); } + private static final Gson gson = new GsonBuilder().create(); + static Map readJsonFile(String filePath) { try { - final ObjectMapper mapper = new ObjectMapper(); InputStream inputStream = CreateDialogModel.class.getClassLoader().getResourceAsStream(filePath); - return mapper.readValue(inputStream, Map.class); - } catch (IOException e) { + return gson.fromJson(new InputStreamReader(inputStream, StandardCharsets.UTF_8), Map.class); + } catch (JsonSyntaxException | JsonIOException e) { LOG.error("Cannot read {}", filePath, e); return null; } diff --git a/composum/bundle/src/main/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePlugin.java b/composum/bundle/src/main/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePlugin.java index 1d19ea2cb..8b27ce842 100644 --- a/composum/bundle/src/main/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePlugin.java +++ b/composum/bundle/src/main/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePlugin.java @@ -1,17 +1,26 @@ package com.composum.ai.composum.bundle.service; +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; import java.io.PrintWriter; +import java.util.Base64; import java.util.List; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import javax.annotation.Nonnull; +import javax.imageio.ImageIO; import 
org.apache.commons.lang3.StringUtils; +import org.apache.jackrabbit.JcrConstants; import org.apache.sling.api.SlingHttpServletRequest; import org.apache.sling.api.SlingHttpServletResponse; import org.apache.sling.api.resource.Resource; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import org.osgi.framework.Constants; import org.osgi.service.component.annotations.Component; import org.slf4j.Logger; @@ -36,6 +45,9 @@ public class ComposumApproximateMarkdownServicePlugin implements ApproximateMark @NotNull Resource resource, @NotNull PrintWriter out, @Nonnull ApproximateMarkdownService service, @Nonnull SlingHttpServletRequest request, @Nonnull SlingHttpServletResponse response) { + if (handleImage(resource, out, response)) { + return PluginResult.HANDLED_ALL; + } boolean wasHandledAsPage = pageHandling(resource, out, service); boolean wasHandledAsTable = !wasHandledAsPage && tableHandling(resource, out, service); handleContentReference(resource, out, service, request, response); @@ -114,5 +126,78 @@ protected void handleContentReference(Resource resource, PrintWriter out, Approx } } + /** + * Handle resource that is a jcr:content of type nt:resource with a jcr:mimeType starting with image/ + * as a markdown image reference to that path. 
+ * + * @return whether it was an image for which we have written a markdown reference + */ + protected boolean handleImage(Resource resource, PrintWriter out, SlingHttpServletResponse response) { + if (JcrConstants.JCR_CONTENT.equals(resource.getName()) && resource.isResourceType("nt:resource")) { + String mimeType = resource.getValueMap().get("jcr:mimeType", String.class); + if (StringUtils.startsWith(mimeType, "image/")) { + String name = StringUtils.defaultString(resource.getValueMap().get("jcr:title", String.class), resource.getName()); + out.println("![" + name + "](" + resource.getParent().getPath() + ")"); + try { + response.addHeader(ApproximateMarkdownService.HEADER_IMAGEPATH, resource.getParent().getPath()); + } catch (RuntimeException e) { + LOG.warn("Unable to set header " + ApproximateMarkdownService.HEADER_IMAGEPATH + " to " + resource.getParent().getPath(), e); + } + return true; + } + } + return false; + } + + /** + * Retrieves the imageURL in a way useable for ChatGPT - usually data:image/jpeg;base64,{base64_image} + */ + @Nullable + @Override + public String getImageUrl(@Nullable Resource imageResource) { + Resource imageContentResource = imageResource; + if (imageContentResource != null && imageContentResource.isResourceType("nt:file")) { + imageContentResource = imageContentResource.getChild(JcrConstants.JCR_CONTENT); + } + if (imageContentResource != null && imageContentResource.isResourceType("nt:resource")) { + String mimeType = imageContentResource.getValueMap().get("jcr:mimeType", String.class); + if (StringUtils.startsWith(mimeType, "image/")) { + try (InputStream is = imageContentResource.adaptTo(InputStream.class)) { + if (is == null) { + LOG.warn("Unable to get InputStream from image resource {}", imageContentResource.getPath()); + return null; + } + byte[] data = is.readAllBytes(); + data = resizeToMaxSize(data, mimeType, 512); + return "data:" + mimeType + ";base64," + new String(Base64.getEncoder().encode(data)); + } catch 
(IOException e) { + LOG.warn("Unable to get InputStream from image resource {}", imageContentResource.getPath(), e); + } + } + } + return null; + } + + /** + * Scales the image down so that neither side exceeds maxSize pixels, keeping the aspect ratio, and re-encodes it in the format named by the mime type subtype (e.g. png for image/png). + * The input bytes are returned unchanged if the image already fits, cannot be decoded by ImageIO (e.g. SVG), or cannot be written in that format. + */ + protected byte[] resizeToMaxSize(@Nonnull byte[] imageData, String mimeType, int maxSize) throws IOException { + BufferedImage originalImage = ImageIO.read(new ByteArrayInputStream(imageData)); // null if no registered reader understands the data + if (originalImage == null || Math.max(originalImage.getWidth(), originalImage.getHeight()) <= maxSize) { + return imageData; + } + double factor = maxSize * 1.0 / (Math.max(originalImage.getWidth(), originalImage.getHeight()) + 1); + int newWidth = Math.max(1, (int) (originalImage.getWidth() * factor)); // extreme aspect ratios must not round down to 0 + int newHeight = Math.max(1, (int) (originalImage.getHeight() * factor)); + int type = originalImage.getType() == BufferedImage.TYPE_CUSTOM ? BufferedImage.TYPE_INT_ARGB : originalImage.getType(); // TYPE_CUSTOM is rejected by the BufferedImage constructor + BufferedImage resizedImage = new BufferedImage(newWidth, newHeight, type); + java.awt.Graphics2D graphics = resizedImage.createGraphics(); + graphics.drawImage(originalImage, 0, 0, newWidth, newHeight, null); + graphics.dispose(); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + boolean written = ImageIO.write(resizedImage, mimeType.substring("image/".length()), outputStream); // false if no writer handles this format or image type + return written ? outputStream.toByteArray() : imageData; + } } diff --git a/composum/bundle/src/main/resources/create/predefinedprompts.json b/composum/bundle/src/main/resources/create/predefinedprompts.json index 94554d0a4..4068b36c9 100644 --- a/composum/bundle/src/main/resources/create/predefinedprompts.json +++ b/composum/bundle/src/main/resources/create/predefinedprompts.json @@ -19,5 +19,6 @@ "AIDA Framework Text": "Please write a complete text using the AIDA framework. Start by capturing attention, then generate interest, induce desire, and finally, call for action.", "Slogan": "Based on the given text, please create a catchy and memorable slogan that captures its main theme.
Output it as it could be put in front of the text.", "CTA": "Based on the text, create a call to action that encourages the reader to take the next step, and clearly conveys the unique value proposition to the reader.", - "Markdown to HTML": "Convert the following text from Markdown to HTML:\n\n" + "Markdown to HTML": "Convert the following text from Markdown to HTML:\n\n", + "Describe image": "Please describe the following image in a way that a blind person can understand it." } diff --git a/composum/bundle/src/test/java/com/composum/ai/composum/bundle/model/CreateDialogModelTest.java b/composum/bundle/src/test/java/com/composum/ai/composum/bundle/model/CreateDialogModelTest.java index 15b0f7d08..f5032aabe 100644 --- a/composum/bundle/src/test/java/com/composum/ai/composum/bundle/model/CreateDialogModelTest.java +++ b/composum/bundle/src/test/java/com/composum/ai/composum/bundle/model/CreateDialogModelTest.java @@ -6,10 +6,18 @@ import java.util.Map; import org.junit.Test; +import org.mockito.Mockito; + +import com.composum.ai.backend.slingbase.ApproximateMarkdownService; public class CreateDialogModelTest { - private CreateDialogModel model = new CreateDialogModel(); + private ApproximateMarkdownService approximateMarkdownServiceMock = + Mockito.mock(ApproximateMarkdownService.class); + + private CreateDialogModel model = new CreateDialogModel() {{ + this.approximateMarkdownService = approximateMarkdownServiceMock; + }}; @Test public void testGetPredefinedPrompts() { diff --git a/composum/config/src/main/content/jcr_root/libs/composum/pages/install/com.composum.ai.backend.base.service.chat.impl.GPTChatCompletionServiceImpl.cfg.json b/composum/config/src/main/content/jcr_root/libs/composum/pages/install/com.composum.ai.backend.base.service.chat.impl.GPTChatCompletionServiceImpl.cfg.json index 237a68363..90ae2da28 100644 --- 
a/composum/config/src/main/content/jcr_root/libs/composum/pages/install/com.composum.ai.backend.base.service.chat.impl.GPTChatCompletionServiceImpl.cfg.json +++ b/composum/config/src/main/content/jcr_root/libs/composum/pages/install/com.composum.ai.backend.base.service.chat.impl.GPTChatCompletionServiceImpl.cfg.json @@ -1,5 +1,6 @@ { "defaultModel": "gpt-3.5-turbo", + "imageModel": "gpt-4-vision-preview", "disable": false, "openAiApiKey": "${openai.api.key}" } diff --git a/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/css/dialogs.scss b/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/css/dialogs.scss index b87839f72..248e22f42 100644 --- a/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/css/dialogs.scss +++ b/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/css/dialogs.scss @@ -152,6 +152,12 @@ } } + .ai-source-image { + background-size: contain; + background-position: center; + background-repeat: no-repeat; + } + } .composum-pages-options-ai-dialogs-help { diff --git a/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/dialogs/create/create.jsp b/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/dialogs/create/create.jsp index e9d8063cc..ec222bedf 100644 --- a/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/dialogs/create/create.jsp +++ b/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/dialogs/create/create.jsp @@ -89,7 +89,7 @@ maxlength="256" type="text" value="" placeholder="">
-
diff --git a/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/js/create.js b/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/js/create.js index 1c0fe94bd..94424c1da 100644 --- a/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/js/create.js +++ b/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/js/create.js @@ -65,6 +65,7 @@ this.$spinner = this.$el.find('.loading-indicator'); this.$response = this.$el.find('.ai-response-field'); this.$sourceContent = this.$el.find('.ai-source-field'); + this.$sourceImage = this.$el.find('.ai-source-image'); this.$urlContainer = this.$el.find('.composum-ai-url-container'); this.$urlField = this.$el.find('.composum-ai-url-field'); @@ -146,15 +147,17 @@ /** Creates a map that saves the content of all fields of this dialog. */ makeSaveStateMap: function () { - return { + const map = { 'predefinedPrompts': this.$predefinedPrompts.val(), 'contentSelect': this.$contentSelect.val(), 'textLength': this.$textLength.val(), 'prompt': this.$prompt.val(), 'result': this.getResponse(), 'source': this.getSourceContent(), + 'imagepath': this.$sourceImage.data('imagepath'), 'url': this.$urlField.val() }; + return map; }, saveState: function () { @@ -178,9 +181,10 @@ this.$textLength.val(map['textLength']); this.$prompt.val(map['prompt']); this.setResponse(map['result']); - this.setSourceContent(map['source']); + this.setSourceContent(map['source'], map['imagepath']); this.adjustButtonStates(); this.$urlField.val(map['url']); + this.contentSelectChanged(); }, /** Button 'Reset' was clicked. 
*/ @@ -210,10 +214,11 @@ contentSelectChanged: function (event) { console.log("contentSelectChanged", arguments); - event.preventDefault(); + event && event.preventDefault(); let contentSelect = this.$contentSelect.val(); const key = this.$contentSelect.val(); this.$urlContainer.hide(); + this.$sourceImage.removeData('imagepath'); switch (key) { case 'lastoutput': this.setSourceContent(this.getResponse()); @@ -222,22 +227,29 @@ this.setSourceContent(this.widget.getValue()); break; case 'component': - this.retrieveValue(this.componentPath, (value) => this.setSourceContent(value)); + this.retrieveValue(this.componentPath, this.setSourceContent.bind(this)); break; case 'page': - this.retrieveValue(this.pagePath, (value) => this.setSourceContent(value)); + this.retrieveValue(this.pagePath, this.setSourceContent.bind(this)); break; case 'empty': this.setSourceContent(''); break; case 'url': this.showError(); + this.setSourceContent(''); + this.urlChanged(); this.$urlContainer.show(); case '-': this.setSourceContent(''); break; default: - this.showError('Unknown content selector value ' + key); + if (key.startsWith('/content/')) { + this.retrieveValue(key, this.setSourceContent.bind(this)); + } else { + this.showError('Unknown content selector value ' + key); + debugger; + } } }, @@ -246,11 +258,25 @@ this.$contentSelect.val('-'); }, - setSourceContent(value) { - if (this.isRichText) { - core.widgetOf(this.$sourceContent.find('textarea')).setValue(value || ''); + /** Puts the value into the source field. 
If imagepath is set, we instead make the image visible instead of the source textarea / rte */ + setSourceContent(value, imagepath) { + if (!imagepath) { + this.$sourceContent.show(); + this.$sourceImage.hide(); + this.$sourceImage.removeData('imagepath'); + if (this.isRichText) { + core.widgetOf(this.$sourceContent.find('textarea')).setValue(value || ''); + } else { + this.$sourceContent.val(value || ''); + } } else { - this.$sourceContent.val(value || ''); + // set the imageAlternative to the same size as the textAlternative + this.$sourceImage.css('width', this.$sourceContent.css('width')); + this.$sourceImage.css('height', this.$sourceContent.css('height')); + this.$sourceContent.hide(); + this.$sourceImage.show(); + this.$sourceImage.css('background-image', 'url(' + imagepath + ')'); + this.$sourceImage.data('imagepath', imagepath); } }, @@ -268,8 +294,8 @@ (this.isRichText ? '.html' : '.md') + core.encodePath(path), type: "GET", dataType: "text", - success: (data) => { - callback(data); + success: (data, status, xhr) => { + callback(data, xhr.getResponseHeader('imagepath')); }, error: (xhr, status, error) => { console.error("error loading approximate markdown", xhr, status, error); @@ -279,7 +305,7 @@ }, urlChanged(event) { - event.preventDefault(); + if (event) event.preventDefault(); const url = this.$urlField.val(); if (url) { this.showError(); @@ -290,6 +316,7 @@ dataType: "text", success: (data) => { this.setSourceContent(data); + this.$contentSelect.val('url'); }, error: (xhr, status, error) => { console.error("error loading approximate markdown for ", url, xhr, status, error); @@ -357,11 +384,13 @@ let textLength = this.$textLength.val(); let prompt = this.$prompt.val(); let source = this.getSourceContent(); + let imagepath = this.$sourceImage.data('imagepath'); let url = ai.const.url.general.authoring + ".create.json"; core.ajaxPost(url, { textLength: textLength, - inputText: source, + inputText: imagepath ? 
undefined : source, + inputImagePath: imagepath, streaming: this.streaming, richText: this.isRichText, prompt: prompt @@ -430,6 +459,7 @@ if (error) { this.$alert.text(error); this.$alert.show(); + this.$alert[0].scrollIntoView(); } else { this.$alert.hide(); } diff --git a/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/js/sidebar.js b/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/js/sidebar.js index 916a647b1..ab8d1ac80 100644 --- a/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/js/sidebar.js +++ b/composum/package/src/main/content/jcr_root/libs/composum/pages/options/ai/js/sidebar.js @@ -401,6 +401,7 @@ console.error("Error generating text: ", data); this.$alert.html("Error generating text: " + JSON.stringify(data)); this.$alert.show(); + this.$alert[0].scrollIntoView(); this.setLoading(false); } }, @@ -429,6 +430,7 @@ console.error("Error generating text: ", arguments); this.$alert.html("Error generating text: " + JSON.stringify(arguments)); this.$alert.show(); + this.$alert[0].scrollIntoView(); }, startStreaming: function (streamid) { @@ -458,6 +460,7 @@ this.setLoading(false); this.$alert.text('Connection failed.'); this.$alert.show(); + this.$alert[0].scrollIntoView(); }, onStreamingFinished: function (event) { @@ -491,6 +494,7 @@ this.abortRunningCalls(); this.$alert.text(event.data); this.$alert.show(); + this.$alert[0].scrollIntoView(); } }); diff --git a/featurespecs/7Imagerecognition.md b/featurespecs/7Imagerecognition.md new file mode 100644 index 000000000..11093df3d --- /dev/null +++ b/featurespecs/7Imagerecognition.md @@ -0,0 +1,23 @@ +# Add ChatGPT image recognition as additional input + +## Links + +https://platform.openai.com/docs/guides/vision +https://platform.openai.com/docs/api-reference/chat/object +java ImageIO + + "messages": [ { "role": "user", + "content": [ + { + "type": "text", + "text": "What’s in this image?" 
+ }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}" + } + } + ] + } + ] diff --git a/featurespecs/ChatCompletionInterface.md b/featurespecs/ChatCompletionInterface.md new file mode 100644 index 000000000..08d040373 --- /dev/null +++ b/featurespecs/ChatCompletionInterface.md @@ -0,0 +1,174 @@ +# Description of the ChatGPT Chat Completion Interface for code generation + +## Links + +https://platform.openai.com/docs/api-reference/chat/create +https://platform.openai.com/docs/guides/vision +We currently ignore tools and logprobs. + +## Basic implementation decisions + +We use GSON for JSON serialization and deserialization. We use the `@SerializedName` annotation to map the JSON +attributes to the Java attributes if the attribute name has to be different due to Java naming conventions. + +Package `com.composum.ai.backend.base.service.chat.impl.chatmodel` +(folder `backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/chatmodel`) +contains the generated Java classes for the JSON objects. Use the JavaBean conventions. +The class names should be prefixed with ChatCompletion to avoid name clashes. +Enumerations should be used for the fixed values and these should be inner classes in the class where they are used. + +## Request + +Attribute `role` has the fixed values `user`, `assistant` and `system` and should be an enum in the generated code. +Attribute `type` can have the fixed values `text` and `image_url` and should be an enum in the generated code. + +```json +{ + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "assistant", + "content": "Hello!" + }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What’s in this image?" 
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + } + } + ] + } + ], + "max_tokens": 300, + "stream": true +} +``` + +## Response + +Attribute `role` has the fixed values `user`, `assistant` and `system` and should be an enum in the generated code. +Attribute `logprobs` should be ignored in the generated code. +Attribute `finish_reason` has the fixed values `stop`, `length`, `content_filter` and should be an enum in the generated +code. + +The normal response and the streaming response should be mapped to the same Java class. + +```json +{ + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "\n\nHello there, how may I assist you today?" + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 9, + "completion_tokens": 12, + "total_tokens": 21 + } +} +``` + +## Streaming Response + +```json +{ + "id": "chatcmpl-123", + "object": "chat.completion.chunk", + "created": 1694268190, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "" + }, + "logprobs": null, + "finish_reason": null + } + ] +} +``` + +```json +{ + "id": "chatcmpl-123", + "object": "chat.completion.chunk", + "created": 1694268190, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "index": 0, + "delta": { + "content": "Hello" + }, + "logprobs": null, + "finish_reason": null + } + ] +} +``` + +.... 
+ +```json +{ + "id": "chatcmpl-123", + "object": "chat.completion.chunk", + "created": 1694268190, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "index": 0, + "delta": { + "content": "?" + }, + "logprobs": null, + "finish_reason": null + } + ] +} +``` + +```json +{ + "id": "chatcmpl-123", + "object": "chat.completion.chunk", + "created": 1694268190, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "index": 0, + "delta": {}, + "logprobs": null, + "finish_reason": "stop" + } + ] +} +``` diff --git a/pom.xml b/pom.xml index c731226bd..63400cf86 100644 --- a/pom.xml +++ b/pom.xml @@ -63,17 +63,6 @@ 1.8 - - - - - com.theokanning.openai-gpt3-java - api - 0.18.2 - - - - diff --git a/src/site/markdown/aem-variant/usage.md b/src/site/markdown/aem-variant/usage.md index 617b1473f..486dfe386 100644 --- a/src/site/markdown/aem-variant/usage.md +++ b/src/site/markdown/aem-variant/usage.md @@ -83,6 +83,13 @@ Selecting one of these options replaces the 'Source' text area with the chosen t - **Hand edited content:** You can provide your own text as a base for the AI to work with. - **No Text Added:** If you like to generate text by just giving the AI some instructions that do not refer to any current text content, this is your choice. +- **External URL:** A URL field is provided where you can enter the URL of a web page. The text content of the URL + will be retrieved into the 'Source' text area. +- If the component itself or sibling components contain paths into the JCR - e.g. links to other pages or references to + parts of other pages - then up to 5 of these paths are listed as additional options. Selecting one of these options + replaces the 'Source' text area with the chosen text. +- If some of those references are images, these will be offered as well. The images can serve as input for the AI + (currently the beta version of ChatGPT vision preview), e.g.
to generate a description for the image. ### Predefined prompts @@ -102,6 +109,7 @@ text. Some examples are: - **Improve:** The AI improves the text into a businesslike informative style, fixing orthographical and grammar errors. - **SEO Description:** The AI creates a description for a web page with the given text, usable for search engine optimization (SEO). +- **Describe Image:** (Somewhat experimental) The AI describes a selected image. ### Tips and Tricks for using the Content Creation Assistant diff --git a/src/site/markdown/composum-variant/usage.md b/src/site/markdown/composum-variant/usage.md index a1795ccd1..e9626e307 100644 --- a/src/site/markdown/composum-variant/usage.md +++ b/src/site/markdown/composum-variant/usage.md @@ -115,6 +115,13 @@ Selecting one of these options replaces the 'Source' text area with the chosen t - **Hand edited content:** You can provide your own text as a base for the AI to work with. - **No Text Added:** If you like to generate text by just giving the AI some instructions that do not refer to any current text content, this is your choice. +- **External URL:** A URL field is provided where you can enter the URL of a web page. The text content of the URL + will be retrieved into the 'Source' text area. +- If the component itself or sibling components contain paths into the JCR - e.g. links to other pages or references to + parts of other pages - then up to 5 of these paths are listed as additional options. Selecting one of these options + replaces the 'Source' text area with the chosen text. +- If some of those references are images, these will be offered as well. The images can serve as input for the AI + (currently the beta version of ChatGPT vision preview), e.g. to generate a description for the image. ### Predefined prompts @@ -134,6 +141,7 @@ text. Some examples are: - **Improve:** The AI improves the text into a businesslike informative style, fixing orthographical and grammar errors.
- **SEO Description:** The AI creates a description for a web page with the given text, usable for search engine optimization (SEO). +- **Describe Image:** (Somewhat experimental) The AI describes a selected image.