[pull] main from danny-avila:main #2

Merged 68 commits on Oct 19, 2024

Commits
d59b621
🪨 feat: AWS Bedrock support (#3935)
danny-avila Sep 9, 2024
748b41e
🔒 feat: RBAC for Multi-Convo Feature (#3964)
danny-avila Sep 9, 2024
0148b9b
🔒 refactor: Apply interface settings to all Roles (#3967)
danny-avila Sep 10, 2024
341e086
🛠️ fix: Completion Edge Cases & Improve Error Handling UX (#3968)
danny-avila Sep 10, 2024
1a1e685
🪨 fix: Minor AWS Bedrock/Misc. Improvements (#3974)
danny-avila Sep 10, 2024
d6c0121
⌨️ a11y(Settings): Improved Keyboard Navigation & Consistent Styling …
berry-13 Sep 10, 2024
0209955
✨ v0.7.5-rc2 (#3976)
danny-avila Sep 10, 2024
35a89bf
🔐 style: update auth and loading screen (#3875)
berry-13 Sep 11, 2024
07e5531
⚙️ fix: Ensure Azure AI Search TOP is a number (#3891)
s-diez Sep 11, 2024
aea01f0
🚀 feat: Banner (#3952)
ohneda Sep 11, 2024
c3dc03b
🔐 fix: token not using webcrypto (#4005)
berry-13 Sep 12, 2024
9a393be
🪨 fix: Formatting Edge Case Handling for Bedrock Messages (#4016)
danny-avila Sep 12, 2024
45b4283
🚀 feat: o1 (#4019)
danny-avila Sep 12, 2024
e293ff6
🪨 feat: AWS Bedrock Default Credentials Chain (#4038)
danny-avila Sep 13, 2024
4ef5ae6
💡 style: switched to Ariakit's tooltip (#3748)
berry-13 Sep 13, 2024
c792e32
🍪 fix: input validation for `lang` cookie (#4024)
DanielAlt Sep 13, 2024
f1ae267
🪙 fix: usage check for reasoning_tokens
danny-avila Sep 13, 2024
ab82966
🔐 feat: Enhance Bedrock Credential Handling (#4051)
danny-avila Sep 14, 2024
fc887ba
📁 feat: Add C# Support for Native File Search (#4058)
danny-avila Sep 15, 2024
ebdbfe8
🛠️ fix: Chrome App Crash on Endpoint Selection in Edit Preset Dialog …
danny-avila Sep 17, 2024
8dc5b32
📊 refactor: use Parameters from Side Panel for OpenAI, Anthropic, and…
danny-avila Sep 18, 2024
5f28682
🔧 fix: OpenAIClient Response Handling for Legacy `/v1/completions` (#…
danny-avila Sep 19, 2024
3ea2d90
🛠️ fix: `getStreamUsage` Method in OpenAIClient (#4133)
danny-avila Sep 19, 2024
33f80cd
🤖 ci: Configure Dependabot for Security Updates (#4134)
danny-avila Sep 19, 2024
486db57
🤖 ci: Configure Dependabot for Security Updates (#4134)
danny-avila Sep 19, 2024
fd056d2
🤖 ci: Dependabot for Security Updates (#4134)
danny-avila Sep 19, 2024
f734133
🤖 ci: Dependabot for Security Updates (#4134)
danny-avila Sep 19, 2024
94d1afe
🛡️ chore: address several npm vulnerabilities (#4151)
danny-avila Sep 20, 2024
d096c28
👐 a11y: New Chat button - focus, mobile label, collapsed sidebar labe…
Tanvez Sep 20, 2024
42b7373
🎨 fix: Terms and Conditions Modal Styling (#4169)
danny-avila Sep 20, 2024
be44caa
🖋️ feat: Add option to render User Messages as Markdown (#4170)
danny-avila Sep 21, 2024
44458d3
🔖 fix: URI Encoding for Bookmarks (#4172)
danny-avila Sep 21, 2024
c1c13a6
🗂️ fix: Optimize Conversation Grouping and Sorting (#4173)
danny-avila Sep 21, 2024
561650d
🐛 fix(analytics): prevent multiple GTM initializations (#4174)
riya-amemiya Sep 21, 2024
b0a48fd
📧 feat: LDAP Authentication Enhancement for Email Handling (#4177)
danny-avila Sep 21, 2024
eba2c9a
📅 fix: Conversation grouping and labeling for prior years (#4180)
danny-avila Sep 21, 2024
2d62eca
👐 style: Improve a11y/theming for Settings Dialog, Dropdown Menus; fi…
berry-13 Sep 22, 2024
4328a25
🧹 fix: Resolve Unarchive Conversation Bug, Archive Pagination (#4189)
danny-avila Sep 22, 2024
17e5934
📎 feat: Attachment Handling for `v1/completions` (#4205)
danny-avila Sep 23, 2024
321260e
🔄 refactor: Apply Config Preset for Model Spec Enforcement (#4214)
danny-avila Sep 24, 2024
6f498ee
🏷️ fix: Ensure modelLabel Field Usage for ModelSpecs/GPTPlugins (#4224)
danny-avila Sep 24, 2024
ba1014a
🏷️ fix: Ensure modelLabel Field Usage for ModelSpecs pt. 2 (#4225)
danny-avila Sep 24, 2024
9e371d6
🧹 chore: bump `vite-plugin-pwa` to `^0.20.5`, and use `overrides` to …
danny-avila Sep 24, 2024
f33e75e
🏷️ fix: Ensure `modelLabel` Field Usage for ModelSpecs (custom/openAI…
danny-avila Sep 24, 2024
ad74350
🚧 chore: merge latest dev build (#4288)
danny-avila Sep 30, 2024
2ce8647
👷 refactor(removeNullishValues): allow empty strings configured in pa…
danny-avila Sep 30, 2024
2ca257d
⚙️ fix: minor issues related to agents (#4297)
danny-avila Oct 1, 2024
ee5b96a
🔖 fix: bookmark error using CosmosDB - Added `index` to position fiel…
bijucyborg Oct 3, 2024
77bcb80
🛠️ fix: Remove `expiresAt` field when setting expiry to "never" (#4294)
normunds-wipo Oct 3, 2024
cb0b69e
🪖 refactor: Helm chart release workflow (#4311)
pranshu-m Oct 3, 2024
104341e
🖼️ fix: Prevent Empty Avatar Source (#4321)
danny-avila Oct 3, 2024
519df46
🪨 feat: RAG API Support for AWS Bedrock (#4322)
danny-avila Oct 3, 2024
5da7766
💬 fix: adjust regex in ModelService to recognize o1 models
juwalter Oct 7, 2024
bdc2fd3
🔨 fix(ToolCall): Check output string type before performing `.toLower…
hanna-daoud Oct 8, 2024
873e047
🧠 feat: Implement O1 Model Support for Max Tokens Handling (#4376)
danny-avila Oct 10, 2024
2846779
🔨 fix(AzureOpenAI): o1 model, `stream` parameter check (#4381)
ABHIJITH-EA Oct 10, 2024
bab0152
🤖 feat: Enhance Assistant Model Handling for Model Specs (#4390)
danny-avila Oct 11, 2024
bf5b87e
🪙 feat: Update token value for gpt-4o (#4387)
hongkai-neu Oct 11, 2024
ef11800
✨ feat: Add `GOOGLE_LOC` environment variable (#4395)
berry-13 Oct 15, 2024
c54a570
🕒 feat: Add 5-second timeout for Fetching Model Lists (#4423)
danny-avila Oct 15, 2024
0870acd
📦 chore: npm package audit (#4424)
danny-avila Oct 16, 2024
65888c2
⬆️ feat: Cancel chat file uploads; fix: Assistant uploads (#4433)
danny-avila Oct 16, 2024
b85c620
🤖 fix: Minor Assistants Issues (#4436)
danny-avila Oct 16, 2024
f3e2bd0
🐋 chore: remove Docker version syntax as its no longer (#4375)
matsubo Oct 19, 2024
20fb7f0
🔃 refactor: rename all server endpoints to use same file names (#4364)
adrianfagerland Oct 19, 2024
8f3de7d
🎨 refactor: UI style (#4438)
berry-13 Oct 19, 2024
e25c16c
🖼️ feat: Add dat.gui to Artifacts UI libs (#4344)
lx-0 Oct 19, 2024
ecf5699
🧪 chore: raise max temperature to 2 for OpenAI/Custom Endpoints
danny-avila Oct 19, 2024
2 changes: 0 additions & 2 deletions .devcontainer/docker-compose.yml
@@ -1,5 +1,3 @@
version: "3.8"

services:
app:
build:
23 changes: 23 additions & 0 deletions .env.example
@@ -111,6 +111,26 @@ ANTHROPIC_API_KEY=user_provided
BINGAI_TOKEN=user_provided
# BINGAI_HOST=https://cn.bing.com

#=================#
# AWS Bedrock #
#=================#

# BEDROCK_AWS_DEFAULT_REGION=us-east-1 # A default region must be provided
# BEDROCK_AWS_ACCESS_KEY_ID=someAccessKey
# BEDROCK_AWS_SECRET_ACCESS_KEY=someSecretAccessKey

# Note: This example list is not meant to be exhaustive. If omitted, all known, supported model IDs will be included for you.
# BEDROCK_AWS_MODELS=anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0

# See all Bedrock model IDs here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns

# Notes on specific models:
# The following models are not supported, as they do not support streaming:
# ai21.j2-mid-v1

# The following models are not supported, as they do not support conversation history:
# ai21.j2-ultra-v1, cohere.command-text-v14, cohere.command-light-text-v14

#============#
# Google #
#============#
@@ -126,6 +146,8 @@ GOOGLE_KEY=user_provided

# GOOGLE_TITLE_MODEL=gemini-pro

# GOOGLE_LOC=us-central1

# Google Safety Settings
# NOTE: These settings apply to both Vertex AI and Gemini API (AI Studio)
#
@@ -392,6 +414,7 @@ LDAP_CA_CERT_PATH=
# LDAP_LOGIN_USES_USERNAME=true
# LDAP_ID=
# LDAP_USERNAME=
# LDAP_EMAIL=
# LDAP_FULL_NAME=

#========================#
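The new `BEDROCK_AWS_MODELS` variable above is a comma-separated list of model IDs. A minimal sketch of parsing such a value, assuming a hypothetical helper and fallback list (LibreChat's real fallback covers all known, supported model IDs):

```javascript
// Hypothetical sketch: parse a comma-separated BEDROCK_AWS_MODELS value,
// falling back to a built-in list when the variable is omitted.
const DEFAULT_BEDROCK_MODELS = [
  'anthropic.claude-3-5-sonnet-20240620-v1:0',
  'meta.llama3-1-8b-instruct-v1:0',
];

function parseBedrockModels(env = process.env) {
  const raw = env.BEDROCK_AWS_MODELS;
  if (!raw) {
    return DEFAULT_BEDROCK_MODELS;
  }
  // Trim whitespace around each ID and drop empty entries.
  return raw
    .split(',')
    .map((id) => id.trim())
    .filter(Boolean);
}
```
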
47 changes: 0 additions & 47 deletions .github/dependabot.yml

This file was deleted.

6 changes: 2 additions & 4 deletions .github/workflows/helmcharts.yml
@@ -25,11 +25,9 @@ jobs:
- name: Install Helm
uses: azure/setup-helm@v4
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"

- name: Run chart-releaser
uses: helm/chart-releaser-action@v1.6.0
with:
charts_dir: helmchart
env:
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
# v0.7.5-rc1
# v0.7.5-rc2

# Base node image
FROM node:20-alpine AS node
2 changes: 1 addition & 1 deletion Dockerfile.multi
@@ -1,5 +1,5 @@
# Dockerfile.multi
# v0.7.5-rc1
# v0.7.5-rc2

# Base for all builds
FROM node:20-alpine AS base
4 changes: 2 additions & 2 deletions README.md
@@ -42,10 +42,10 @@

- 🖥️ UI matching ChatGPT, including Dark mode, Streaming, and latest updates
- 🤖 AI model selection:
- OpenAI, Azure OpenAI, BingAI, ChatGPT, Google Vertex AI, Anthropic (Claude), Plugins, Assistants API (including Azure Assistants)
- Anthropic (Claude), AWS Bedrock, OpenAI, Azure OpenAI, BingAI, ChatGPT, Google Vertex AI, Plugins, Assistants API (including Azure Assistants)
- ✅ Compatible across both **[Remote & Local AI services](https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints):**
- groq, Ollama, Cohere, Mistral AI, Apple MLX, koboldcpp, OpenRouter, together.ai, Perplexity, ShuttleAI, and more
- 🪄 Generative UI with [Code Artifacts](https://youtu.be/GfTj7O4gmd0?si=WJbdnemZpJzBrJo3)
- 🪄 Generative UI with **[Code Artifacts](https://youtu.be/GfTj7O4gmd0?si=WJbdnemZpJzBrJo3)**
- Create React, HTML code, and Mermaid diagrams right in chat
- 💾 Create, Save, & Share Custom Presets
- 🔀 Switch between AI Endpoints and Presets, mid-chat
38 changes: 22 additions & 16 deletions api/app/clients/AnthropicClient.js
@@ -17,8 +17,8 @@ const {
parseParamFromPrompt,
createContextHandlers,
} = require('./prompts');
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const { getModelMaxTokens, matchModelName } = require('~/utils');
const { sleep } = require('~/server/utils');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
@@ -64,6 +64,12 @@ class AnthropicClient extends BaseClient {
/** Whether or not the model supports Prompt Caching
* @type {boolean} */
this.supportsCacheControl;
/** The key for the usage object's input tokens
* @type {string} */
this.inputTokensKey = 'input_tokens';
/** The key for the usage object's output tokens
* @type {string} */
this.outputTokensKey = 'output_tokens';
}

setOptions(options) {
@@ -114,7 +120,14 @@ class AnthropicClient extends BaseClient {
this.options.maxContextTokens ??
getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ??
100000;
this.maxResponseTokens = this.modelOptions.maxOutputTokens || 1500;
this.maxResponseTokens =
this.modelOptions.maxOutputTokens ??
getModelMaxOutputTokens(
this.modelOptions.model,
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ??
1500;
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
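The `??` chain in the hunk above means an explicitly configured `maxOutputTokens` wins, then a per-model lookup, then the hard default of 1500. A self-contained sketch of that fallback order, where the lookup table is a hypothetical stand-in for `getModelMaxOutputTokens` from `~/utils`:

```javascript
// Hypothetical stand-in for getModelMaxOutputTokens: returns undefined
// for unknown models so the `??` chain keeps falling through.
const MAX_OUTPUT_TOKENS = { 'claude-3-5-sonnet': 8192 };

function getModelMaxOutputTokens(model) {
  return MAX_OUTPUT_TOKENS[model];
}

function resolveMaxResponseTokens(modelOptions) {
  return (
    modelOptions.maxOutputTokens ??
    getModelMaxOutputTokens(modelOptions.model) ??
    1500
  );
}

// Explicit setting wins; a known model uses its table entry;
// an unknown model falls through to the default.
resolveMaxResponseTokens({ model: 'x', maxOutputTokens: 1024 }); // 1024
resolveMaxResponseTokens({ model: 'claude-3-5-sonnet' }); // 8192
resolveMaxResponseTokens({ model: 'unknown-model' }); // 1500
```

Unlike the previous `||`, `??` only falls through on `null`/`undefined`, so a deliberately small configured value is never silently replaced.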

@@ -138,17 +151,6 @@ class AnthropicClient extends BaseClient {
this.endToken = '';
this.gptEncoder = this.constructor.getTokenizer('cl100k_base');

if (!this.modelOptions.stop) {
const stopTokens = [this.startToken];
if (this.endToken && this.endToken !== this.startToken) {
stopTokens.push(this.endToken);
}
stopTokens.push(`${this.userLabel}`);
stopTokens.push('<|diff_marker|>');

this.modelOptions.stop = stopTokens;
}

return this;
}

@@ -200,15 +202,15 @@ }
}

/**
* Calculates the correct token count for the current message based on the token count map and API usage.
* Calculates the correct token count for the current user message based on the token count map and API usage.
* Edge case: If the calculation results in a negative value, it returns the original estimate.
* If revisiting a conversation with a chat history entirely composed of token estimates,
* the cumulative token count going forward should become more accurate as the conversation progresses.
* @param {Object} params - The parameters for the calculation.
* @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
* @param {string} params.currentMessageId - The ID of the current message to calculate.
* @param {AnthropicStreamUsage} params.usage - The usage object returned by the API.
* @returns {number} The correct token count for the current message.
* @returns {number} The correct token count for the current user message.
*/
calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
const originalEstimate = tokenCountMap[currentMessageId] || 0;
@@ -680,7 +682,11 @@ */
*/
checkPromptCacheSupport(modelName) {
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
if (modelMatch === 'claude-3-5-sonnet' || modelMatch === 'claude-3-haiku') {
if (
modelMatch === 'claude-3-5-sonnet' ||
modelMatch === 'claude-3-haiku' ||
modelMatch === 'claude-3-opus'
) {
return true;
}
return false;
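The widened check above adds `claude-3-opus` to the prompt-caching allowlist. A standalone sketch of the same logic, with a simplified stand-in for `matchModelName` (the real helper in `~/utils` does more normalization):

```javascript
// Simplified stand-in for matchModelName: strip a trailing 8-digit date
// suffix such as -20240620 (assumption; the real helper is more involved).
function matchModelName(modelName) {
  return modelName.replace(/-\d{8}$/, '');
}

function checkPromptCacheSupport(modelName) {
  const modelMatch = matchModelName(modelName);
  return (
    modelMatch === 'claude-3-5-sonnet' ||
    modelMatch === 'claude-3-haiku' ||
    modelMatch === 'claude-3-opus'
  );
}

checkPromptCacheSupport('claude-3-opus-20240229'); // true (newly allowed)
checkPromptCacheSupport('claude-2'); // false
```
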
42 changes: 37 additions & 5 deletions api/app/clients/BaseClient.js
@@ -2,6 +2,8 @@ const crypto = require('crypto');
const fetch = require('node-fetch');
const {
supportsBalanceCheck,
isAgentsEndpoint,
isParamEndpoint,
ErrorTypes,
Constants,
CacheKeys,
@@ -40,6 +42,14 @@ class BaseClient {
this.conversationId;
/** @type {string} */
this.responseMessageId;
/** @type {TAttachment[]} */
this.attachments;
/** The key for the usage object's input tokens
* @type {string} */
this.inputTokensKey = 'prompt_tokens';
/** The key for the usage object's output tokens
* @type {string} */
this.outputTokensKey = 'completion_tokens';
}

setOptions() {
@@ -66,6 +76,17 @@ }
throw new Error('Subclasses attempted to call summarizeMessages without implementing it');
}

/**
* @returns {string}
*/
getResponseModel() {
if (isAgentsEndpoint(this.options.endpoint) && this.options.agent && this.options.agent.id) {
return this.options.agent.id;
}

return this.modelOptions.model;
}

/**
* Abstract method to get the token count for a message. Subclasses must implement this method.
* @param {TMessage} responseMessage
@@ -217,6 +238,7 @@
userMessage,
conversationId,
responseMessageId,
sender: this.sender,
});
}

@@ -548,6 +570,7 @@ });
});
}

/** @type {string|string[]|undefined} */
const completion = await this.sendCompletion(payload, opts);
this.abortController.requestCompleted = true;

@@ -557,7 +580,7 @@
parentMessageId: userMessage.messageId,
isCreatedByUser: false,
isEdited,
model: this.modelOptions.model,
model: this.getResponseModel(),
sender: this.sender,
promptTokens,
iconURL: this.options.iconURL,
@@ -567,9 +590,14 @@

if (typeof completion === 'string') {
responseMessage.text = addSpaceIfNeeded(generation) + completion;
} else if (completion) {
} else if (
Array.isArray(completion) &&
isParamEndpoint(this.options.endpoint, this.options.endpointType)
) {
responseMessage.text = '';
responseMessage.content = completion;
} else if (Array.isArray(completion)) {
responseMessage.text = addSpaceIfNeeded(generation) + completion.join('');
}

if (
@@ -587,8 +615,8 @@
* @type {StreamUsage | null} */
const usage = this.getStreamUsage != null ? this.getStreamUsage() : null;

if (usage != null && Number(usage.output_tokens) > 0) {
responseMessage.tokenCount = usage.output_tokens;
if (usage != null && Number(usage[this.outputTokensKey]) > 0) {
responseMessage.tokenCount = usage[this.outputTokensKey];
completionTokens = responseMessage.tokenCount;
await this.updateUserMessageTokenCount({ usage, tokenCountMap, userMessage, opts });
} else {
@@ -603,6 +631,10 @@
await this.userMessagePromise;
}

if (this.artifactPromises) {
responseMessage.attachments = (await Promise.all(this.artifactPromises)).filter((a) => a);
}

this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
const messageCache = getLogStores(CacheKeys.MESSAGES);
messageCache.set(
@@ -638,7 +670,7 @@
/** @type {boolean} */
const shouldUpdateCount =
this.calculateCurrentTokenCount != null &&
Number(usage.input_tokens) > 0 &&
Number(usage[this.inputTokensKey]) > 0 &&
(this.options.resendFiles ||
(!this.options.resendFiles && !this.options.attachments?.length)) &&
!this.options.promptPrefix;
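The `inputTokensKey`/`outputTokensKey` indirection introduced in this diff lets each client subclass name its provider's usage fields while the base class reads them generically. A minimal sketch of the pattern (class and method names here are illustrative, not the exact LibreChat API):

```javascript
// Illustrative sketch: read usage counts through configurable key names,
// so OpenAI-style and Anthropic-style usage objects share one code path.
class BaseClient {
  constructor() {
    this.inputTokensKey = 'prompt_tokens'; // OpenAI-style defaults
    this.outputTokensKey = 'completion_tokens';
  }
  completionTokens(usage) {
    const n = Number(usage?.[this.outputTokensKey]);
    return n > 0 ? n : 0; // mirror the `> 0` guard in the diff
  }
}

class AnthropicClient extends BaseClient {
  constructor() {
    super();
    this.inputTokensKey = 'input_tokens'; // Anthropic-style usage keys
    this.outputTokensKey = 'output_tokens';
  }
}

new BaseClient().completionTokens({ completion_tokens: 42 }); // 42
new AnthropicClient().completionTokens({ output_tokens: 7 }); // 7
```
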