diff --git a/.eslintrc.json b/.eslintrc.json index 6218e8c..10c188c 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -2,15 +2,25 @@ "root": true, "extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"], "parser": "@typescript-eslint/parser", - "parserOptions": { "project": ["./tsconfig.json"] }, + "parserOptions": { + "project": ["./tsconfig.json"] + }, "plugins": ["@typescript-eslint"], "rules": {}, - "ignorePatterns": [ - "/*.js", - "/*.ts", - "tests", - "dist", - "node_modules", - "scripts" + "ignorePatterns": ["/*.js", "/*.ts", "dist", "node_modules"], + "overrides": [ + { + "files": ["tests/**/*"], + "parserOptions": { + "project": ["./tsconfig.test.json"] + } + }, + + { + "files": ["scripts/**/*"], + "parserOptions": { + "project": ["./tsconfig.scripts.json"] + } + } ] } diff --git a/CHANGELOG.md b/CHANGELOG.md index 09b23e6..8f5258d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## [4.0.0] + +### Added + +- Add `browser` and `browser:min` exports, at `dist/index.umd.js` and `dist/index.umd.min.js`. These exports are browser compatible versions of the SDK, with a few limitations. You can't use the file system and you have to use a temporary auth token with the real-time transcriber. + +### Changed + +- `RealtimeService.sendAudio` accepts audio via type `ArrayBufferLike`. +- **Breaking**: `RealtimeService.stream` returns a [WHATWG Streams Standard stream](https://nodejs.org/api/webstreams.html), instead of a Node stream. In the browser, the native web standard stream will be used. +- `ws` is used as the WebSocket client as before, but in the browser, the native WebSocket client is used. +- Rename Node SDK to JavaScript SDK as the SDK is compatible with more runtimes now. 
+ ## [3.1.1] - 2023-11-21 ### Added diff --git a/README.md b/README.md index f11e9d9..e7cc3d3 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,11 @@ [![Discord](https://img.shields.io/discord/875120158014853141?logo=discord&label=Discord&link=https%3A%2F%2Fdiscord.com%2Fchannels%2F875120158014853141&style=social) ](https://assemblyai.com/discord) -# AssemblyAI Node.js SDK +# AssemblyAI JavaScript SDK -The AssemblyAI Node.js SDK provides an easy-to-use interface for interacting with the AssemblyAI API, +The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models. +It is written primarily for Node.js in TypeScript with all types exported, but also [compatible with other runtimes](./docs/compat.md). ## Installation @@ -51,7 +52,7 @@ You can now use the `client` object to interact with the AssemblyAI API. ## Create a transcript -When you create a transcript, you can either pass in a URL to an audio file, or upload a file directly. +When you create a transcript, you can either pass in a URL to an audio file or upload a file directly. ```javascript // Transcribe file at remote URL @@ -242,10 +243,10 @@ getAudio((chunk) => { }); ``` -Or send audio data via a stream by piping to the realtime stream. +Or send audio data via a stream by piping to the real-time stream. ```typescript -audioStream.pipe(rt.stream()); +audioStream.pipeTo(rt.stream()); ``` Close the connection when you're finished. diff --git a/docs/compat.md b/docs/compat.md new file mode 100644 index 0000000..970f24a --- /dev/null +++ b/docs/compat.md @@ -0,0 +1,59 @@ +# SDK Compatibility + +The JavaScript SDK is developed for Node.js but is also compatible with other runtimes +such as the browser, Deno, Bun, Cloudflare Workers, etc. + +## Browser compatibility + +To make the SDK compatible with the browser, the SDK aims to use web standards as much as possible. 
+However, there are still incompatibilities between Node.js and the browser. + +- `RealtimeService` doesn't support the AssemblyAI API key in the browser. + Instead, you have to generate a temporary auth token using `client.realtime.createTemporaryToken`, and pass in the resulting token to the real-time transcriber. + + Generate a temporary auth token on the server. + + ```js + import { AssemblyAI } from "assemblyai"; + // Ideally, to avoid embedding your API key client side, + // you generate this token on the server, and pass it to the client via an API. + const client = new AssemblyAI({ apiKey: "YOUR_API_KEY" }); + const token = await client.realtime.createTemporaryToken({ expires_in: 480 }); + ``` + + > [!NOTE] + > We recommend generating the token on the server, so you don't embed your AssemblyAI API key in your client app. + > If you embed the API key on the client, everyone can see it and use it for themselves. + + Then pass the token via an API to the client. + On the client, create an instance of `RealtimeService` using the token. + + ```js + import { RealtimeService } from "assemblyai"; + // or the following if you're using UMD + // const { RealtimeService } = assemblyai; + + const token = getToken(); // getToken is a function for you to implement + + const rt = new RealtimeService({ + token: token, + }); + ``` + +- You can't pass local audio file paths to `client.files.upload`, `client.transcripts.transcribe`, and `client.transcripts.submit`. If you do, you'll get the following error: "Function is not supported in this environment.". + If you want to transcribe audio files, you must use a public URL, a stream, or a buffer. + +> [!WARNING] +> The SDK is usable from the browser, but we strongly recommend you don't embed the AssemblyAI API key into your client apps. +> If you embed the API key on the client, everyone can see it and use it for themselves. +> Instead, use the SDK on the server and provide APIs for your client to call. 
+ +## Deno, Bun, Cloudflare Workers, etc. + +Most server-side JavaScript runtimes include a compatibility layer with Node.js. +Our SDK is developed for Node.js, which makes it compatible with other runtimes through their compatibility layer. +The bugs in these compatibility layers may introduce issues in our SDK. + +## Report issues + +If you find any (undocumented) bugs when using the SDK, [submit a GitHub issue](https://github.com/AssemblyAI/assemblyai-node-sdk). We'll try to fix it or at least document the compatibility issue. diff --git a/package.json b/package.json index d218430..0fb0b91 100644 --- a/package.json +++ b/package.json @@ -1,12 +1,13 @@ { "name": "assemblyai", - "version": "3.1.3", - "description": "The AssemblyAI Node.js SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.", + "version": "4.0.0", + "description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.", "exports": { ".": { "types": "./dist/index.d.ts", "import": "./dist/index.mjs", "require": "./dist/index.cjs", + "browser": "./dist/index.browser.js", "default": "./dist/index.cjs" }, "./package.json": "./package.json" @@ -21,7 +22,7 @@ "url": "git+https://github.com/AssemblyAI/assemblyai-node-sdk.git" }, "publishConfig": { - "tag": "latest", + "tag": "beta", "access": "public", "registry": "https://registry.npmjs.org/" }, @@ -51,8 +52,12 @@ "src" ], "devDependencies": { + "@rollup/plugin-alias": "^5.0.1", + "@rollup/plugin-node-resolve": "^15.2.3", + "@rollup/plugin-terser": "^0.4.4", "@types/jest": "^29.5.5", "@types/node": "^20.5.7", + "@types/websocket": "^1.0.8", "@types/ws": "^8.5.5", "@typescript-eslint/eslint-plugin": "^6.7.5", "dotenv": "^16.3.1", @@ -78,6 +83,8 @@ "typescript": "^5.2.2" }, "dependencies": { + 
"@swimburger/isomorphic-streams": "^1.0.5", + "isomorphic-ws": "^5.0.0", "ws": "^8.13.0" } } diff --git a/rollup.config.js b/rollup.config.js index 1647e74..c34ab3c 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -1,26 +1,81 @@ const pkg = require("./package.json"); const ts = require("rollup-plugin-typescript2"); +const terser = require("@rollup/plugin-terser"); +const alias = require("@rollup/plugin-alias"); +const { nodeResolve } = require("@rollup/plugin-node-resolve"); + +const cjsFile = pkg.main; +const esmFile = pkg.module; +const browserFile = pkg.exports["."].browser; + +const defaultPlugins = [ + ts({ + tsconfigOverride: { exclude: ["**/*.test.ts"] }, + }), +]; +const defaultConfig = { + plugins: defaultPlugins, + external: ["fs", "isomorphic-ws", "@swimburger/isomorphic-streams"], + input: "src/index.ts", +}; + +const browserConfig = { + ...defaultConfig, + plugins: [ + ...defaultConfig.plugins, + alias({ + entries: [{ find: "fs", replacement: "./src/browser/fs.ts" }], + }), + nodeResolve({ browser: true }), + ], + external: [], +}; module.exports = [ { - plugins: [ - ts({ - tsconfigOverride: { exclude: ["**/*.test.ts"] }, - }), - ], - external: ["axios", "fs", "stream", "ws"], - input: "src/index.ts", + ...defaultConfig, output: [ { - file: pkg.main, + file: cjsFile, format: "cjs", exports: "named", }, { - file: pkg.module, + file: esmFile, format: "es", exports: "named", }, ], }, + { + ...browserConfig, + output: [ + { + name: "assemblyai", + file: browserFile, + format: "esm", + }, + ], + }, + { + ...browserConfig, + output: [ + { + name: "assemblyai", + file: "./dist/assemblyai.umd.js", + format: "umd", + }, + ], + }, + { + ...browserConfig, + plugins: [...browserConfig.plugins, terser()], + output: [ + { + name: "assemblyai", + file: "./dist/assemblyai.umd.min.js", + format: "umd", + }, + ], + }, ]; diff --git a/scripts/generate-types.ts b/scripts/generate-types.ts index a5ed488..7df4b6e 100644 --- a/scripts/generate-types.ts +++ 
b/scripts/generate-types.ts @@ -6,7 +6,7 @@ async function generateTypes(apiSpecPath: string, outputPath: string) { let output = await openapiTS(localPath, { alphabetize: true, exportType: true, - transform(schemaObject, metadata) { + transform(schemaObject) { if ( "x-fern-type" in schemaObject && schemaObject["x-fern-type"] === "datetime" @@ -26,7 +26,7 @@ async function generateTypes(apiSpecPath: string, outputPath: string) { output.indexOf("\n };\n responses", schemasPosition) ) // Turn components["schemas"]["{TYPE_NAME}"] into TYPE_NAME - .replace(/components\[\"schemas\"]\[\"(\w*)\"\]/gm, "$1") + .replace(/components\["schemas"]\["(\w*)"\]/gm, "$1") .split("\n") // De-indent everything by 4 .map((l) => l.substring(4)) diff --git a/scripts/kitchensink.ts b/scripts/kitchensink.ts index b0dca86..e64b15b 100644 --- a/scripts/kitchensink.ts +++ b/scripts/kitchensink.ts @@ -7,6 +7,7 @@ import { LemurBaseResponse, PartialTranscript, RealtimeTranscript, + CreateRealtimeServiceParams, TranscribeParams, } from "../src"; @@ -16,15 +17,17 @@ const client = new AssemblyAI({ (async function transcribeUsingRealtime() { const useToken = false; - const serviceParams: any = { - sampleRate: 16_000, - wordBoost: ["gore", "climate"], - }; + let token: undefined | string = undefined; if (useToken) { - serviceParams.token = await client.realtime.createTemporaryToken({ + token = await client.realtime.createTemporaryToken({ expires_in: 480, }); } + const serviceParams: CreateRealtimeServiceParams = { + sampleRate: 16_000, + wordBoost: ["gore", "climate"], + token: token, + }; const rt = client.realtime.createService(serviceParams); rt.on("open", ({ sessionId, expiresAt }) => { @@ -282,7 +285,7 @@ const transcribeParams: TranscribeParams = { ); console.log(page); nextPageUrl = page.page_details.next_url; - } while (!!nextPageUrl); + } while (nextPageUrl); })(); async function searchTranscript(transcript: Transcript) { diff --git a/src/browser/fs.ts b/src/browser/fs.ts new file mode 
100644 index 0000000..9f89f2f --- /dev/null +++ b/src/browser/fs.ts @@ -0,0 +1,8 @@ +function throwError() { + throw new Error("Function is not supported in this environment."); +} + +export const createReadStream = throwError; +export default { + createReadStream, +}; diff --git a/src/services/files/index.ts b/src/services/files/index.ts index fa60cb5..3c81fbc 100644 --- a/src/services/files/index.ts +++ b/src/services/files/index.ts @@ -1,5 +1,3 @@ -// import the fs module instead if specific named exports -// to keep the assemblyai module more compatible. Some fs polyfills don't include `createReadStream`. import fs from "fs"; import { BaseService } from "../base"; import { UploadedFile, FileUploadParams, FileUploadData } from "../.."; diff --git a/src/services/realtime/service.ts b/src/services/realtime/service.ts index c17a6dd..db563a9 100644 --- a/src/services/realtime/service.ts +++ b/src/services/realtime/service.ts @@ -1,4 +1,6 @@ -import WebSocket from "ws"; +import { WritableStream } from "@swimburger/isomorphic-streams"; +import WebSocket from "isomorphic-ws"; +import { ErrorEvent, MessageEvent, CloseEvent } from "ws"; import { RealtimeEvents, RealtimeListeners, @@ -14,7 +16,6 @@ import { RealtimeErrorMessages, RealtimeErrorType, } from "../../utils/errors"; -import Stream from "stream"; const defaultRealtimeUrl = "wss://api.assemblyai.com/v2/realtime/ws"; @@ -88,16 +89,15 @@ export class RealtimeService { const url = this.connectionUrl(); - let headers; if (this.token) { - headers = undefined; - } else if (this.apiKey) { - headers = { Authorization: this.apiKey }; + this.socket = new WebSocket(url.toString()); + } else { + this.socket = new WebSocket(url.toString(), { + headers: { Authorization: this.apiKey }, + }); } - this.socket = new WebSocket(url.toString(), { headers }); - - this.socket.onclose = ({ code, reason }: WebSocket.CloseEvent) => { + this.socket.onclose = ({ code, reason }: CloseEvent) => { if (!reason) { if (code in RealtimeErrorType) 
{ reason = RealtimeErrorMessages[code as RealtimeErrorType]; @@ -106,12 +106,12 @@ export class RealtimeService { this.listeners.close?.(code, reason); }; - this.socket.onerror = (errorEvent: WebSocket.ErrorEvent) => { - if (errorEvent.error) this.listeners.error?.(errorEvent.error as Error); - else this.listeners.error?.(new Error(errorEvent.message)); + this.socket.onerror = (event: ErrorEvent) => { + if (event.error) this.listeners.error?.(event.error as Error); + else this.listeners.error?.(new Error(event.message)); }; - this.socket.onmessage = ({ data }: WebSocket.MessageEvent) => { + this.socket.onmessage = ({ data }: MessageEvent) => { const message = JSON.parse(data.toString()) as RealtimeMessage; if ("error" in message) { this.listeners.error?.(new RealtimeError(message.error)); @@ -150,25 +150,35 @@ export class RealtimeService { }); } - sendAudio(audio: ArrayBuffer) { + sendAudio(audio: ArrayBufferLike) { if (!this.socket || this.socket.readyState !== WebSocket.OPEN) { throw new Error("Socket is not open for communication"); } - + let audioData; + if (typeof Buffer !== "undefined") { + audioData = Buffer.from(audio).toString("base64"); + } else { + // Buffer is not available in the browser by default + // https://stackoverflow.com/a/42334410/2919731 + audioData = btoa( + new Uint8Array(audio).reduce( + (data, byte) => data + String.fromCharCode(byte), + "" + ) + ); + } const payload = { - audio_data: Buffer.from(audio).toString("base64"), + audio_data: audioData, }; this.socket.send(JSON.stringify(payload)); } - stream(): NodeJS.WritableStream { - const stream = new Stream.Writable({ - write: (chunk: Buffer, encoding, next) => { + stream(): WritableStream { + return new WritableStream({ + write: (chunk: ArrayBufferLike) => { this.sendAudio(chunk); - next(); }, }); - return stream; } async close(waitForSessionTermination = true) { @@ -185,7 +195,7 @@ export class RealtimeService { this.socket.send(terminateSessionMessage); } } - 
this.socket.removeAllListeners(); + if ("removeAllListeners" in this.socket) this.socket.removeAllListeners(); this.socket.close(); } diff --git a/tests/__mocks__/ws.ts b/tests/__mocks__/ws.ts index 77bc2ad..d5b176d 100644 --- a/tests/__mocks__/ws.ts +++ b/tests/__mocks__/ws.ts @@ -1,6 +1,8 @@ import { WebSocket } from "mock-socket"; export default class MockWebSocket extends WebSocket { + // the unused `options` parameter is required for properly mocking the ws WebSocket class + // eslint-disable-next-line @typescript-eslint/no-unused-vars constructor(address: string | URL, options?: unknown) { super(address); } diff --git a/tests/file.test.ts b/tests/file.test.ts index a6c1c8e..5833b44 100644 --- a/tests/file.test.ts +++ b/tests/file.test.ts @@ -1,15 +1,8 @@ -import { AssemblyAI } from "../src"; import { createReadStream } from "fs"; import { readFile } from "fs/promises"; import fetchMock from "jest-fetch-mock"; import path from "path"; -import { Stream } from "stream"; -import { - createClient, - defaultApiKey, - defaultBaseUrl, - requestMatches, -} from "./utils"; +import { createClient, requestMatches } from "./utils"; fetchMock.enableMocks(); diff --git a/tests/lemur.test.ts b/tests/lemur.test.ts index 7a7d06e..e0b6aab 100644 --- a/tests/lemur.test.ts +++ b/tests/lemur.test.ts @@ -1,5 +1,4 @@ import fetchMock from "jest-fetch-mock"; -import { AssemblyAI } from "../src"; import { createClient, requestMatches } from "./utils"; const knownTranscriptIds = ["transcript_123"]; diff --git a/tests/realtime.test.ts b/tests/realtime.test.ts index c856742..034b44e 100644 --- a/tests/realtime.test.ts +++ b/tests/realtime.test.ts @@ -1,3 +1,4 @@ +import { TransformStream } from "stream/web"; import WS from "jest-websocket-mock"; import fetchMock from "jest-fetch-mock"; import { AssemblyAI, RealtimeService } from "../src"; @@ -6,7 +7,7 @@ import { RealtimeErrorType, RealtimeErrorMessages, } from "../src/utils/errors/realtime"; -import stream from "stream"; + import { 
createClient, defaultApiKey, requestMatches } from "./utils"; fetchMock.enableMocks(); @@ -141,9 +142,11 @@ describe("realtime", () => { }); it("can send audio using stream", async () => { - const writeStream = new stream.PassThrough(); - writeStream.pipe(rt.stream()); - writeStream.write(Buffer.alloc(5_000)); + const stream = new TransformStream(); + const writer = stream.writable.getWriter(); + stream.readable.pipeTo(rt.stream()); + await writer.ready; + writer.write(Buffer.alloc(5_000)); await expect(server).toReceiveMessage( JSON.stringify({ audio_data: Buffer.alloc(5_000).toString("base64") }) ); diff --git a/tests/transcript.test.ts b/tests/transcript.test.ts index 87e3f97..e2b0997 100644 --- a/tests/transcript.test.ts +++ b/tests/transcript.test.ts @@ -1,4 +1,3 @@ -import { AssemblyAI } from "../src"; import fetchMock from "jest-fetch-mock"; import path from "path"; import {