Skip to content

Commit

Permalink
feat: cache generated TTS file
Browse files Browse the repository at this point in the history
  • Loading branch information
jersou committed Oct 6, 2024
1 parent bd4a041 commit 4c669a8
Show file tree
Hide file tree
Showing 14 changed files with 240 additions and 58 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
/gui/src-tauri/target/
/gui/dist
/gui/.vite
/.spg-TTS-cache/
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Studio-Pack-Generator
# Studio-Pack-Generator (SPG)

This project converts a folder or an RSS URL to a
[Studio](https://github.com/marian-m12l/studio) pack zip for Lunii device, see
Expand Down Expand Up @@ -245,6 +245,9 @@ Options:
-u, --gui open GUI (on localhost:5555) [boolean] [default: false]
--port port of GUI server [number] [default: 5555]
--config-file json config file [string]
--skip-read-tts-cache disable the TTS cache usage [boolean] [default: false]
--skip-write-tts-cache disable the TTS cache write [boolean] [default: false]
--tts-cache-path path to the TTS cache [string] [default: "<SPG dir>/.spg-TTS-cache"]
```

Separate options by spaces, ex :
Expand Down Expand Up @@ -329,6 +332,10 @@ studio-pack-generator -x -o output/dir 2-full.zip
Note: it doesn't work well with "menu" nodes or with packs that have no
"question" stage.
## TTS cache
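To speed up generation and save CPU, the generated TTS audio files are cached and reused on later runs. The cache can be controlled with the `--skip-read-tts-cache`, `--skip-write-tts-cache` and `--tts-cache-path` options.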
## json config file
The parameters can be imported from a json file with :
Expand Down Expand Up @@ -373,7 +380,10 @@ File format (all the properties are optionals) :
"coquiTtsModel": "tts_models/multilingual/multi-dataset/xtts_v2",
"coquiTtsLanguageIdx": "fr",
"coquiTtsSpeakerIdx": "Abrahan Mack",
"port": 5555
"port": 5555,
"skipWriteTtsCache": false,
"skipReadTtsCache": false,
"ttsCachePath": "/tmp/spg-tts-cache"
}
```
Expand Down
1 change: 1 addition & 0 deletions deno.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"@jersou/clite": "jsr:@jersou/clite@^0.5.0",
"@libs/xml": "jsr:@libs/xml@^6.0.0",
"@std/assert": "jsr:@std/assert@^1.0.6",
"@std/crypto": "jsr:@std/crypto@^1.0.3",
"@std/encoding": "jsr:@std/encoding@^1.0.5",
"@std/fmt": "jsr:@std/fmt@^1.0.2",
"@std/fs": "jsr:@std/fs@^1.0.4",
Expand Down
5 changes: 5 additions & 0 deletions deno.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 56 additions & 5 deletions generate/basic_tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import {
getPico2waveCommand,
} from "../utils/external_commands.ts";
import type { ModOptions } from "../types.ts";
import { cacheTtsFile, useCachedTtsFile } from "./tts_cache.ts";
import { bgRed } from "@std/fmt/colors";

let hasPico2waveWslCache: undefined | boolean;

Expand Down Expand Up @@ -40,6 +42,18 @@ export async function generate_audio_basic_tts(
if (
Deno.build.os === "windows" && (opt.skipWsl || !(await hasPico2waveWsl()))
) {
await windows_tts(outputPath, opt, title);
} else if (Deno.build.os === "darwin" && !(await hasPico2wave())) {
await macos_tts(outputPath, opt, title);
} else {
await pico2wave_tts(lang, outputPath, opt, title);
}
}

async function windows_tts(outputPath: string, opt: ModOptions, title: string) {
const cacheKey = ["windows_tts", title];

if (opt.skipReadTtsCache || !await useCachedTtsFile(outputPath, cacheKey)) {
const audioFormat = "[System.Speech.AudioFormat.SpeechAudioFormatInfo]::" +
"new(8000,[System.Speech.AudioFormat.AudioBitsPerSample]" +
"::Sixteen,[System.Speech.AudioFormat.AudioChannel]::Mono)";
Expand All @@ -52,18 +66,48 @@ export async function generate_audio_basic_tts(
`$speak.Speak(" . ${title.replace(/["' ]/g, " ")} . "); ` +
`$speak.Dispose();`,
];
await $`PowerShell ${args}`.noThrow();
} else if (Deno.build.os === "darwin" && !(await hasPico2wave())) {
const res = await $`PowerShell ${args}`.noThrow();
if (res.code === 0) {
if (!opt.skipWriteTtsCache) {
await cacheTtsFile(outputPath, cacheKey);
}
} else {
console.log(bgRed(`windows_tts gen KO for "${title}"`));
}
}
}

async function macos_tts(outputPath: string, opt: ModOptions, title: string) {
const cacheKey = ["macos_tts", title];
if (opt.skipReadTtsCache || !await useCachedTtsFile(outputPath, cacheKey)) {
const args = [
"-o",
convertPath(outputPath, opt),
"--file-format",
"WAVE",
"--data-format",
"LEF32@22050",
title,
];
await $`say ${args}`.noThrow();
} else {
const res = await $`say ${args}`.noThrow();
if (res.code === 0) {
if (!opt.skipWriteTtsCache) {
await cacheTtsFile(outputPath, cacheKey);
}
} else {
console.log(bgRed(`macos_tts gen KO for "${title}"`));
}
}
}

async function pico2wave_tts(
lang: string,
outputPath: string,
opt: ModOptions,
title: string,
) {
const cacheKey = ["pico2wave_tts", title, lang];
if (opt.skipReadTtsCache || !await useCachedTtsFile(outputPath, cacheKey)) {
const pico2waveCommand = await getPico2waveCommand();
const cmd = [
pico2waveCommand[0],
Expand All @@ -74,6 +118,13 @@ export async function generate_audio_basic_tts(
convertPath(outputPath, opt),
` . ${title} . `,
];
await $`${cmd}`.noThrow();
const res = await $`${cmd}`.noThrow();
if (res.code === 0) {
if (!opt.skipWriteTtsCache) {
await cacheTtsFile(outputPath, cacheKey);
}
} else {
console.log(bgRed(`pico2wave_tts gen KO for "${title}"`));
}
}
}
45 changes: 45 additions & 0 deletions generate/coqui_tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import type { ModOptions } from "../types.ts";
import { cacheTtsFile, useCachedTtsFile } from "./tts_cache.ts";
import { getCoquiCommand } from "../utils/external_commands.ts";
import $ from "@david/dax";
import { bgRed } from "@std/fmt/colors";

/**
 * Generate the audio for `title` with the Coqui TTS command, going through
 * the TTS cache.
 *
 * The cache key combines the title with the Coqui speaker, language and model
 * options. Unless `opt.skipReadTtsCache` is set, a cached file is copied to
 * `outputPath` and no generation happens. Otherwise the Coqui command is run;
 * on exit code 0 the produced file is stored in the cache (unless
 * `opt.skipWriteTtsCache` is set), and on any other exit code the failure is
 * only logged.
 *
 * @param title text to synthesize
 * @param opt generation options (Coqui model/language/speaker, cache flags)
 * @param outputPath path of the audio file to produce
 */
export async function generate_audio_with_coqui(
  title: string,
  opt: ModOptions,
  outputPath: string,
) {
  const cacheKey = [
    "CoquiTts",
    title,
    opt.coquiTtsSpeakerIdx,
    opt.coquiTtsLanguageIdx,
    opt.coquiTtsModel,
  ];

  // The cache is only consulted when reading it is not disabled.
  const servedFromCache = !opt.skipReadTtsCache &&
    await useCachedTtsFile(outputPath, cacheKey);
  if (servedFromCache) {
    return;
  }

  const coquiCommand = await getCoquiCommand();
  // Optional flags are only passed when the matching option is set.
  const languageArgs = opt.coquiTtsLanguageIdx
    ? ["--language_idx", opt.coquiTtsLanguageIdx]
    : [];
  const speakerArgs = opt.coquiTtsSpeakerIdx
    ? ["--speaker_idx", opt.coquiTtsSpeakerIdx]
    : [];
  const cmd = [
    ...coquiCommand,
    "--text",
    title,
    "--model_name",
    opt.coquiTtsModel,
    "--out_path",
    outputPath,
    ...languageArgs,
    ...speakerArgs,
  ];

  // noThrow: a failing command is reported below instead of throwing.
  const res = await $`${cmd}`.noThrow(true);
  if (res.code !== 0) {
    console.log(bgRed(`Coqui gen KO for "${title}"`));
    return;
  }
  if (!opt.skipWriteTtsCache) {
    await cacheTtsFile(outputPath, cacheKey);
  }
}
28 changes: 4 additions & 24 deletions generate/gen_audio.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import { generate_audio_basic_tts } from "./basic_tts.ts";
import { generate_audio_with_openAI } from "./openai_tts.ts";
import type { ModOptions } from "../types.ts";
import { getCoquiCommand } from "../utils/external_commands.ts";
import $ from "@david/dax";
import { generate_audio_with_coqui } from "./coqui_tts.ts";

export async function generateAudio(
title: string,
Expand All @@ -11,29 +10,10 @@ export async function generateAudio(
opt: ModOptions,
) {
if (opt.useOpenAiTts) {
await generate_audio_with_openAI(
title,
outputPath.replace(/\.wav/i, ".mp3"),
opt,
);
const output = outputPath.replace(/\.wav/i, ".mp3");
await generate_audio_with_openAI(title, output, opt);
} else if (opt.useCoquiTts) {
const coquiCommand = await getCoquiCommand();
const cmd = [
...coquiCommand,
"--text",
title,
"--model_name",
opt.coquiTtsModel,
"--out_path",
outputPath,
];
if (opt.coquiTtsLanguageIdx) {
cmd.push("--language_idx", opt.coquiTtsLanguageIdx);
}
if (opt.coquiTtsSpeakerIdx) {
cmd.push("--speaker_idx", opt.coquiTtsSpeakerIdx);
}
await $`${cmd}`;
await generate_audio_with_coqui(title, opt, outputPath);
} else {
await generate_audio_basic_tts(title, outputPath, lang, opt);
}
Expand Down
4 changes: 2 additions & 2 deletions generate/gen_missing_items.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ export async function genMissingItems(
if (!opt.skipImageItemGen && !getFileImageItem(file, folder)) {
await generateImage(
getTitle(getNameWithoutExt(file.name)),
`${rootpath}/${getNameWithoutExt(file.name)}-generated.item.png`,
`${rootpath}/${getNameWithoutExt(file.name)}.item.png`,
opt.imageItemGenFont,
);
}
if (!opt.skipAudioItemGen && !getFileAudioItem(file, folder)) {
await generateAudio(
getTitle(getNameWithoutExt(file.name)),
`${rootpath}/${getNameWithoutExt(file.name)}-generated.item.wav`,
`${rootpath}/${getNameWithoutExt(file.name)}.item.wav`,
lang,
opt,
);
Expand Down
53 changes: 30 additions & 23 deletions generate/openai_tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { bgRed, blue } from "@std/fmt/colors";
import $ from "@david/dax";

import type { ModOptions } from "../types.ts";
import { cacheTtsFile, useCachedTtsFile } from "./tts_cache.ts";

let openAI_client: OpenAI;

Expand All @@ -11,30 +12,36 @@ export async function generate_audio_with_openAI(
outputPath: string,
opt: ModOptions,
) {
if (!openAI_client) {
if (opt?.openAiApiKey) {
openAI_client = new OpenAI({ apiKey: opt.openAiApiKey });
} else if (Deno.env.has("OPENAI_API_KEY")) {
openAI_client = new OpenAI();
const cacheKey = ["OpenAiTts", title, opt.openAiVoice, opt.openAiModel];
if (opt.skipReadTtsCache || !await useCachedTtsFile(outputPath, cacheKey)) {
if (!openAI_client) {
if (opt?.openAiApiKey) {
openAI_client = new OpenAI({ apiKey: opt.openAiApiKey });
} else if (Deno.env.has("OPENAI_API_KEY")) {
openAI_client = new OpenAI();
} else {
const apiKey = await $.prompt({
message: "OPENAI_API_KEY :",
mask: true,
});
openAI_client = new OpenAI({ apiKey });
}
}
const result = await openAI_client.audio.speech.create({
input: title,
response_format: "mp3",
model: opt?.openAiModel ?? "tts-1",
voice: opt?.openAiVoice ?? "onyx",
});
if (result.ok) {
console.log(blue(`OpenAI gen OK of "${title}" in ${outputPath}`));
const file = await Deno.open(outputPath, { create: true, write: true });
await result.body!.pipeTo(file.writable);
if (!opt.skipWriteTtsCache) {
await cacheTtsFile(outputPath, cacheKey);
}
} else {
const apiKey = await $.prompt({
message: "OPENAI_API_KEY :",
mask: true,
});
openAI_client = new OpenAI({ apiKey });
console.log(bgRed(`OpenAI gen KO for "${title}"`), result);
}
}
const result = await openAI_client.audio.speech.create({
input: title,
response_format: "mp3",
model: opt?.openAiModel ?? "tts-1",
voice: opt?.openAiVoice ?? "onyx",
});
if (result.ok) {
console.log(blue(`OpenAI gen OK of "${title}" in ${outputPath}`));
const file = await Deno.open(outputPath, { create: true, write: true });
await result.body!.pipeTo(file.writable);
} else {
console.log(bgRed(`OpenAI gen KO of ${title}`), result);
}
}
39 changes: 39 additions & 0 deletions generate/tts_cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { getSpgDirPath } from "../utils/utils.ts";
import { crypto } from "@std/crypto/crypto";
import { encodeHex } from "@std/encoding/hex";
import $ from "@david/dax";
import { green, yellow } from "@std/fmt/colors";

/**
 * Default location of the TTS cache: the `.spg-TTS-cache` entry resolved
 * against the SPG directory.
 */
export function getDefaultTtsPath() {
  const spgDir = getSpgDirPath();
  return spgDir.resolve(".spg-TTS-cache");
}

/**
 * Compute the cache file path for a TTS cache key.
 *
 * The key is JSON-serialized and hashed with MD5; the hex digest is the file
 * name, stored in a sub-directory named after the digest's first two
 * characters.
 *
 * NOTE(review): the path is always built under `getDefaultTtsPath()`; a
 * user-supplied cache path is not consulted here — confirm where it is
 * applied.
 *
 * @param key values identifying one generated audio file
 * @returns the path of the cache entry (which may not exist yet)
 */
export function getCachePath(key: (string | boolean | undefined)[]) {
  const serializedKey = JSON.stringify(key);
  const keyBytes = new TextEncoder().encode(serializedKey);
  const hexDigest = encodeHex(crypto.subtle.digestSync("MD5", keyBytes));
  const shardDir = hexDigest.substring(0, 2);
  return getDefaultTtsPath().join(shardDir).join(hexDigest);
}

/**
 * Store a generated audio file in the TTS cache.
 *
 * Creates the parent directory of the cache entry if needed, then copies
 * `output` to the cache path derived from `key`.
 *
 * @param output path of the generated audio file to cache
 * @param key values identifying this audio file in the cache
 */
export async function cacheTtsFile(
  output: string,
  key: (string | undefined | boolean)[],
) {
  const cacheEntry = getCachePath(key);
  const entryDir = cacheEntry.resolve("..");
  await entryDir.mkdir({ recursive: true });
  const generatedFile = $.path(output);
  await generatedFile.copyFile(cacheEntry);
}

/**
 * Copy the cached audio file matching `key` to `output`, if there is one.
 *
 * @param output path where the cached audio file is copied
 * @param key values identifying the audio file in the cache
 * @returns `true` when a cache entry existed and was copied to `output`,
 *   `false` when no cache entry exists
 */
export async function useCachedTtsFile(
  output: string,
  key: (string | undefined | boolean)[],
): Promise<boolean> {
  const cacheEntry = getCachePath(key);
  const isCached = await cacheEntry.exists();
  if (!isCached) {
    console.log(yellow(`no TTS cache found for ${output}`));
    return false;
  }
  await cacheEntry.copyFile(output);
  console.log(green(`use TTS cached for ${output}`));
  return true;
}
9 changes: 9 additions & 0 deletions generate/tts_cache_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { assertEquals } from "@std/assert";
import { getCachePath, getDefaultTtsPath } from "./tts_cache.ts";

// Checks the cache path layout for a fixed key: the entry is expected under
// the default cache directory, in a sub-directory named after the first two
// characters of the digest.
Deno.test("getCachePath", () => {
  const expectedPath = getDefaultTtsPath().join(
    "e9/e9350e939ff0f72285307e1a792fe739",
  );
  const actualPath = getCachePath(["aa", "bb"]);
  assertEquals(actualPath.toString(), expectedPath.toString());
});
Loading

0 comments on commit 4c669a8

Please sign in to comment.