Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

avoid using stt-align-node! #101

Open
Niceblueman opened this issue Nov 27, 2023 · 0 comments
Open

avoid using stt-align-node! #101

Niceblueman opened this issue Nov 27, 2023 · 0 comments

Comments

@Niceblueman
Copy link

Hi! 👋

Firstly, thanks for your work on this project! 🙂

Today I used patch-package to patch slate-transcript-editor@0.1.6-alpha.19 for the project I'm working on.

Here is the diff that solved my problem:

diff --git a/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js b/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js
index be190f0..25a9686 100644
--- a/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js
+++ b/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js
@@ -1,8 +1,81 @@
-import { alignSTT } from 'stt-align-node';
+// import { alignSTT } from 'stt-align-node';
 import { shortTimecode } from '../../../timecode-converter';
 import countWords from '../../../count-words';
 import generatePreviousTimingsUpToCurrent from '../../../dpe-to-slate/generate-previous-timings-up-to-current';
+function alignSTT(sttWords, transcriptText, start, end) { // same name as the 'stt-align-node' import this patch comments out
+  const sttWordsList = sttWords.words; // sttWords must expose a `words` property
+  const opCodes = calculateDiff(sttWordsList, transcriptText); // NOTE(review): transcriptText is passed un-split here but is split into words on the next line — confirm intended
+  const transcriptWords = convertRefTextToList(transcriptText); // whitespace-split form of transcriptText
+  const alignedResults = alignRefTextWithSTT(
+    opCodes,
+    sttWordsList,
+    transcriptWords,
+    start, // forwarded unchanged; used there as slice bounds on the result
+    end
+  );
+  return alignedResults;
+}
+
+// Walks array1 and array2 in a single forward pass and returns a list of op codes shaped [tag, i1, i2, j1, j2], where tag is 'equal', 'delete' or 'insert'.
+function calculateDiff(array1, array2) {
+  const opCodes = [];
+
+  // i indexes array1 and j indexes array2; elements are compared with ===, and j only advances on a match.
+  let i = 0;
+  let j = 0;
+
+  while (i < array1.length && j < array2.length) {
+    if (array1[i] === array2[j]) {
+      opCodes.push(['equal', i, i + 1, j, j + 1]); // one-element match in both inputs
+      i++;
+      j++;
+    } else {
+      opCodes.push(['delete', i, i + 1, j, j]); // mismatch: skip array1[i]; array2[j] is retried against the next array1 element
+      i++;
+    }
+  }
 
+  // array2 was exhausted first: every leftover array1 element is recorded as a 'delete'.
+  while (i < array1.length) {
+    opCodes.push(['delete', i, i + 1, j, j]);
+    i++;
+  }
+
+  // array1 was exhausted first: every leftover array2 element is recorded as an 'insert'.
+  while (j < array2.length) {
+    opCodes.push(['insert', i, i, j, j + 1]);
+    j++;
+  }
+
+  return opCodes;
+}
+
+// Splits text on runs of whitespace and returns the resulting array of strings; leading or trailing whitespace in text yields empty-string entries at the ends.
+function convertRefTextToList(text) {
+  return text.split(/\s+/); // text must be a string (or otherwise provide .split)
+}
+
+// Replays the op codes to build one flat result array from sttWords and refWords, then returns the slice(start, end) of that array.
+function alignRefTextWithSTT(opCodes, sttWords, refWords, start, end) {
+  let alignedResults = [];
+
+  for (const op of opCodes) {
+    const [tag, i1, i2, j1, j2] = op; // i1..i2 index sttWords, j1..j2 index refWords
+
+    if (tag === 'equal') {
+      alignedResults.push(...sttWords.slice(i1, i2)); // matched range: keep the sttWords elements as-is
+    } else if (tag === 'delete') {
+      alignedResults.push(...Array(i2 - i1).fill('')); // one empty-string placeholder per deleted sttWords element
+    } else if (tag === 'insert') {
+      alignedResults.push(...refWords.slice(j1, j2)); // inserted range: take the elements from refWords
+    }
+  }
+
+  // start and end are applied as Array.prototype.slice bounds on the combined result (end exclusive; undefined means "to the end").
+  alignedResults = alignedResults.slice(start, end);
+
+  return alignedResults;
+}
 const createSlateContentFromSlateJsParagraphs = (currentContent, newEntities) => {
   // Update entites to block structure.
   const updatedBlockArray = [];

This issue body was partially generated by patch-package.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant