Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configurable Google Speech to Text transcription #429

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,15 @@ sudo apt-get update && sudo apt-get install google-cloud-sdk google-cloud-sdk-ap
gcloud init
gcloud auth application-default login
```
Individual features of Google Cloud Speech-to-Text can be enabled or disabled.
By default, the properties
`org.jitsi.jigasi.transcription.ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION=false`
and
`org.jitsi.jigasi.transcription.ENABLE_GOOGLE_PROFANITY_FILTER=false`
in
`jigasi-home/sip-communicator.properties`
disable automatic punctuation and profanity filtering of the transcription results.
To change this, set the corresponding property to `true` (enabled) or `false` (disabled).

Vosk configuration
==================
Expand Down Expand Up @@ -206,6 +215,11 @@ XMPP account must also be set to make Jigasi be able to join a conference room.
in plain text. Note that this will result in the chat being somewhat
spammed.</td>
</tr>
<tr>
<td>org.jitsi.jigasi.transcription.ENABLE_INTERIM_RESULTS</td>
<td>false</td>
<td>Whether to send interim (non-final) transcription results. Note that the receiver must handle interim results so that repeated transcriptions are not displayed to the user.</td>
</tr>
</table>

Call control MUCs (brewery)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,35 @@ public class GoogleCloudTranscriptionService
private final static String P_NAME_USE_VIDEO_MODEL
= "org.jitsi.jigasi.transcription.USE_VIDEO_MODEL";

/**
* Property name to determine whether the Google Speech API should add
* automatic punctuation to the transcription
*/
private final static String P_NAME_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION
= "org.jitsi.jigasi.transcription.ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION";

/**
* Property name to determine whether the Google Speech API should censor
* profane words
*/
private final static String P_NAME_ENABLE_GOOGLE_PROFANITY_FILTER
= "org.jitsi.jigasi.transcription.ENABLE_GOOGLE_PROFANITY_FILTER";

/**
* The default value for the property USE_VIDEO_MODEL
*/
private final static boolean DEFAULT_VALUE_USE_VIDEO_MODEL = false;

/**
* The default value for the property ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION
*/
private final static boolean DEFAULT_VALUE_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION = false;

/**
* The default value for the property ENABLE_GOOGLE_PROFANITY_FILTER
*/
private final static boolean DEFAULT_VALUE_ENABLE_GOOGLE_PROFANITY_FILTER = false;

/**
* Check whether the given string contains a supported language tag
*
Expand Down Expand Up @@ -224,6 +248,16 @@ private static void validateLanguageTag(String tag)
*/
private boolean useVideoModel;

/**
* Whether to enable automatic punctuation
*/
private boolean enableAutomaticPunctuation;

/**
* Whether to enable the profanity filter
*/
private boolean enableProfanityFilter;

/**
* Creates the RecognitionConfig the Google service uses based
* on the TranscriptionRequest
Expand Down Expand Up @@ -263,6 +297,12 @@ private RecognitionConfig getRecognitionConfig(TranscriptionRequest request)
builder.setModel("video");
}

// set punctuation mode
builder.setEnableAutomaticPunctuation(enableAutomaticPunctuation);

// set profanity filter
builder.setProfanityFilter(enableProfanityFilter);

// set the Language tag
String languageTag = request.getLocale().toLanguageTag();
validateLanguageTag(languageTag);
Expand All @@ -284,6 +324,12 @@ public GoogleCloudTranscriptionService()
{
useVideoModel = JigasiBundleActivator.getConfigurationService()
.getBoolean(P_NAME_USE_VIDEO_MODEL, DEFAULT_VALUE_USE_VIDEO_MODEL);

enableAutomaticPunctuation = JigasiBundleActivator.getConfigurationService()
.getBoolean(P_NAME_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION, DEFAULT_VALUE_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION);

enableProfanityFilter = JigasiBundleActivator.getConfigurationService()
.getBoolean(P_NAME_ENABLE_GOOGLE_PROFANITY_FILTER, DEFAULT_VALUE_ENABLE_GOOGLE_PROFANITY_FILTER);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.jitsi.jigasi.transcription;

import org.jitsi.jigasi.*;
import net.java.sip.communicator.service.protocol.*;
import org.json.*;

Expand All @@ -31,11 +32,27 @@ public class RemotePublisherTranscriptionHandler
extends LocalJsonTranscriptHandler
implements TranscriptionEventListener
{
/**
* Property name to determine whether to send the interim results
*/
private final static String P_NAME_ENABLE_INTERIM_RESULTS
= "org.jitsi.jigasi.transcription.ENABLE_INTERIM_RESULTS";

/**
* The default value for the property ENABLE_INTERIM_RESULTS
*/
private final static boolean DEFAULT_VALUE_ENABLE_INTERIM_RESULTS = false;

/**
* List of remote services to notify for transcriptions.
*/
private List<String> urls = new ArrayList<>();

/**
* Whether to send interim non-final results
*/
private boolean enableInterimResults;

/**
* Constructs RemotePublisherTranscriptionHandler, initializing its config.
*
Expand All @@ -52,12 +69,15 @@ public RemotePublisherTranscriptionHandler(String urlsStr)
{
urls.add(tokens.nextToken().trim());
}

enableInterimResults = JigasiBundleActivator.getConfigurationService()
.getBoolean(P_NAME_ENABLE_INTERIM_RESULTS, DEFAULT_VALUE_ENABLE_INTERIM_RESULTS);
}

@Override
public void publish(ChatRoom room, TranscriptionResult result)
{
if (result.isInterim())
if (!enableInterimResults && result.isInterim())
return;

JSONObject eventObject = createTranscriptionJSONObject(result);
Expand Down