Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dry-run + ability to specify models + minor edits #39

Merged
merged 4 commits into from
Sep 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ In the age of OTT platforms, there are still some who prefer to download movies/
* For a GPU build that is reusable (saving time on instantiating the program):
```bash
$ docker build --build-arg BASEIMAGE=nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 --build-arg DEPSLIST=requirements-gpu.txt -t autosub-base . && \
docker run --gpus all --name autosub-base autosub-base || \
docker commit autosub-base autosub-instance
docker run --gpus all --name autosub-base autosub-base --dry-run || \
docker commit --change 'CMD []' autosub-base autosub-instance
```
* Then
```bash
Expand Down
6 changes: 3 additions & 3 deletions autosub/audioProcessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import sys
import subprocess
import numpy as np

from os.path import basename

def extract_audio(input_file, audio_file_name):
"""Extract audio from input video file and save to audio/ in root dir
Expand All @@ -17,8 +17,8 @@ def extract_audio(input_file, audio_file_name):
try:
command = ["ffmpeg", "-hide_banner", "-loglevel", "warning", "-i", input_file, "-ac", "1", "-ar", "16000",
"-vn", "-f", "wav", audio_file_name]
ret = subprocess.call(command)
print("Extracted audio to audio/{}".format(audio_file_name.split("/")[-1]))
ret = subprocess.run(command).returncode
print("Extracted audio to audio/{}".format(basename(audio_file_name)))
except Exception as e:
print("Error: ", str(e))
sys.exit(1)
Expand Down
160 changes: 126 additions & 34 deletions autosub/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,52 +107,130 @@ def ds_process_audio(ds, audio_file, output_file_handle_dict, split_duration):
def main():
global line_count
print("AutoSub\n")

for x in os.listdir():
if x.endswith(".pbmm"):
print("Model: ", os.path.join(os.getcwd(), x))
ds_model = os.path.join(os.getcwd(), x)
if x.endswith(".scorer"):
print("Scorer: ", os.path.join(os.getcwd(), x))
ds_scorer = os.path.join(os.getcwd(), x)

# Load DeepSpeech model
try:
ds = Model(ds_model)
except:
print("Invalid model file. Exiting\n")
sys.exit(1)

try:
ds.enableExternalScorer(ds_scorer)
except:
print("Invalid scorer file. Running inference using only model file\n")


#Parsing Arguments Section
supported_output_formats = ['srt', 'vtt', 'txt']
parser = argparse.ArgumentParser(description="AutoSub")
parser.add_argument('--file', required=True,
help='Input video file')

#Runtime Options
parser.add_argument('--format', choices=supported_output_formats, nargs='+',
help='Create only certain output formats rather than all formats',
default=supported_output_formats)
parser.add_argument('--split-duration', type=float,
help='Split run-on sentences exceededing this duration (in seconds) into multiple subtitles',
default=5)
args = parser.parse_args()
parser.add_argument('--dry-run', dest="dry_run", action="store_true",
help="Perform dry-run to verify options prior to running. Also useful to instantiate cuda/tensorflow cache prior to running multiple times.")

if os.path.isfile(args.file):
input_file = args.file
print("\nInput file:", input_file)
#Files that should be supplied
#--file is no longer marked as required by argparse; its presence is checked manually further below.
#Supplying EITHER --file OR --dry-run is sufficient.
parser.add_argument('--file', required=False,
help='Input video file')
parser.add_argument('--model', required=False,
help='Input *.pbmm model file')
parser.add_argument('--scorer', required=False,
help='Input *.scorer file')

args = parser.parse_args()

#Please keep the following because I need it for verifying dockerfiles.
print(sys.argv[0:])
print("ARGS:", args)

def getmodel(args, arg_name):
    """Resolve the path of the model or scorer file to use.

    An explicitly supplied command-line value takes priority. When none is
    given, the current working directory is searched for exactly one regular
    file carrying the matching extension ('.pbmm' for 'model', '.scorer' for
    'scorer').

    Args:
        args: Parsed argument namespace exposing ``model`` and ``scorer``
            attributes (``None`` when the flag was not supplied).
        arg_name: Either 'model' or 'scorer'.

    Returns:
        Absolute path of the selected file, or '' when no scorer could be
        chosen unambiguously (a scorer is optional).

    Exits (status 1) when:
        * ``arg_name`` is neither 'model' nor 'scorer',
        * an explicitly supplied file does not exist,
        * no model, or more than one model, is found locally.
    """
    extensions = {'model': '.pbmm', 'scorer': '.scorer'}
    if arg_name not in extensions:
        print("Coding Error. This function only accepts model or scorer for arg_name.")
        sys.exit(1)
    arg_extension = extensions[arg_name]

    arg = getattr(args, arg_name)

    if arg is not None:
        # Prioritize the explicitly supplied argument.
        model = os.path.abspath(arg)
        if not os.path.isfile(model):
            print(f"Error. Supplied file {arg} doesn't exist. Please supply a valid {arg_name} file via the --{arg_name} flag.")
            sys.exit(1)
    else:
        # Try to find local models. Only regular files count: a directory
        # that happens to end with the extension is not a usable model.
        # Sorting keeps the reported candidate list deterministic.
        models = sorted(
            name for name in os.listdir()
            if name.endswith(arg_extension) and os.path.isfile(name)
        )
        num_models = len(models)

        if num_models == 1:
            model = os.path.abspath(models[0])
        else:
            if num_models == 0:
                print(f"Warning no {arg_name}s specified via --{arg_name} and none found in local directory. Please run getmodel.sh convenience script from autosub repo to get some.")
            else:
                print(f"Warning. Detected multiple *{arg_extension} files in local dir. You must specify which one you wish to use via the --{arg_name} field. Details: \n {num_models} {models}")

            if arg_name == 'model':
                # A model is mandatory; without an unambiguous one we cannot run.
                if num_models == 0:
                    print("Error: Must have pbmm model. Exiting")
                else:
                    print("Error: Must specify pbmm model. Exiting")
                sys.exit(1)

            if num_models != 0:
                print("Since I cannot know which scorer you wish to use, I just won't use any and try to run inference without it.")
            # The scorer is optional: fall back to running without one.
            model = ''

    print(f"{arg_name}: ", model)
    return model

def InstantiateModel(model, scorer):
    """Load the DeepSpeech model and try to attach an external scorer.

    Args:
        model: Path passed to ``Model(...)``.
        scorer: Path passed to ``enableExternalScorer``.

    Returns:
        The object returned by ``Model(model)``, with the external scorer
        enabled when that call succeeded.

    Exits (status 1) when the model cannot be loaded. A scorer failure is
    not fatal: inference continues with the model alone.
    """
    # Load DeepSpeech model
    try:
        ds = Model(model)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        print("Invalid model file. Exiting\n")
        sys.exit(1)

    try:
        ds.enableExternalScorer(scorer)
    except Exception:
        print("Invalid scorer file. Running inference using only model file\n")
    return ds


ds_model = getmodel(args, 'model')
ds_scorer = getmodel(args, 'scorer')

if args.dry_run:
InstantiateModel(ds_model, ds_scorer)
if args.file is not None:
if not os.path.isfile(args.file):
print(f"Error: {args.file}: No such file exists")
sys.exit(0)

#Not a dry-run
if args.file is not None:
if os.path.isfile(args.file):
input_file = args.file
print("\nInput file:", input_file)
else:
print(args.file, ": No such file exists")
sys.exit(1)
else:
print(args.file, ": No such file exists")
print("Error. You must supply a file with --file or to instantiate cuda cache you must supply a --dry-run.")
sys.exit(1)

base_directory = os.getcwd()
output_directory = os.path.join(base_directory, "output")
audio_directory = os.path.join(base_directory, "audio")
video_prefix = os.path.splitext(os.path.basename(input_file))[0]
audio_file_name = os.path.join(audio_directory, video_prefix + ".wav")

output_file_handle_dict = {}
for format in args.format:
output_filename = os.path.join(output_directory, video_prefix + "." + format)
Expand Down Expand Up @@ -183,11 +261,25 @@ def main():

print("\nRunning inference:")

for filename in tqdm(sort_alphanumeric(os.listdir(audio_directory))):
# Only run inference on relevant files, and don't run inference on the original audio file
if filename.startswith(video_prefix) and (filename != os.path.basename(audio_file_name)):
audio_segment_path = os.path.join(audio_directory, filename)
ds_process_audio(ds, audio_segment_path, output_file_handle_dict, split_duration=args.split_duration)
#Remove master audio file
audiofiles=sort_alphanumeric(os.listdir(audio_directory))
audiofiles.remove(os.path.basename(audio_file_name))

#Remove non related audiofiles potentially from other instances of autosub
audiofiles_ = []
for filename in audiofiles:
if filename.startswith(video_prefix):
audiofiles_.append(filename)
audiofiles = audiofiles_
del(audiofiles_)

#Process Segments

ds = InstantiateModel(ds_model, ds_scorer)

for filename in tqdm(audiofiles):
audio_segment_path = os.path.join(audio_directory, filename)
ds_process_audio(ds, audio_segment_path, output_file_handle_dict, split_duration=args.split_duration)

print("\n")
for format in output_file_handle_dict:
Expand Down
2 changes: 1 addition & 1 deletion scripts/gpu-build.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
cd "$(dirname "$(readlink -f "$0")")"
cd ..
docker build --build-arg BASEIMAGE=nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 --build-arg DEPSLIST=requirements-gpu.txt -t autosub-base . && docker run --gpus all --name autosub-base autosub-base || docker commit autosub-base autosub-instance && docker container rm autosub-base
docker build --build-arg BASEIMAGE=nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 --build-arg DEPSLIST=requirements-gpu.txt -t autosub-base . && docker run --gpus all --name autosub-base autosub-base --dry-run && docker commit --change 'CMD []' autosub-base autosub-instance && docker container rm autosub-base