VoskEngine.java

/*
 * Licensed to The Apereo Foundation under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 *
 * The Apereo Foundation licenses this file to you under the Educational
 * Community License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License
 * at:
 *
 *   http://opensource.org/licenses/ecl2.txt
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations under
 * the License.
 *
 */

package org.opencastproject.speechtotext.impl.engine;

import org.opencastproject.speechtotext.api.SpeechToTextEngine;
import org.opencastproject.speechtotext.api.SpeechToTextEngineException;
import org.opencastproject.util.IoSupport;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.osgi.service.component.ComponentContext;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Modified;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

/** Vosk implementation of the Speech-to-text engine interface. */
@Component(
    property = {
        "service.description=Vosk implementation of the SpeechToTextEngine interface",
        "enginetype=vosk"
    }
)
public class VoskEngine implements SpeechToTextEngine {

  private static final Logger logger = LoggerFactory.getLogger(VoskEngine.class);

  /** Name of the engine. */
  private static final String engineName = "Vosk";

  /** Config key for setting the path to the vosk. */
  private static final String VOSK_EXECUTABLE_PATH_CONFIG_KEY = "vosk.root.path";

  /** Default path to vosk. */
  public static final String VOSK_EXECUTABLE_DEFAULT_PATH = "vosk-cli";

  /** Currently used path of the vosk installation. */
  private String voskExecutable = VOSK_EXECUTABLE_DEFAULT_PATH;

  /** Config key to set default language */
  private static final String VOSK_DEFAULT_LANGUAGE_KEY = "vosk.default.language";

  /** Default Language */
  public static final String VOSK_DEFAULT_LANGUAGE = "eng";

  /** Currently used default language for Vosk */
  private  String voskLanguage = VOSK_DEFAULT_LANGUAGE;


  @Override
  public String getEngineName() {
    return engineName;
  }

  @Activate
  @Modified
  public void activate(ComponentContext cc) {
    logger.debug("Activated/Modified Vosk engine service class");
    voskExecutable = StringUtils.defaultIfBlank(
            (String) cc.getProperties().get(VOSK_EXECUTABLE_PATH_CONFIG_KEY), VOSK_EXECUTABLE_DEFAULT_PATH);
    voskLanguage = StringUtils.defaultIfBlank(
        (String)  cc.getProperties().get(VOSK_DEFAULT_LANGUAGE_KEY), VOSK_DEFAULT_LANGUAGE);
    logger.debug("Set vosk path to {}", voskExecutable);
    logger.debug("Set default vosk language to {}", voskLanguage);
    logger.debug("Finished activating/updating speech-to-text service");
  }

  /**
   * {@inheritDoc}
   *
   * @see org.opencastproject.speechtotext.api.SpeechToTextEngine#generateSubtitlesFile(File, File, String, Boolean)
   */
  @Override
  public Result generateSubtitlesFile(File mediaFile, File workingDirectory,
      String language, Boolean translate)
          throws SpeechToTextEngineException {

    if (language.isBlank()) {
      logger.debug("Language field empty, using {} as default language", voskLanguage);
      language = voskLanguage;
    }

    var output = new File(workingDirectory, FilenameUtils.getBaseName(mediaFile.getAbsolutePath()) + ".vtt");
    final List<String> command = Arrays.asList(
            voskExecutable,
            "-i", mediaFile.getAbsolutePath(),
            "-o", output.getAbsolutePath(),
            "-l", language);
    logger.info("Executing Vosk's transcription command: {}", command);

    Process process = null;
    try {
      ProcessBuilder processBuilder = new ProcessBuilder(command);
      processBuilder.redirectErrorStream(true);
      process = processBuilder.start();

      // wait until the task is finished
      int exitCode = process.waitFor();
      if (exitCode != 0) {
        var error = "";
        try (var errorStream = process.getInputStream()) {
          error = "\n Output:\n" + IOUtils.toString(errorStream, StandardCharsets.UTF_8);
        }
        throw new SpeechToTextEngineException(
                String.format("Vosk exited abnormally with status %d (command: %s)%s", exitCode, command, error));
      }
      if (!output.isFile()) {
        throw new SpeechToTextEngineException("Vosk produced no output");
      }
      logger.info("Subtitles file generated successfully: {}", output);
    } catch (Exception e) {
      logger.debug("Transcription failed closing Vosk transcription process for: {}", mediaFile);
      throw new SpeechToTextEngineException(e);
    } finally {
      IoSupport.closeQuietly(process);
    }
    return new Result(language, output);
  }

}