MicrosoftAzureSpeechTranscriptionJson.java

/*
 * Licensed to The Apereo Foundation under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 *
 * The Apereo Foundation licenses this file to you under the Educational
 * Community License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License
 * at:
 *
 *   http://opensource.org/licenses/ecl2.txt
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations under
 * the License.
 *
 */

package org.opencastproject.transcription.microsoft.azure.model;

import org.apache.commons.lang3.StringUtils;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

public class MicrosoftAzureSpeechTranscriptionJson {

  // CHECKSTYLE:OFF checkstyle:LineLength

  // Documentation:
  // https://eastus.dev.cognitive.microsoft.com/docs/services/speech-to-text-api-v3-1/operations/Transcriptions_ListFiles

  // CHECKSTYLE:ON checkstyle:LineLength
  // CHECKSTYLE:OFF checkstyle:VisibilityModifier

  public String source;
  public String timestamp;
  public long durationInTicks;
  public String duration;
  public List<Map<String, Object>> combinedRecognizedPhrases;
  public List<MicrosoftAzureSpeechTranscriptionJsonRecognizedPhrases> recognizedPhrases;

  // CHECKSTYLE:ON checkstyle:VisibilityModifier

  public MicrosoftAzureSpeechTranscriptionJson() { }

  public String toSrt(float minConfidence, int maxCueLength) {
    StringBuilder sb = new StringBuilder();
    long segmentIndex = 1;
    for (MicrosoftAzureSpeechTranscriptionJsonRecognizedPhrases phrase : recognizedPhrases) {
      String[] cues = phrase.toSrt(minConfidence, maxCueLength);
      if (cues != null) {
        for (String cue : cues) {
          sb.append(String.format("\n%d\n", segmentIndex++)).append(cue);
        }
      }
    }
    return sb.toString();
  }

  public String toWebVtt(float minConfidence, int maxCueLength) {
    StringBuilder sb = new StringBuilder();
    sb.append("WEBVTT\n");
    for (MicrosoftAzureSpeechTranscriptionJsonRecognizedPhrases phrase : recognizedPhrases) {
      String[] cues = phrase.toWebVtt(minConfidence, maxCueLength);
      if (cues != null) {
        for (String cue : cues) {
          if (StringUtils.isNotBlank(cue)) {
            sb.append("\n");
            sb.append(cue);
          }
        }
      }
    }
    return sb.toString();
  }

  public Map<String, Float> getRecognizedLocales() {
    Map<String, Long> localeDurations = new HashMap<>();
    for (MicrosoftAzureSpeechTranscriptionJsonRecognizedPhrases recognizedPhrase : recognizedPhrases) {
      if (StringUtils.isNotBlank(recognizedPhrase.locale)) {
        localeDurations.put(recognizedPhrase.locale,
            localeDurations.getOrDefault(recognizedPhrase.locale, 0L) + recognizedPhrase.durationInTicks);
      }
    }
    Map<String, Float> relativeLocaleDurations = new HashMap<>();
    for (Map.Entry<String, Long> localeDuration : localeDurations.entrySet()) {
      relativeLocaleDurations.put(localeDuration.getKey(),
          localeDuration.getValue().floatValue() / Long.valueOf(durationInTicks).floatValue());
    }
    return relativeLocaleDurations;
  }

  public String getRecognizedLocale() {
    Optional<Map.Entry<String, Float>> localeOpt = getRecognizedLocales().entrySet().stream()
        .max(Map.Entry.comparingByValue());
    return localeOpt.isPresent() ? localeOpt.get().getKey() : "";
  }
}