1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.opencastproject.transcription.microsoft.azure.model;
23
import org.apache.commons.lang3.StringUtils;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.TimeZone;
32
33 public class MicrosoftAzureSpeechTranscriptionJsonRecognizedPhrases {
34
35
36
37
38
39
40
41
42
43 public String recognitionStatus;
44 public int channel;
45 public String offset;
46 public String duration;
47 public long offsetInTicks;
48 public long durationInTicks;
49 public List<MicrosoftAzureSpeechTranscriptionJsonRecognizedPhrase> nBest;
50 public String locale;
51
52
53
54 public MicrosoftAzureSpeechTranscriptionJsonRecognizedPhrases() { }
55
56 public String[] toSrt(float minConfidence, int maxCueLength) {
57 String text = getBestRecognizedText(minConfidence);
58 String[] cueText = splitCueText(text, maxCueLength);
59 return timestampCues(false, cueText);
60 }
61
62 public String[] toWebVtt(float minConfidence, int maxCueLength) {
63 String text = getBestRecognizedText(minConfidence);
64 String[] cueText = splitCueText(text, maxCueLength);
65 return timestampCues(true, cueText);
66 }
67
68 String[] timestampCues(boolean formatWebVtt, String[] cueText) {
69 long ticksPerMillisecond = 10000;
70 String format;
71 if (formatWebVtt) {
72 format = "HH:mm:ss.SSS";
73 } else {
74
75 format = "HH:mm:ss,SSS";
76 }
77 SimpleDateFormat formatter = new SimpleDateFormat(format);
78
79 formatter.setTimeZone(TimeZone.getTimeZone("GMT"));
80 int cueTextLength = 0;
81 int[] cuesTextLenth = new int[cueText.length];
82 for (int i = 0; i < cueText.length; i++) {
83 cuesTextLenth[i] = StringUtils.length(cueText[i]);
84 cueTextLength += cuesTextLenth[i];
85 }
86 String[] result = new String[cueText.length];
87 long cueOffsetInTicks = 0;
88 for (int i = 0; i < cueText.length; i++) {
89 long cueLengthInTicks = (long)Math.ceil((double)durationInTicks * (double)cuesTextLenth[i]
90 / (double)cueTextLength);
91
92 Date startTime = new Date((offsetInTicks + cueOffsetInTicks) / ticksPerMillisecond);
93 Date endTime = new Date((offsetInTicks + cueOffsetInTicks + cueLengthInTicks) / ticksPerMillisecond);
94 cueOffsetInTicks += cueLengthInTicks;
95 result[i] = String.format("%s --> %s\n%s\n", formatter.format(startTime), formatter.format(endTime), cueText[i]);
96 }
97 return result;
98 }
99
100 public String getBestRecognizedText(float minConfidence) {
101 if (nBest == null) {
102 return null;
103 }
104 Optional<MicrosoftAzureSpeechTranscriptionJsonRecognizedPhrase> bestPhrase;
105 if (minConfidence >= 0 && minConfidence < 1) {
106 bestPhrase = nBest.stream()
107 .filter(phrase -> phrase.confidence >= minConfidence)
108 .sorted((t1, t2) -> Float.compare(t2.confidence, t1.confidence))
109 .findFirst();
110 } else if (minConfidence >= 1) {
111 bestPhrase = nBest.stream().findFirst();
112 } else {
113 bestPhrase = nBest.stream()
114 .sorted((t1, t2) -> Float.compare(t2.confidence, t1.confidence))
115 .findFirst();
116 }
117 return bestPhrase.isPresent() ? bestPhrase.get().display : "";
118 }
119
120 public static String[] splitCueText(String text, int maxCueLength) {
121 int textLength = StringUtils.length(text);
122 if (textLength == 0) {
123 return new String[0];
124 } else if (textLength <= maxCueLength) {
125 return new String[] { text };
126 }
127 List<String> result = new ArrayList<>();
128 int start = 0;
129 do {
130 if (textLength - start <= maxCueLength) {
131 result.add(StringUtils.trimToEmpty(StringUtils.substring(text, start, textLength)));
132 break;
133 }
134 int end = StringUtils.lastIndexOf(text, " ", start + maxCueLength);
135 if (start >= end) {
136 end = Math.min(textLength, start + maxCueLength);
137 }
138 result.add(StringUtils.trimToEmpty(StringUtils.substring(text, start, end)));
139 start = end;
140 } while (start < textLength);
141 return result.toArray(new String[0]);
142 }
143 }