1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.opencastproject.transcription.microsoft.azure;
23
24 import org.opencastproject.transcription.microsoft.azure.model.MicrosoftAzureSpeechTranscription;
25 import org.opencastproject.transcription.microsoft.azure.model.MicrosoftAzureSpeechTranscriptionFile;
26 import org.opencastproject.transcription.microsoft.azure.model.MicrosoftAzureSpeechTranscriptionFiles;
27 import org.opencastproject.transcription.microsoft.azure.model.MicrosoftAzureSpeechTranscriptionJson;
28 import org.opencastproject.transcription.microsoft.azure.model.MicrosoftAzureSpeechTranscriptions;
29 import org.opencastproject.workspace.api.Workspace;
30
31 import com.google.gson.Gson;
32 import com.google.gson.GsonBuilder;
33
34 import org.apache.commons.lang3.StringUtils;
35 import org.apache.http.HttpStatus;
36 import org.apache.http.client.methods.CloseableHttpResponse;
37 import org.apache.http.client.methods.HttpDelete;
38 import org.apache.http.client.methods.HttpGet;
39 import org.apache.http.client.methods.HttpPost;
40 import org.apache.http.entity.ContentType;
41 import org.apache.http.entity.StringEntity;
42 import org.apache.http.impl.client.CloseableHttpClient;
43 import org.apache.http.util.EntityUtils;
44 import org.slf4j.Logger;
45 import org.slf4j.LoggerFactory;
46
47 import java.io.ByteArrayInputStream;
48 import java.io.IOException;
49 import java.io.InputStream;
50 import java.net.URI;
51 import java.net.URLEncoder;
52 import java.nio.charset.StandardCharsets;
53 import java.util.HashMap;
54 import java.util.List;
55 import java.util.Map;
56 import java.util.UUID;
57
58 public class MicrosoftAzureSpeechServicesClient {
59
60 private static final Logger logger = LoggerFactory.getLogger(MicrosoftAzureSpeechServicesClient.class);
61 private static final String WORKSPACE_COLLECTION = "azure-speech-services";
62 private static final String DEFAULT_TRANSCRIPTION_TIME_TO_LIVE = "P14D";
63 private final String azureSpeechServicesEndpoint;
64 private final String azureCognitiveServicesSubscriptionKey;
65
66 public MicrosoftAzureSpeechServicesClient(String azureSpeechServicesEndpoint,
67 String azureCognitiveServicesSubscriptionKey) {
68 this.azureSpeechServicesEndpoint = StringUtils.trimToEmpty(azureSpeechServicesEndpoint);
69 this.azureCognitiveServicesSubscriptionKey = StringUtils.trimToEmpty(azureCognitiveServicesSubscriptionKey);
70 }
71
72 public List<MicrosoftAzureSpeechTranscription> getTranscriptions(int skip, int top)
73 throws IOException, MicrosoftAzureNotAllowedException, MicrosoftAzureSpeechClientException {
74 return getTranscriptions(skip, top, null);
75 }
76
77 public List<MicrosoftAzureSpeechTranscription> getTranscriptions(int skip, int top, String filter)
78 throws IOException, MicrosoftAzureNotAllowedException, MicrosoftAzureSpeechClientException {
79
80
81 StringBuilder url = new StringBuilder(azureSpeechServicesEndpoint + "/speechtotext/v3.1/transcriptions");
82 StringBuilder params = new StringBuilder();
83 if (skip > 0) {
84 params.append("skip=" + skip);
85 }
86 if (top > 0) {
87 params.append("top=" + top);
88 }
89 if (StringUtils.isNotBlank(filter)) {
90 params.append("filter=" + URLEncoder.encode(filter, StandardCharsets.UTF_8));
91 }
92 if (params.length() > 0) {
93 url.append("?");
94 url.append(params);
95 }
96 try (CloseableHttpClient httpClient = HttpUtils.makeHttpClient()) {
97 HttpGet httpGet = new HttpGet(url.toString());
98 httpGet.addHeader("Ocp-Apim-Subscription-Key", azureCognitiveServicesSubscriptionKey);
99 try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
100 int code = response.getStatusLine().getStatusCode();
101 String responseString = "";
102 if (response.getEntity() != null) {
103 responseString = EntityUtils.toString(response.getEntity());
104 }
105 switch (code) {
106 case HttpStatus.SC_OK:
107 break;
108 case HttpStatus.SC_FORBIDDEN:
109 throw new MicrosoftAzureNotAllowedException(String.format("Not allowed to get transcriptions. "
110 + "Microsoft Azure Speech Services response: %s", responseString));
111 default:
112 throw new MicrosoftAzureSpeechClientException(String.format(
113 "Getting transcriptions failed with HTTP response code %d. "
114 + "Microsoft Azure Speech Services response: %s", code, responseString));
115 }
116 Gson gson = new GsonBuilder().create();
117 MicrosoftAzureSpeechTranscriptions transcriptions = gson.fromJson(responseString,
118 MicrosoftAzureSpeechTranscriptions.class);
119 return transcriptions.values;
120 }
121 }
122 }
123
124 public MicrosoftAzureSpeechTranscription getTranscriptionById(String transcriptionId)
125 throws IOException, MicrosoftAzureNotAllowedException, MicrosoftAzureSpeechClientException,
126 MicrosoftAzureNotFoundException {
127 String transcriptionUrl = azureSpeechServicesEndpoint + "/speechtotext/v3.1/transcriptions/"
128 + StringUtils.trimToEmpty(transcriptionId);
129 return getTranscription(transcriptionUrl);
130 }
131
132 public MicrosoftAzureSpeechTranscription getTranscription(String transcriptionUrl)
133 throws IOException, MicrosoftAzureNotAllowedException, MicrosoftAzureSpeechClientException,
134 MicrosoftAzureNotFoundException {
135 if (StringUtils.isBlank(transcriptionUrl)) {
136 throw new IllegalArgumentException("Transcription URL not set.");
137 }
138
139
140 String url = StringUtils.trimToEmpty(transcriptionUrl);
141 try (CloseableHttpClient httpClient = HttpUtils.makeHttpClient()) {
142 HttpGet httpGet = new HttpGet(url);
143 httpGet.addHeader("Ocp-Apim-Subscription-Key", azureCognitiveServicesSubscriptionKey);
144 try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
145 int code = response.getStatusLine().getStatusCode();
146 String responseString = "";
147 if (response.getEntity() != null) {
148 responseString = EntityUtils.toString(response.getEntity());
149 }
150 switch (code) {
151 case HttpStatus.SC_OK:
152 break;
153 case HttpStatus.SC_FORBIDDEN:
154 throw new MicrosoftAzureNotAllowedException(String.format("Not allowed to get transcription '%s'. "
155 + "Microsoft Azure Speech Services response: %s", transcriptionUrl, responseString));
156 case HttpStatus.SC_NOT_FOUND:
157 throw new MicrosoftAzureNotFoundException(String.format("Transcription '%s' not found.", transcriptionUrl));
158 default:
159 throw new MicrosoftAzureSpeechClientException(String.format(
160 "Getting transcription '%s' failed with HTTP response code %d. "
161 + "Microsoft Azure Speech Services response: %s", transcriptionUrl, code, responseString));
162 }
163 Gson gson = new GsonBuilder().create();
164 return gson.fromJson(responseString, MicrosoftAzureSpeechTranscription.class);
165 }
166 }
167 }
168
169 public MicrosoftAzureSpeechTranscription createTranscription(List<String> contentUrls, String destinationContainerUrl,
170 String displayName , String locale, List<String> candidateLocales, String timeToLive,
171 Map<String, Object> properties)
172 throws IOException, MicrosoftAzureNotAllowedException, MicrosoftAzureSpeechClientException {
173
174
175
176
177
178 String url = azureSpeechServicesEndpoint + "/speechtotext/v3.1/transcriptions";
179 MicrosoftAzureSpeechTranscription requestTranscription = new MicrosoftAzureSpeechTranscription();
180
181 requestTranscription.displayName = displayName;
182 requestTranscription.locale = locale;
183 requestTranscription.contentUrls = contentUrls;
184
185 requestTranscription.properties = new HashMap<>();
186 if (properties != null && !properties.isEmpty()) {
187 requestTranscription.properties.putAll(properties);
188 }
189
190
191
192
193 if (candidateLocales != null && !candidateLocales.isEmpty()) {
194 Map<String, Object> languageIdentification = new HashMap<>();
195 languageIdentification.put("candidateLocales", candidateLocales);
196 requestTranscription.properties.put("languageIdentification",languageIdentification);
197 }
198 if (StringUtils.isNotEmpty(timeToLive)) {
199 requestTranscription.properties.put("timeToLive", timeToLive);
200 } else {
201 requestTranscription.properties.put("timeToLive", DEFAULT_TRANSCRIPTION_TIME_TO_LIVE);
202 }
203 Gson gson = new GsonBuilder().create();
204 try (CloseableHttpClient httpClient = HttpUtils.makeHttpClient()) {
205 HttpPost httpPost = new HttpPost(url);
206 httpPost.addHeader("Ocp-Apim-Subscription-Key", azureCognitiveServicesSubscriptionKey);
207 httpPost.setEntity(new StringEntity(gson.toJson(requestTranscription), ContentType.APPLICATION_JSON));
208 try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
209 int code = response.getStatusLine().getStatusCode();
210 String responseString = "";
211 if (response.getEntity() != null) {
212 responseString = EntityUtils.toString(response.getEntity());
213 }
214 switch (code) {
215 case HttpStatus.SC_OK:
216 case HttpStatus.SC_CREATED:
217 break;
218 case HttpStatus.SC_FORBIDDEN:
219 throw new MicrosoftAzureNotAllowedException(String.format(
220 "Not allowed to create transcription '%s'. Microsoft Azure Speech Services response: %s",
221 displayName, responseString));
222 default:
223 throw new MicrosoftAzureSpeechClientException(String.format(
224 "Creating transcription '%s' failed with HTTP response code %d. "
225 + "Microsoft Azure Speech Services response: %s", displayName, code, responseString));
226 }
227 return gson.fromJson(responseString, MicrosoftAzureSpeechTranscription.class);
228 }
229 }
230 }
231
232 public MicrosoftAzureSpeechTranscriptionFiles getTranscriptionFilesById(String transcriptionId)
233 throws IOException, MicrosoftAzureNotAllowedException, MicrosoftAzureSpeechClientException,
234 MicrosoftAzureNotFoundException {
235 String transcriptionUrl = String.format("%s/speechtotext/v3.1/transcriptions/%s/files", azureSpeechServicesEndpoint,
236 StringUtils.trimToEmpty(transcriptionId));
237 return getTranscriptionFiles(transcriptionUrl);
238 }
239
240 public MicrosoftAzureSpeechTranscriptionFiles getTranscriptionFiles(String transcriptionFilesUrl)
241 throws IOException, MicrosoftAzureNotAllowedException, MicrosoftAzureSpeechClientException,
242 MicrosoftAzureNotFoundException {
243 if (StringUtils.isBlank(transcriptionFilesUrl)) {
244 throw new IllegalArgumentException("Transcription files URL not set.");
245 }
246
247
248 String url = StringUtils.trimToEmpty(transcriptionFilesUrl);
249 try (CloseableHttpClient httpClient = HttpUtils.makeHttpClient()) {
250 HttpGet httpGet = new HttpGet(url);
251 httpGet.addHeader("Ocp-Apim-Subscription-Key", azureCognitiveServicesSubscriptionKey);
252 try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
253 int code = response.getStatusLine().getStatusCode();
254 String responseString = "";
255 if (response.getEntity() != null) {
256 responseString = EntityUtils.toString(response.getEntity());
257 }
258 switch (code) {
259 case HttpStatus.SC_OK:
260 break;
261 case HttpStatus.SC_FORBIDDEN:
262 throw new MicrosoftAzureNotAllowedException(String.format("Not allowed to get transcription files '%s'. "
263 + "Microsoft Azure Speech Services response: %s", transcriptionFilesUrl, responseString));
264 case HttpStatus.SC_NOT_FOUND:
265 throw new MicrosoftAzureNotFoundException(String.format("Transcription files '%s' not found. "
266 + "Microsoft Azure Speech Services response: %s", transcriptionFilesUrl, responseString));
267 default:
268 throw new MicrosoftAzureSpeechClientException(String.format(
269 "Getting transcription files '%s' failed with HTTP response code %d. "
270 + "Microsoft Azure Speech Services response: %s", transcriptionFilesUrl, code, responseString));
271 }
272 Gson gson = new GsonBuilder().create();
273 return gson.fromJson(responseString, MicrosoftAzureSpeechTranscriptionFiles.class);
274 }
275 }
276 }
277
278 public static MicrosoftAzureSpeechTranscriptionJson getTranscriptionJson(
279 MicrosoftAzureSpeechTranscriptionFile transcriptionFile)
280 throws IOException, MicrosoftAzureNotAllowedException, MicrosoftAzureSpeechClientException,
281 MicrosoftAzureNotFoundException {
282 String transcriptionUrl = transcriptionFile.links.contentUrl;
283 try (CloseableHttpClient httpClient = HttpUtils.makeHttpClient()) {
284 HttpGet httpGet = new HttpGet(transcriptionUrl);
285 try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
286 int code = response.getStatusLine().getStatusCode();
287 String responseString = "";
288 if (response.getEntity() != null) {
289 responseString = EntityUtils.toString(response.getEntity());
290 }
291 switch (code) {
292 case HttpStatus.SC_OK:
293 break;
294 case HttpStatus.SC_FORBIDDEN:
295 throw new MicrosoftAzureNotAllowedException(String.format("Not allowed to get transcription file '%s'. "
296 + "Microsoft Azure Speech Services response: %s",
297 transcriptionUrl, responseString));
298 case HttpStatus.SC_NOT_FOUND:
299 throw new MicrosoftAzureNotFoundException(String.format("Transcription file '%s' not found. "
300 + "Microsoft Azure Speech Services response: %s", transcriptionUrl, responseString));
301 default:
302 throw new MicrosoftAzureSpeechClientException(String.format(
303 "Getting transcription file '%s' failed with HTTP response code %d. "
304 + "Microsoft Azure Speech Services response: %s", transcriptionUrl, code, responseString));
305 }
306 Gson gson = new GsonBuilder().create();
307 return gson.fromJson(responseString, MicrosoftAzureSpeechTranscriptionJson.class);
308 }
309 }
310 }
311
312 public static URI writeTranscriptionFile(MicrosoftAzureSpeechTranscriptionJson transcriptionJson,
313 Workspace workspace, String format, float minConfidence, int maxCueLength) throws IOException {
314 boolean formatIsWebVtt;
315 switch (StringUtils.lowerCase(format)) {
316 case "vtt":
317 formatIsWebVtt = true;
318 break;
319 case "srt":
320 formatIsWebVtt = false;
321 break;
322 default:
323 throw new IllegalArgumentException("format should be srt or vtt");
324 }
325 String content;
326 if (formatIsWebVtt) {
327 content = transcriptionJson.toWebVtt(minConfidence, maxCueLength);
328 } else {
329 content = transcriptionJson.toSrt(minConfidence, maxCueLength);
330 }
331 String fileName = UUID.randomUUID().toString();
332 if (formatIsWebVtt) {
333 fileName += ".vtt";
334 } else {
335 fileName += ".srt";
336 }
337 try (InputStream is = new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8))) {
338 return workspace.putInCollection(WORKSPACE_COLLECTION, fileName, is);
339 }
340 }
341
342 public void deleteTranscription(String transcriptionId)
343 throws IOException, MicrosoftAzureNotAllowedException, MicrosoftAzureSpeechClientException {
344 String transcriptionDeleteUrl = azureSpeechServicesEndpoint + "/speechtotext/v3.1/transcriptions/"
345 + StringUtils.trimToEmpty(transcriptionId);
346 try (CloseableHttpClient httpClient = HttpUtils.makeHttpClient()) {
347 HttpDelete httpDelete = new HttpDelete(transcriptionDeleteUrl);
348 httpDelete.addHeader("Ocp-Apim-Subscription-Key", azureCognitiveServicesSubscriptionKey);
349 try (CloseableHttpResponse response = httpClient.execute(httpDelete)) {
350 int code = response.getStatusLine().getStatusCode();
351 String responseString = "";
352 if (response.getEntity() != null) {
353 responseString = EntityUtils.toString(response.getEntity());
354 }
355 switch (code) {
356 case HttpStatus.SC_OK:
357 case HttpStatus.SC_NO_CONTENT:
358 case HttpStatus.SC_NOT_FOUND:
359 break;
360 case HttpStatus.SC_FORBIDDEN:
361 throw new MicrosoftAzureNotAllowedException(String.format("Not allowed to delete transcription '%s'. "
362 + "Microsoft Azure Speech Services response: %s",
363 transcriptionId, responseString));
364 default:
365 throw new MicrosoftAzureSpeechClientException(String.format(
366 "Deleting transcription '%s' failed with HTTP response code %d. "
367 + "Microsoft Azure Speech Services response: %s", transcriptionId, code, responseString));
368 }
369 }
370 }
371 }
372 }