1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.opencastproject.workflow.handler.speechtotext;
22
import org.opencastproject.inspection.api.MediaInspectionService;
import org.opencastproject.job.api.Job;
import org.opencastproject.job.api.JobContext;
import org.opencastproject.mediapackage.MediaPackage;
import org.opencastproject.mediapackage.MediaPackageElement;
import org.opencastproject.mediapackage.MediaPackageElementFlavor;
import org.opencastproject.mediapackage.MediaPackageElementParser;
import org.opencastproject.mediapackage.MediaPackageElements;
import org.opencastproject.mediapackage.Track;
import org.opencastproject.mediapackage.attachment.AttachmentImpl;
import org.opencastproject.mediapackage.selector.TrackSelector;
import org.opencastproject.mediapackage.track.TrackImpl;
import org.opencastproject.metadata.api.MediaPackageMetadata;
import org.opencastproject.metadata.dublincore.DublinCoreCatalogService;
import org.opencastproject.serviceregistry.api.ServiceRegistry;
import org.opencastproject.speechtotext.api.SpeechToTextService;
import org.opencastproject.speechtotext.api.SpeechToTextServiceException;
import org.opencastproject.workflow.api.AbstractWorkflowOperationHandler;
import org.opencastproject.workflow.api.ConfiguredTagsAndFlavors;
import org.opencastproject.workflow.api.WorkflowInstance;
import org.opencastproject.workflow.api.WorkflowOperationException;
import org.opencastproject.workflow.api.WorkflowOperationHandler;
import org.opencastproject.workflow.api.WorkflowOperationInstance;
import org.opencastproject.workflow.api.WorkflowOperationResult;
import org.opencastproject.workspace.api.Workspace;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.osgi.service.component.ComponentContext;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.UUID;
import java.util.stream.Collectors;
68
69
70
71
72 @Component(
73 immediate = true,
74 service = WorkflowOperationHandler.class,
75 property = {
76 "service.description=Speech-to-Text Workflow Operation Handler",
77 "workflow.operation=speechtotext"
78 }
79 )
80 public class
81 SpeechToTextWorkflowOperationHandler extends AbstractWorkflowOperationHandler {
82
83 private static final Logger logger = LoggerFactory.getLogger(SpeechToTextWorkflowOperationHandler.class);
84
85
86 private static final String LANGUAGE_CODE = "language-code";
87
88
89 private static final String LANGUAGE_FALLBACK = "language-fallback";
90
91
92 private static final String TARGET_ELEMENT = "target-element";
93
94
95 private static final String PLACEHOLDER_LANG = "#{lang}";
96
97
98 private static final String TRANSLATE_MODE = "translate";
99
100
101 private static final String TRACK_SELECTION_STRATEGY = "track-selection-strategy";
102
103
104 private static final String LIMIT_TO_ONE = "limit-to-one";
105
106
107 private static final String ASYNCHRONOUS = "async";
108
109
110 private static final String JOBS_WORKFLOW_CONFIGURATION = "speech-to-text-jobs";
111
112 private enum TrackSelectionStrategy {
113 PRESENTER_OR_NOTHING,
114 PRESENTATION_OR_NOTHING,
115 TRY_PRESENTER_FIRST,
116 TRY_PRESENTATION_FIRST,
117 EVERYTHING;
118
119 private static TrackSelectionStrategy fromString(String value) {
120 for (TrackSelectionStrategy strategy : values()) {
121 if (strategy.name().equalsIgnoreCase(value)) {
122 return strategy;
123 }
124 }
125 throw new IllegalArgumentException(
126 "No TrackSelectionStrategy enum constant " + TrackSelectionStrategy.class.getCanonicalName() + "." + value);
127 }
128 }
129
130 private enum AppendSubtitleAs {
131 attachment, track
132 }
133
134
135 private SpeechToTextService speechToTextService = null;
136
137
138 private Workspace workspace;
139
140
141 private MediaInspectionService mediaInspectionService;
142
143
144 private DublinCoreCatalogService dublinCoreCatalogService;
145
146 @Override
147 @Activate
148 public void activate(ComponentContext cc) {
149 super.activate(cc);
150 logger.info("Registering speech-to-text workflow operation handler");
151 }
152
153
154
155
156
157
158
159
160 @Override
161 public WorkflowOperationResult start(WorkflowInstance workflowInstance, JobContext context)
162 throws WorkflowOperationException {
163
164 MediaPackage mediaPackage = workflowInstance.getMediaPackage();
165 logger.info("Start speech-to-text workflow operation for media package {}", mediaPackage);
166
167
168 var async = BooleanUtils.toBoolean(workflowInstance.getCurrentOperation().getConfiguration(ASYNCHRONOUS));
169
170 ConfiguredTagsAndFlavors tagsAndFlavors = getTagsAndFlavors(workflowInstance,
171 Configuration.none, Configuration.one,
172 Configuration.many, Configuration.one);
173 MediaPackageElementFlavor sourceFlavor = tagsAndFlavors.getSingleSrcFlavor();
174
175 TrackSelector trackSelector = new TrackSelector();
176 trackSelector.addFlavor(sourceFlavor);
177 Collection<Track> tracks = trackSelector.select(mediaPackage, false);
178
179 if (tracks.isEmpty()) {
180 throw new WorkflowOperationException(
181 String.format("No tracks with source flavor '%s' found for transcription", sourceFlavor));
182 }
183
184 logger.info("Found {} track(s) with source flavor '{}'.", tracks.size(), sourceFlavor);
185
186
187 String languageCode = getMediaPackageLanguage(mediaPackage, workflowInstance);
188
189
190 AppendSubtitleAs appendSubtitleAs = howToAppendTheSubtitles(workflowInstance);
191
192
193 Boolean translate = getTranslationMode(workflowInstance);
194
195
196 List<Track> tracksWithAudio = tracks.stream().filter(Track::hasAudio).collect(Collectors.toList());
197
198
199
200 TrackSelectionStrategy trackSelectionStrategy = getTrackSelectionStrategy(mediaPackage, workflowInstance);
201
202
203 List<Track> tracksToTranscribe = filterTracksByStrategy(tracksWithAudio, trackSelectionStrategy);
204 if (tracksToTranscribe.isEmpty()) {
205 logger.info("No subtitles were created for media package {}. "
206 + "Workflow Configuration 'track-selection-strategy' is set to {}", mediaPackage, trackSelectionStrategy);
207 return createResult(mediaPackage, WorkflowOperationResult.Action.SKIP);
208 }
209
210
211
212 boolean limitToOne = BooleanUtils.toBoolean(workflowInstance.getCurrentOperation().getConfiguration(LIMIT_TO_ONE));
213 if (limitToOne) {
214 tracksToTranscribe = List.of(tracksToTranscribe.get(0));
215 }
216
217 if (async) {
218 createSubtitleAsync(workflowInstance, tracksToTranscribe, languageCode, translate);
219 } else {
220 for (Track track : tracksToTranscribe) {
221 createSubtitle(track, languageCode, mediaPackage, tagsAndFlavors, appendSubtitleAs, translate);
222 }
223 }
224
225 logger.info("Speech-To-Text workflow operation for media package {} completed", mediaPackage);
226 return createResult(mediaPackage, WorkflowOperationResult.Action.CONTINUE);
227 }
228
229
230
231
232
233
234
235 private List<Track> filterTracksByStrategy(List<Track> tracksWithAudio,
236 TrackSelectionStrategy trackSelectionStrategy) {
237
238 List<Track> tracksToTranscribe = new ArrayList<>();
239 if (!tracksWithAudio.isEmpty()) {
240
241 String presenterTypeConstant = MediaPackageElements.PRESENTER_SOURCE.getType();
242 String presentationTypeConstant = MediaPackageElements.PRESENTATION_SOURCE.getType();
243
244
245 List<Track> presenterTracksWithAudio = tracksWithAudio.stream()
246 .filter(track -> Objects.equals(track.getFlavor().getType(), presenterTypeConstant))
247 .collect(Collectors.toList());
248
249
250 List<Track> presentationTracksWithAudio = tracksWithAudio.stream()
251 .filter(track -> Objects.equals(track.getFlavor().getType(), presentationTypeConstant))
252 .collect(Collectors.toList());
253
254 if (TrackSelectionStrategy.PRESENTER_OR_NOTHING.equals(trackSelectionStrategy)) {
255 tracksToTranscribe.addAll(presenterTracksWithAudio);
256 }
257
258 if (TrackSelectionStrategy.PRESENTATION_OR_NOTHING.equals(trackSelectionStrategy)) {
259 tracksToTranscribe.addAll(presentationTracksWithAudio);
260 }
261
262 if (TrackSelectionStrategy.TRY_PRESENTER_FIRST.equals(trackSelectionStrategy)) {
263 tracksToTranscribe.addAll(presenterTracksWithAudio);
264 if (tracksToTranscribe.isEmpty()) {
265 tracksToTranscribe.addAll(tracksWithAudio);
266 }
267 }
268
269 if (TrackSelectionStrategy.TRY_PRESENTATION_FIRST.equals(trackSelectionStrategy)) {
270 tracksToTranscribe.addAll((presentationTracksWithAudio));
271 if (tracksToTranscribe.isEmpty()) {
272 tracksToTranscribe.addAll(tracksWithAudio);
273 }
274 }
275
276 if (TrackSelectionStrategy.EVERYTHING.equals(trackSelectionStrategy)) {
277 tracksToTranscribe.addAll(tracksWithAudio);
278 }
279 }
280 return tracksToTranscribe;
281 }
282
283
284
285
286
287
288
289
290
291
292
293
294 private void createSubtitle(Track track, String languageCode, MediaPackage parentMediaPackage,
295 ConfiguredTagsAndFlavors tagsAndFlavors, AppendSubtitleAs appendSubtitleAs, Boolean translate)
296 throws WorkflowOperationException {
297
298
299 URI trackURI = track.getURI();
300
301 Job job;
302 logger.info("Generating subtitle for '{}'...", trackURI);
303 try {
304 job = speechToTextService.transcribe(trackURI, languageCode, translate);
305 } catch (SpeechToTextServiceException e) {
306 throw new WorkflowOperationException(
307 String.format("Generating subtitles for '%s' in media package '%s' failed",
308 trackURI, parentMediaPackage), e);
309 }
310
311 if (!waitForStatus(job).isSuccess()) {
312 throw new WorkflowOperationException(
313 String.format("Speech-to-Text job for media package '%s' failed", parentMediaPackage));
314 }
315
316
317 try {
318 String[] jobOutput = job.getPayload().split(",");
319 URI output = new URI(jobOutput[0]);
320 String outputLanguage = jobOutput[1];
321 String engineType = jobOutput[2];
322
323 String mediaPackageIdentifier = UUID.randomUUID().toString();
324
325 MediaPackageElement subtitleMediaPackageElement;
326 switch (appendSubtitleAs) {
327 case attachment:
328 subtitleMediaPackageElement = new AttachmentImpl();
329 break;
330 case track:
331 default:
332 subtitleMediaPackageElement = new TrackImpl();
333 }
334
335 subtitleMediaPackageElement.setIdentifier(mediaPackageIdentifier);
336 try (InputStream in = workspace.read(output)) {
337 URI uri = workspace.put(parentMediaPackage.getIdentifier().toString(), mediaPackageIdentifier,
338 FilenameUtils.getName(output.getPath()), in);
339 subtitleMediaPackageElement.setURI(uri);
340 }
341 MediaPackageElementFlavor targetFlavor = tagsAndFlavors.getSingleTargetFlavor().applyTo(track.getFlavor());
342 subtitleMediaPackageElement.setFlavor(targetFlavor);
343
344 List<String> targetTags = tagsAndFlavors.getTargetTags();
345 targetTags.add("lang:" + outputLanguage);
346 targetTags.add("generator-type:auto");
347 targetTags.add("generator:" + engineType.toLowerCase());
348
349
350 Job inspection = mediaInspectionService.enrich(subtitleMediaPackageElement, true);
351 if (!waitForStatus(inspection).isSuccess()) {
352 throw new SpeechToTextServiceException(String.format(
353 "Transcription for '%s' failed at enriching process", trackURI));
354 }
355
356 subtitleMediaPackageElement = MediaPackageElementParser.getFromXml(inspection.getPayload());
357
358 for (String tag : targetTags) {
359 subtitleMediaPackageElement.addTag(tag);
360 }
361
362 parentMediaPackage.add(subtitleMediaPackageElement);
363
364 workspace.delete(output);
365 } catch (Exception e) {
366 throw new WorkflowOperationException("Error handling text-to-speech service output", e);
367 }
368
369 try {
370 workspace.cleanup(parentMediaPackage.getIdentifier());
371 } catch (IOException e) {
372 throw new WorkflowOperationException(e);
373 }
374 }
375
376
377
378
379
380
381
382
383
384
385 private void createSubtitleAsync(WorkflowInstance workflow, List<Track> tracks, String languageCode,
386 Boolean translate) throws WorkflowOperationException {
387
388 logger.info("Asynchronously generating subtitles");
389 StringBuilder jobs = new StringBuilder();
390 try {
391 for (var track: tracks) {
392 var job = speechToTextService.transcribe(track.getURI(), languageCode, translate);
393 jobs.append(",").append(job.getId());
394 }
395 } catch (SpeechToTextServiceException e) {
396 throw new WorkflowOperationException(
397 String.format("Starting subtitle job in media package '%s' failed",
398 workflow.getMediaPackage().getIdentifier()), e);
399 }
400
401 var config = Objects.toString(workflow.getConfiguration(JOBS_WORKFLOW_CONFIGURATION), "") + jobs;
402 workflow.setConfiguration(JOBS_WORKFLOW_CONFIGURATION, config.replaceFirst("^,", ""));
403 }
404
405
406
407
408
409
410
411
412
413 private TrackSelectionStrategy getTrackSelectionStrategy(MediaPackage mediaPackage, WorkflowInstance workflowInstance)
414 throws WorkflowOperationException {
415
416 WorkflowOperationInstance operation = workflowInstance.getCurrentOperation();
417 String strategyCfg = StringUtils.trimToEmpty(operation.getConfiguration(TRACK_SELECTION_STRATEGY)).toLowerCase();
418
419 if (strategyCfg.isEmpty()) {
420 return TrackSelectionStrategy.EVERYTHING;
421 }
422 try {
423 return TrackSelectionStrategy.fromString(strategyCfg);
424 } catch (IllegalArgumentException e) {
425 throw new WorkflowOperationException(String.format(
426 "Speech-to-Text job for media package '%s' failed, because of wrong workflow configuration. "
427 + "track-selection-strategy of type '%s' does not exist.", mediaPackage, strategyCfg));
428 }
429 }
430
431
432
433
434
435
436
437
438
439 private AppendSubtitleAs howToAppendTheSubtitles(WorkflowInstance workflowInstance)
440 throws WorkflowOperationException {
441 WorkflowOperationInstance operation = workflowInstance.getCurrentOperation();
442 String targetElement = StringUtils.trimToEmpty(operation.getConfiguration(TARGET_ELEMENT)).toLowerCase();
443 if (targetElement.isEmpty()) {
444 return AppendSubtitleAs.track;
445 }
446 try {
447 return AppendSubtitleAs.valueOf(targetElement);
448 } catch (IllegalArgumentException e) {
449 throw new WorkflowOperationException(String.format(
450 "Speech-to-Text job for media package '%s' failed, because of wrong workflow configuration. "
451 + "target-element of type '%s' does not exist.", workflowInstance.getMediaPackage(), targetElement));
452 }
453 }
454
455
456
457
458
459
460
461 private Boolean getTranslationMode(WorkflowInstance workflowInstance) {
462 WorkflowOperationInstance operation = workflowInstance.getCurrentOperation();
463 return BooleanUtils.toBoolean(StringUtils.trimToEmpty(operation.getConfiguration(TRANSLATE_MODE)));
464 }
465
466
467
468
469
470
471
472
473 private String getMediaPackageLanguage(MediaPackage mediaPackage, WorkflowInstance workflowInstance) {
474
475
476 WorkflowOperationInstance operation = workflowInstance.getCurrentOperation();
477 String language = StringUtils.trimToEmpty(operation.getConfiguration(LANGUAGE_CODE));
478
479 if (language.isEmpty()) {
480
481 MediaPackageMetadata dublinCoreMetadata = dublinCoreCatalogService.getMetadata(mediaPackage);
482 language = StringUtils.trimToEmpty(dublinCoreMetadata.getLanguage());
483 }
484
485 if (language.isEmpty()) {
486
487 language = StringUtils.trimToEmpty(mediaPackage.getLanguage());
488 }
489
490 if (language.isEmpty()) {
491
492 language = Objects.toString(operation.getConfiguration(LANGUAGE_FALLBACK), "");
493 }
494
495 return language;
496 }
497
498
499
500
501
502
503 @Reference
504 public void setSpeechToTextService(SpeechToTextService speechToTextService) {
505 this.speechToTextService = speechToTextService;
506 }
507
508 @Reference
509 public void setMediaInspectionService(MediaInspectionService mediaInspectionService) {
510 this.mediaInspectionService = mediaInspectionService;
511 }
512
513 @Reference
514 public void setWorkspace(Workspace workspace) {
515 this.workspace = workspace;
516 }
517
518 @Reference
519 public void setDublinCoreCatalogService(DublinCoreCatalogService dublinCoreCatalogService) {
520 this.dublinCoreCatalogService = dublinCoreCatalogService;
521 }
522
523 @Reference
524 public void setServiceRegistry(ServiceRegistry serviceRegistry) {
525 this.serviceRegistry = serviceRegistry;
526 }
527 }