1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.opencastproject.workflow.handler.workflow;
23
24 import org.opencastproject.job.api.JobContext;
25 import org.opencastproject.mediapackage.MediaPackage;
26 import org.opencastproject.mediapackage.MediaPackageElement;
27 import org.opencastproject.mediapackage.MediaPackageElementBuilder;
28 import org.opencastproject.mediapackage.MediaPackageElementBuilderFactory;
29 import org.opencastproject.mediapackage.MediaPackageElementFlavor;
30 import org.opencastproject.mediapackage.Track;
31 import org.opencastproject.mediapackage.selector.SimpleElementSelector;
32 import org.opencastproject.serviceregistry.api.ServiceRegistry;
33 import org.opencastproject.subtitleparser.SubtitleParsingException;
34 import org.opencastproject.subtitleparser.webvttparser.WebVTTParser;
35 import org.opencastproject.subtitleparser.webvttparser.WebVTTSubtitle;
36 import org.opencastproject.subtitleparser.webvttparser.WebVTTSubtitleCue;
37 import org.opencastproject.util.NotFoundException;
38 import org.opencastproject.workflow.api.AbstractWorkflowOperationHandler;
39 import org.opencastproject.workflow.api.ConfiguredTagsAndFlavors;
40 import org.opencastproject.workflow.api.WorkflowInstance;
41 import org.opencastproject.workflow.api.WorkflowOperationException;
42 import org.opencastproject.workflow.api.WorkflowOperationHandler;
43 import org.opencastproject.workflow.api.WorkflowOperationResult;
44 import org.opencastproject.workflow.api.WorkflowOperationResult.Action;
45 import org.opencastproject.workspace.api.Workspace;
46
47 import com.google.gson.Gson;
48
49 import org.apache.commons.io.IOUtils;
50 import org.osgi.service.component.ComponentContext;
51 import org.osgi.service.component.annotations.Component;
52 import org.osgi.service.component.annotations.Reference;
53 import org.slf4j.Logger;
54 import org.slf4j.LoggerFactory;
55
56 import java.io.IOException;
57 import java.io.InputStream;
58 import java.net.URI;
59 import java.nio.charset.StandardCharsets;
60 import java.util.ArrayList;
61 import java.util.Collection;
62 import java.util.List;
63 import java.util.Optional;
64
65
66
67
68 @Component(
69 immediate = true,
70 service = WorkflowOperationHandler.class,
71 property = {
72 "service.description=Processes a WebVTT subtitle document into CutMarks for the editor",
73 "workflow.operation=webvtt-to-cutmarks"
74 }
75 )
76 public class WebvttToCutMarksWorkflowOperationHandler extends AbstractWorkflowOperationHandler {
77
78
79 private static final Logger logger = LoggerFactory.getLogger(WebvttToCutMarksWorkflowOperationHandler.class);
80
81
82
83
84
85 private static final String CFGK_MIN_TIME_SILENCE_IN_MS = "min-time-silence-in-ms";
86 private static final String CFGK_MIN_TIME_SILENCE_IN_MS_DEFAULT = "0";
87
88
89 private static final String CFGK_BUFFER_AROUND_SUBTITLE_IN_MS = "buffer-time-around-subtitle";
90 private static final String CFGK_BUFFER_AROUND_SUBTITLE_IN_MS_DEFAULT = "0";
91
92
93 private static final String CFGK_TRACK_FLAVOR = "track-flavor";
94
95
96 private static final String CFGK_MIN_TIME_SILENCE_TREATMENT_START = "start-treatment";
97 private static final String CFGK_MIN_TIME_SILENCE_TREATMENT_START_DEFAULT = "IGNORE";
98
99 private static final String CFGK_MIN_TIME_SILENCE_TREATMENT_END = "end-treatment";
100 private static final String CFGK_MIN_TIME_SILENCE_TREATMENT_END_DEFAULT = "IGNORE";
101
102
103 private static final String TARGET_FILENAME = "cut-marks.json";
104
105 private static final Gson gson = new Gson();
106
107 private static class WFConfiguration {
108 protected long minTimeSilenceInMS;
109 protected long bufferTime;
110 protected MediaPackageElementFlavor sourceFlavor;
111 protected MediaPackageElementFlavor targetFlavor;
112 protected Optional<String> trackFlavor;
113 protected Treatment treatmentStart;
114 protected Treatment treatmentEnd;
115 }
116
117 private static class Times {
118 private Long begin;
119 private Long duration;
120 }
121
122
123 private enum Treatment {
124 IGNORE,
125 USE_FOR_MIN_TIME,
126 ALWAYS_INCLUDE
127 }
128
129
130 private Workspace workspace;
131
132 @Override
133 public WorkflowOperationResult start(WorkflowInstance workflowInstance, JobContext context)
134 throws WorkflowOperationException {
135
136 MediaPackage mp = workflowInstance.getMediaPackage();
137 logger.debug("Start WebVTT to CutMarks operation for mediapackage {}", mp.getIdentifier().toString());
138
139
140 WFConfiguration config = readConfiguration(workflowInstance);
141
142
143 WebVTTSubtitle webvtt = readAndParseWebVTT(mp, config.sourceFlavor);
144
145
146 Optional<Long> trackDuration = getTrackDuration(mp, config.trackFlavor);
147
148
149 List<Times> cutMarks = processWebVTTIntoCutPoints(
150 webvtt,
151 config.minTimeSilenceInMS,
152 config.bufferTime,
153 trackDuration,
154 config.treatmentStart,
155 config.treatmentEnd
156 );
157
158 saveCutMarks(mp, cutMarks, config.targetFlavor);
159
160 return createResult(mp, Action.CONTINUE);
161 }
162
163 private WFConfiguration readConfiguration(WorkflowInstance workflowInstance)
164 throws WorkflowOperationException {
165 ConfiguredTagsAndFlavors tagsAndFlavors = getTagsAndFlavors(workflowInstance,
166 Configuration.none, Configuration.one, Configuration.none, Configuration.one);
167 MediaPackageElementFlavor sourceFlavor = tagsAndFlavors.getSingleSrcFlavor();
168 MediaPackageElementFlavor targetFlavor = tagsAndFlavors.getSingleTargetFlavor();
169
170 long minTimeSilenceInMS;
171 long bufferTime;
172 try {
173 minTimeSilenceInMS = Long.parseLong(
174 getConfig(workflowInstance, CFGK_MIN_TIME_SILENCE_IN_MS, CFGK_MIN_TIME_SILENCE_IN_MS_DEFAULT)
175 );
176 bufferTime = Long.parseLong(
177 getConfig(workflowInstance, CFGK_BUFFER_AROUND_SUBTITLE_IN_MS, CFGK_BUFFER_AROUND_SUBTITLE_IN_MS_DEFAULT)
178 );
179
180 if (minTimeSilenceInMS < 0 || bufferTime < 0) {
181 throw new NumberFormatException("Negative Integer, must be positive");
182 }
183 } catch (NumberFormatException error) {
184 throw new WorkflowOperationException(
185 CFGK_MIN_TIME_SILENCE_IN_MS + " and " + CFGK_BUFFER_AROUND_SUBTITLE_IN_MS + "must be a postive integer",
186 error
187 );
188 }
189 if (minTimeSilenceInMS < 2 * bufferTime) {
190 throw new WorkflowOperationException(
191 CFGK_MIN_TIME_SILENCE_IN_MS + " must be at least double the value of "
192 + CFGK_BUFFER_AROUND_SUBTITLE_IN_MS
193 );
194 }
195
196 Optional<String> trackFlavor = getOptConfig(workflowInstance, CFGK_TRACK_FLAVOR);
197
198 String treatmentStrStart = getConfig(
199 workflowInstance,
200 CFGK_MIN_TIME_SILENCE_TREATMENT_START,
201 CFGK_MIN_TIME_SILENCE_TREATMENT_START_DEFAULT
202 );
203 String treatmentStrEnd = getConfig(
204 workflowInstance,
205 CFGK_MIN_TIME_SILENCE_TREATMENT_END,
206 CFGK_MIN_TIME_SILENCE_TREATMENT_END_DEFAULT
207 );
208 Treatment treatmentStart;
209 Treatment treatmentEnd;
210 try {
211 treatmentStart = Treatment.valueOf(treatmentStrStart);
212 treatmentEnd = Treatment.valueOf(treatmentStrEnd);
213 } catch (IllegalArgumentException error) {
214 throw new WorkflowOperationException(
215 CFGK_MIN_TIME_SILENCE_TREATMENT_START + " and "
216 + CFGK_MIN_TIME_SILENCE_TREATMENT_END
217 + " must be one of the values IGNORE, USE_FOR_MIN_TIME, ALWAYS_INCLUDE",
218 error
219 );
220 }
221 if (treatmentEnd != Treatment.IGNORE && trackFlavor.isEmpty()) {
222 throw new WorkflowOperationException(
223 CFGK_TRACK_FLAVOR + " is not defined, but "
224 + CFGK_MIN_TIME_SILENCE_TREATMENT_END + " is not set to IGNORE, therefore a "
225 + CFGK_TRACK_FLAVOR + " is needed"
226 );
227 }
228
229 WFConfiguration config = new WFConfiguration();
230
231 config.minTimeSilenceInMS = minTimeSilenceInMS;
232 config.bufferTime = bufferTime;
233 config.sourceFlavor = sourceFlavor;
234 config.targetFlavor = targetFlavor;
235 config.trackFlavor = trackFlavor;
236 config.treatmentStart = treatmentStart;
237 config.treatmentEnd = treatmentEnd;
238
239 return config;
240 }
241
242 private List<Times> processWebVTTIntoCutPoints(
243 WebVTTSubtitle webvtt,
244 long minTimeSilenceInMS,
245 long bufferTime,
246 Optional<Long> trackDuration,
247 Treatment treatmentStart,
248 Treatment treatmentEnd
249 ) {
250 List<Times> cutMarks = new ArrayList<Times>();
251 List<WebVTTSubtitleCue> cues = webvtt.getCues();
252 if (cues.size() > 0) {
253 WebVTTSubtitleCue firstCue = cues.remove(0);
254
255 long oldMarkStart = firstCue.getStartTime();
256 long oldMarkEnd = firstCue.getEndTime();
257
258 for (WebVTTSubtitleCue cue : webvtt.getCues()) {
259 long newMarkStart = cue.getStartTime();
260 long newMarkEnd = cue.getEndTime();
261
262
263 if (newMarkStart - oldMarkEnd > minTimeSilenceInMS) {
264
265 Times oldMark = new Times();
266
267 oldMark.begin = oldMarkStart - bufferTime;
268 oldMark.duration = oldMarkEnd - oldMark.begin + bufferTime;
269 cutMarks.add(oldMark);
270
271
272 oldMarkStart = newMarkStart;
273 oldMarkEnd = newMarkEnd;
274 } else if (newMarkEnd > oldMarkEnd) {
275
276 oldMarkEnd = newMarkEnd;
277 }
278 }
279
280
281 Times lastMark = new Times();
282
283 lastMark.begin = oldMarkStart - bufferTime;
284 lastMark.duration = oldMarkEnd - lastMark.begin + bufferTime;
285 cutMarks.add(lastMark);
286
287
288
289
290
291
292
293 Times firstCutMark = cutMarks.get(0);
294 if (treatmentStart == Treatment.ALWAYS_INCLUDE) {
295 updateTimesBegin(firstCutMark, 0L);
296 } else if (treatmentStart == Treatment.USE_FOR_MIN_TIME) {
297 if ((firstCutMark.begin + bufferTime) - 0L <= minTimeSilenceInMS) {
298 updateTimesBegin(firstCutMark, 0L);
299 }
300 } else if (treatmentStart == Treatment.IGNORE) {
301 if (firstCutMark.begin < 0) {
302 updateTimesBegin(firstCutMark, 0L);
303 }
304 }
305 if (trackDuration.isPresent()) {
306 long trackDur = trackDuration.get();
307 Times lastCutMark = cutMarks.get(cutMarks.size() - 1);
308 if (treatmentEnd == Treatment.ALWAYS_INCLUDE) {
309 updateTimesEnd(lastCutMark, trackDur);
310 } else if (treatmentEnd == Treatment.USE_FOR_MIN_TIME) {
311 if (trackDur - (lastCutMark.begin + lastCutMark.duration - bufferTime) <= minTimeSilenceInMS) {
312 updateTimesEnd(lastCutMark, trackDur);
313 }
314 } else if (treatmentEnd == Treatment.IGNORE) {
315
316 if (lastCutMark.begin + lastCutMark.duration > trackDur) {
317 updateTimesEnd(lastCutMark, trackDur);
318 }
319 }
320 }
321 }
322
323 return cutMarks;
324 }
325
326
327 private void updateTimesBegin(Times toUpdate, long newBegin) {
328 Long end = toUpdate.begin + toUpdate.duration;
329 toUpdate.begin = newBegin;
330 toUpdate.duration = end - newBegin;
331 }
332
333 private void updateTimesEnd(Times toUpdate, long newEnd) {
334 toUpdate.duration = newEnd - toUpdate.begin;
335 }
336
337 private void saveCutMarks(MediaPackage mp, List<Times> cutMarks, MediaPackageElementFlavor targetFlavor)
338 throws WorkflowOperationException {
339 String jsonCutMarks = gson.toJson(cutMarks);
340
341 try {
342 InputStream cutMarksOut = IOUtils.toInputStream(jsonCutMarks, StandardCharsets.UTF_8);
343
344 MediaPackageElementBuilder mpeBuilder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder();
345 MediaPackageElement mpe = mpeBuilder.newElement(MediaPackageElement.Type.Attachment, targetFlavor);
346 mpe.generateIdentifier();
347
348 URI cutMarksURI = workspace.put(mp.getIdentifier().toString(), mpe.getIdentifier(), TARGET_FILENAME, cutMarksOut);
349
350 mpe.setURI(cutMarksURI);
351
352 mp.add(mpe);
353 } catch (IOException e) {
354 throw new WorkflowOperationException("Couldn't write resulting cutMarks");
355 }
356 }
357
358 private WebVTTSubtitle readAndParseWebVTT(MediaPackage mp, MediaPackageElementFlavor sourceFlavor)
359 throws WorkflowOperationException {
360
361 SimpleElementSelector elementSelector = new SimpleElementSelector();
362 elementSelector.addFlavor(sourceFlavor);
363 Collection<MediaPackageElement> elements = elementSelector.select(mp, false);
364 MediaPackageElement[] webvttElements = elements.toArray(new MediaPackageElement[elements.size()]);
365 if (webvttElements.length != 1) {
366 throw new WorkflowOperationException("Couldn't uniqly identify WebVTT Element");
367 }
368 URI webvttURI = webvttElements[0].getURI();
369
370
371 InputStream webvttIS = null;
372 WebVTTSubtitle webvtt;
373 try {
374 webvttIS = workspace.read(webvttURI);
375 WebVTTParser wvparser = new WebVTTParser();
376
377 webvtt = wvparser.parse(webvttIS);
378 } catch (NullPointerException | IOException | NotFoundException e) {
379 throw new WorkflowOperationException("Couldn't open WebVTT file for parsing", e);
380 } catch (SubtitleParsingException e) {
381 throw new WorkflowOperationException("Failed to parse WebVTT File", e);
382 } finally {
383 try {
384 if (webvttIS != null) {
385 webvttIS.close();
386 } else {
387 logger.debug("WebVTT InputStream is null (mediapackage {})", mp.getIdentifier().toString());
388 }
389 } catch (IOException e) {
390 logger.warn("Couldn't close '{}' properly (mediapackage {})", webvttURI.toString(),
391 mp.getIdentifier().toString());
392 }
393 }
394
395 return webvtt;
396 }
397
398 private Optional<Long> getTrackDuration(MediaPackage mp, Optional<String> trackFlavor)
399 throws WorkflowOperationException {
400 if (trackFlavor.isPresent()) {
401 String flavor = trackFlavor.get();
402 Track[] tracks;
403 try {
404 tracks = mp.getTracks(MediaPackageElementFlavor.parseFlavor(flavor));
405 } catch (IllegalArgumentException e) {
406 throw new WorkflowOperationException("Couldn't parse " + CFGK_TRACK_FLAVOR, e);
407 }
408 if (tracks.length != 1) {
409 throw new WorkflowOperationException(
410 "Multiple tracks or no track found with flavor '"
411 + flavor + "' in mediapackage '"
412 + mp.getIdentifier().toString() + "', exactly one needed"
413 );
414 }
415 return Optional.ofNullable(tracks[0].getDuration());
416 }
417
418 return Optional.empty();
419 }
420
421 @Override
422 public void activate(ComponentContext cc) {
423 super.activate(cc);
424 logger.info("Registering webvtt-to-cutmarks workflow operation handler");
425 }
426
427 @Reference
428 public void setWorkspace(Workspace workspace) {
429 this.workspace = workspace;
430 }
431
432 @Reference
433 @Override
434 public void setServiceRegistry(ServiceRegistry serviceRegistry) {
435 super.setServiceRegistry(serviceRegistry);
436 }
437
438 }
439