1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.opencastproject.workflow.handler.workflow;
23
24 import org.opencastproject.job.api.JobContext;
25 import org.opencastproject.mediapackage.MediaPackage;
26 import org.opencastproject.mediapackage.MediaPackageElement;
27 import org.opencastproject.mediapackage.MediaPackageElementBuilder;
28 import org.opencastproject.mediapackage.MediaPackageElementBuilderFactory;
29 import org.opencastproject.mediapackage.MediaPackageElementFlavor;
30 import org.opencastproject.mediapackage.Track;
31 import org.opencastproject.mediapackage.selector.SimpleElementSelector;
32 import org.opencastproject.serviceregistry.api.ServiceRegistry;
33 import org.opencastproject.subtitleparser.SubtitleParsingException;
34 import org.opencastproject.subtitleparser.webvttparser.WebVTTParser;
35 import org.opencastproject.subtitleparser.webvttparser.WebVTTSubtitle;
36 import org.opencastproject.subtitleparser.webvttparser.WebVTTSubtitleCue;
37 import org.opencastproject.util.NotFoundException;
38 import org.opencastproject.workflow.api.AbstractWorkflowOperationHandler;
39 import org.opencastproject.workflow.api.ConfiguredTagsAndFlavors;
40 import org.opencastproject.workflow.api.WorkflowInstance;
41 import org.opencastproject.workflow.api.WorkflowOperationException;
42 import org.opencastproject.workflow.api.WorkflowOperationHandler;
43 import org.opencastproject.workflow.api.WorkflowOperationResult;
44 import org.opencastproject.workflow.api.WorkflowOperationResult.Action;
45 import org.opencastproject.workspace.api.Workspace;
46
47 import com.entwinemedia.fn.data.Opt;
48 import com.google.gson.Gson;
49
50 import org.apache.commons.io.IOUtils;
51 import org.osgi.service.component.ComponentContext;
52 import org.osgi.service.component.annotations.Component;
53 import org.osgi.service.component.annotations.Reference;
54 import org.slf4j.Logger;
55 import org.slf4j.LoggerFactory;
56
57 import java.io.IOException;
58 import java.io.InputStream;
59 import java.net.URI;
60 import java.nio.charset.StandardCharsets;
61 import java.util.ArrayList;
62 import java.util.Collection;
63 import java.util.List;
64 import java.util.UUID;
65
66
67
68
69 @Component(
70 immediate = true,
71 service = WorkflowOperationHandler.class,
72 property = {
73 "service.description=Processes a WebVTT subtitle document into CutMarks for the editor",
74 "workflow.operation=webvtt-to-cutmarks"
75 }
76 )
77 public class WebvttToCutMarksWorkflowOperationHandler extends AbstractWorkflowOperationHandler {
78
79
80 private static final Logger logger = LoggerFactory.getLogger(WebvttToCutMarksWorkflowOperationHandler.class);
81
82
83
84
85
86 private static final String CFGK_MIN_TIME_SILENCE_IN_MS = "min-time-silence-in-ms";
87 private static final String CFGK_MIN_TIME_SILENCE_IN_MS_DEFAULT = "0";
88
89
90 private static final String CFGK_BUFFER_AROUND_SUBTITLE_IN_MS = "buffer-time-around-subtitle";
91 private static final String CFGK_BUFFER_AROUND_SUBTITLE_IN_MS_DEFAULT = "0";
92
93
94 private static final String CFGK_TRACK_FLAVOR = "track-flavor";
95
96
97 private static final String CFGK_MIN_TIME_SILENCE_TREATMENT_START = "start-treatment";
98 private static final String CFGK_MIN_TIME_SILENCE_TREATMENT_START_DEFAULT = "IGNORE";
99
100 private static final String CFGK_MIN_TIME_SILENCE_TREATMENT_END = "end-treatment";
101 private static final String CFGK_MIN_TIME_SILENCE_TREATMENT_END_DEFAULT = "IGNORE";
102
103
104 private static final String TARGET_FILENAME = "cut-marks.json";
105
106 private static final Gson gson = new Gson();
107
108 private static class WFConfiguration {
109 protected long minTimeSilenceInMS;
110 protected long bufferTime;
111 protected MediaPackageElementFlavor sourceFlavor;
112 protected MediaPackageElementFlavor targetFlavor;
113 protected Opt<String> trackFlavor;
114 protected Treatment treatmentStart;
115 protected Treatment treatmentEnd;
116 }
117
118 private static class Times {
119 private Long begin;
120 private Long duration;
121 }
122
123
124 private enum Treatment {
125 IGNORE,
126 USE_FOR_MIN_TIME,
127 ALWAYS_INCLUDE
128 }
129
130
131 private Workspace workspace;
132
133 @Override
134 public WorkflowOperationResult start(WorkflowInstance workflowInstance, JobContext context)
135 throws WorkflowOperationException {
136
137 MediaPackage mp = workflowInstance.getMediaPackage();
138 logger.debug("Start WebVTT to CutMarks operation for mediapackage {}", mp.getIdentifier().toString());
139
140
141 WFConfiguration config = readConfiguration(workflowInstance);
142
143
144 WebVTTSubtitle webvtt = readAndParseWebVTT(mp, config.sourceFlavor);
145
146
147 Opt<Long> trackDuration = getTrackDuration(mp, config.trackFlavor);
148
149
150 List<Times> cutMarks = processWebVTTIntoCutPoints(
151 webvtt,
152 config.minTimeSilenceInMS,
153 config.bufferTime,
154 trackDuration,
155 config.treatmentStart,
156 config.treatmentEnd
157 );
158
159 saveCutMarks(mp, cutMarks, config.targetFlavor);
160
161 return createResult(mp, Action.CONTINUE);
162 }
163
164 private WFConfiguration readConfiguration(WorkflowInstance workflowInstance)
165 throws WorkflowOperationException {
166 ConfiguredTagsAndFlavors tagsAndFlavors = getTagsAndFlavors(workflowInstance,
167 Configuration.none, Configuration.one, Configuration.none, Configuration.one);
168 MediaPackageElementFlavor sourceFlavor = tagsAndFlavors.getSingleSrcFlavor();
169 MediaPackageElementFlavor targetFlavor = tagsAndFlavors.getSingleTargetFlavor();
170
171 long minTimeSilenceInMS;
172 long bufferTime;
173 try {
174 minTimeSilenceInMS = Long.parseLong(
175 getConfig(workflowInstance, CFGK_MIN_TIME_SILENCE_IN_MS, CFGK_MIN_TIME_SILENCE_IN_MS_DEFAULT)
176 );
177 bufferTime = Long.parseLong(
178 getConfig(workflowInstance, CFGK_BUFFER_AROUND_SUBTITLE_IN_MS, CFGK_BUFFER_AROUND_SUBTITLE_IN_MS_DEFAULT)
179 );
180
181 if (minTimeSilenceInMS < 0 || bufferTime < 0) {
182 throw new NumberFormatException("Negative Integer, must be positive");
183 }
184 } catch (NumberFormatException error) {
185 throw new WorkflowOperationException(
186 CFGK_MIN_TIME_SILENCE_IN_MS + " and " + CFGK_BUFFER_AROUND_SUBTITLE_IN_MS + "must be a postive integer",
187 error
188 );
189 }
190 if (minTimeSilenceInMS < 2 * bufferTime) {
191 throw new WorkflowOperationException(
192 CFGK_MIN_TIME_SILENCE_IN_MS + " must be at least double the value of "
193 + CFGK_BUFFER_AROUND_SUBTITLE_IN_MS
194 );
195 }
196
197 Opt<String> trackFlavor = getOptConfig(workflowInstance, CFGK_TRACK_FLAVOR);
198
199 String treatmentStrStart = getConfig(
200 workflowInstance,
201 CFGK_MIN_TIME_SILENCE_TREATMENT_START,
202 CFGK_MIN_TIME_SILENCE_TREATMENT_START_DEFAULT
203 );
204 String treatmentStrEnd = getConfig(
205 workflowInstance,
206 CFGK_MIN_TIME_SILENCE_TREATMENT_END,
207 CFGK_MIN_TIME_SILENCE_TREATMENT_END_DEFAULT
208 );
209 Treatment treatmentStart;
210 Treatment treatmentEnd;
211 try {
212 treatmentStart = Treatment.valueOf(treatmentStrStart);
213 treatmentEnd = Treatment.valueOf(treatmentStrEnd);
214 } catch (IllegalArgumentException error) {
215 throw new WorkflowOperationException(
216 CFGK_MIN_TIME_SILENCE_TREATMENT_START + " and "
217 + CFGK_MIN_TIME_SILENCE_TREATMENT_END
218 + " must be one of the values IGNORE, USE_FOR_MIN_TIME, ALWAYS_INCLUDE",
219 error
220 );
221 }
222 if (treatmentEnd != Treatment.IGNORE && trackFlavor.isEmpty()) {
223 throw new WorkflowOperationException(
224 CFGK_TRACK_FLAVOR + " is not defined, but "
225 + CFGK_MIN_TIME_SILENCE_TREATMENT_END + " is not set to IGNORE, therefore a "
226 + CFGK_TRACK_FLAVOR + " is needed"
227 );
228 }
229
230 WFConfiguration config = new WFConfiguration();
231
232 config.minTimeSilenceInMS = minTimeSilenceInMS;
233 config.bufferTime = bufferTime;
234 config.sourceFlavor = sourceFlavor;
235 config.targetFlavor = targetFlavor;
236 config.trackFlavor = trackFlavor;
237 config.treatmentStart = treatmentStart;
238 config.treatmentEnd = treatmentEnd;
239
240 return config;
241 }
242
243 private List<Times> processWebVTTIntoCutPoints(
244 WebVTTSubtitle webvtt,
245 long minTimeSilenceInMS,
246 long bufferTime,
247 Opt<Long> trackDuration,
248 Treatment treatmentStart,
249 Treatment treatmentEnd
250 ) {
251 List<Times> cutMarks = new ArrayList<Times>();
252 List<WebVTTSubtitleCue> cues = webvtt.getCues();
253 if (cues.size() > 0) {
254 WebVTTSubtitleCue firstCue = cues.remove(0);
255
256 long oldMarkStart = firstCue.getStartTime();
257 long oldMarkEnd = firstCue.getEndTime();
258
259 for (WebVTTSubtitleCue cue : webvtt.getCues()) {
260 long newMarkStart = cue.getStartTime();
261 long newMarkEnd = cue.getEndTime();
262
263
264 if (newMarkStart - oldMarkEnd > minTimeSilenceInMS) {
265
266 Times oldMark = new Times();
267
268 oldMark.begin = oldMarkStart - bufferTime;
269 oldMark.duration = oldMarkEnd - oldMark.begin + bufferTime;
270 cutMarks.add(oldMark);
271
272
273 oldMarkStart = newMarkStart;
274 oldMarkEnd = newMarkEnd;
275 } else if (newMarkEnd > oldMarkEnd) {
276
277 oldMarkEnd = newMarkEnd;
278 }
279 }
280
281
282 Times lastMark = new Times();
283
284 lastMark.begin = oldMarkStart - bufferTime;
285 lastMark.duration = oldMarkEnd - lastMark.begin + bufferTime;
286 cutMarks.add(lastMark);
287
288
289
290
291
292
293 Times firstCutMark = cutMarks.get(0);
294 if (treatmentStart == Treatment.ALWAYS_INCLUDE) {
295 updateTimesBegin(firstCutMark, 0L);
296 } else if (treatmentStart == Treatment.USE_FOR_MIN_TIME) {
297 if ((firstCutMark.begin + bufferTime) - 0L <= minTimeSilenceInMS) {
298 updateTimesBegin(firstCutMark, 0L);
299 }
300 } else if (treatmentStart == Treatment.IGNORE) {
301 if (firstCutMark.begin < 0) {
302 updateTimesBegin(firstCutMark, 0L);
303 }
304 }
305 if (trackDuration.isDefined()) {
306 long trackDur = trackDuration.get();
307 Times lastCutMark = cutMarks.get(cutMarks.size() - 1);
308 if (treatmentEnd == Treatment.ALWAYS_INCLUDE) {
309 updateTimesEnd(lastCutMark, trackDur);
310 } else if (treatmentEnd == Treatment.USE_FOR_MIN_TIME) {
311 if (trackDur - (lastCutMark.begin + lastCutMark.duration - bufferTime) <= minTimeSilenceInMS) {
312 updateTimesEnd(lastCutMark, trackDur);
313 }
314 } else if (treatmentEnd == Treatment.IGNORE) {
315
316 if (lastCutMark.begin + lastCutMark.duration > trackDur) {
317 updateTimesEnd(lastCutMark, trackDur);
318 }
319 }
320 }
321 }
322
323 return cutMarks;
324 }
325
326
327 private void updateTimesBegin(Times toUpdate, long newBegin) {
328 Long end = toUpdate.begin + toUpdate.duration;
329 toUpdate.begin = newBegin;
330 toUpdate.duration = end - newBegin;
331 }
332
333 private void updateTimesEnd(Times toUpdate, long newEnd) {
334 toUpdate.duration = newEnd - toUpdate.begin;
335 }
336
337 private void saveCutMarks(MediaPackage mp, List<Times> cutMarks, MediaPackageElementFlavor targetFlavor)
338 throws WorkflowOperationException {
339 String jsonCutMarks = gson.toJson(cutMarks);
340
341 try {
342 InputStream cutMarksOut = IOUtils.toInputStream(jsonCutMarks, StandardCharsets.UTF_8);
343
344 MediaPackageElementBuilder mpeBuilder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder();
345 MediaPackageElement mpe = mpeBuilder.newElement(MediaPackageElement.Type.Attachment, targetFlavor);
346 mpe.setIdentifier(UUID.randomUUID().toString());
347
348 URI cutMarksURI = workspace.put(mp.getIdentifier().toString(), mpe.getIdentifier(), TARGET_FILENAME, cutMarksOut);
349
350 mpe.setURI(cutMarksURI);
351
352 mp.add(mpe);
353 } catch (IOException e) {
354 throw new WorkflowOperationException("Couldn't write resulting cutMarks");
355 }
356 }
357
358 private WebVTTSubtitle readAndParseWebVTT(MediaPackage mp, MediaPackageElementFlavor sourceFlavor)
359 throws WorkflowOperationException {
360
361 SimpleElementSelector elementSelector = new SimpleElementSelector();
362 elementSelector.addFlavor(sourceFlavor);
363 Collection<MediaPackageElement> elements = elementSelector.select(mp, false);
364 MediaPackageElement[] webvttElements = elements.toArray(new MediaPackageElement[elements.size()]);
365 if (webvttElements.length != 1) {
366 throw new WorkflowOperationException("Couldn't uniqly identify WebVTT Element");
367 }
368 URI webvttURI = webvttElements[0].getURI();
369
370
371 InputStream webvttIS = null;
372 WebVTTSubtitle webvtt;
373 try {
374 webvttIS = workspace.read(webvttURI);
375 WebVTTParser wvparser = new WebVTTParser();
376
377 webvtt = wvparser.parse(webvttIS);
378 } catch (NullPointerException | IOException | NotFoundException e) {
379 throw new WorkflowOperationException("Couldn't open WebVTT file for parsing", e);
380 } catch (SubtitleParsingException e) {
381 throw new WorkflowOperationException("Failed to parse WebVTT File", e);
382 } finally {
383 try {
384 if (webvttIS != null) {
385 webvttIS.close();
386 } else {
387 logger.debug("WebVTT InputStream is null (mediapackage {})", mp.getIdentifier().toString());
388 }
389 } catch (IOException e) {
390 logger.warn("Couldn't close '{}' properly (mediapackage {})", webvttURI.toString(), mp.getIdentifier().toString());
391 }
392 }
393
394 return webvtt;
395 }
396
397 private Opt<Long> getTrackDuration(MediaPackage mp, Opt<String> trackFlavor)
398 throws WorkflowOperationException {
399 if (trackFlavor.isDefined()) {
400 String flavor = trackFlavor.get();
401 Track[] tracks;
402 try {
403 tracks = mp.getTracks(MediaPackageElementFlavor.parseFlavor(flavor));
404 } catch (IllegalArgumentException e) {
405 throw new WorkflowOperationException("Couldn't parse " + CFGK_TRACK_FLAVOR, e);
406 }
407 if (tracks.length != 1) {
408 throw new WorkflowOperationException(
409 "Multiple tracks or no track found with flavor '"
410 + flavor + "' in mediapackage '"
411 + mp.getIdentifier().toString() + "', exactly one needed"
412 );
413 }
414 return Opt.nul(tracks[0].getDuration());
415 }
416
417 return Opt.none();
418 }
419
420 @Override
421 public void activate(ComponentContext cc) {
422 super.activate(cc);
423 logger.info("Registering webvtt-to-cutmarks workflow operation handler");
424 }
425
426 @Reference
427 public void setWorkspace(Workspace workspace) {
428 this.workspace = workspace;
429 }
430
431 @Reference
432 @Override
433 public void setServiceRegistry(ServiceRegistry serviceRegistry) {
434 super.setServiceRegistry(serviceRegistry);
435 }
436
437 }
438