View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  
22  package org.opencastproject.caption.converters;
23  
24  import org.opencastproject.caption.api.Caption;
25  import org.opencastproject.caption.api.CaptionConverter;
26  import org.opencastproject.caption.api.CaptionConverterException;
27  import org.opencastproject.caption.api.IllegalTimeFormatException;
28  import org.opencastproject.caption.api.Time;
29  import org.opencastproject.caption.impl.CaptionImpl;
30  import org.opencastproject.caption.impl.TimeImpl;
31  import org.opencastproject.mediapackage.MediaPackageElement;
32  import org.opencastproject.mediapackage.MediaPackageElement.Type;
33  import org.opencastproject.metadata.mpeg7.Audio;
34  import org.opencastproject.metadata.mpeg7.AudioSegment;
35  import org.opencastproject.metadata.mpeg7.FreeTextAnnotation;
36  import org.opencastproject.metadata.mpeg7.FreeTextAnnotationImpl;
37  import org.opencastproject.metadata.mpeg7.MediaDuration;
38  import org.opencastproject.metadata.mpeg7.MediaTime;
39  import org.opencastproject.metadata.mpeg7.MediaTimeImpl;
40  import org.opencastproject.metadata.mpeg7.MediaTimePoint;
41  import org.opencastproject.metadata.mpeg7.Mpeg7Catalog;
42  import org.opencastproject.metadata.mpeg7.Mpeg7CatalogImpl;
43  import org.opencastproject.metadata.mpeg7.TemporalDecomposition;
44  import org.opencastproject.metadata.mpeg7.TextAnnotation;
45  import org.opencastproject.util.XmlSafeParser;
46  
47  import org.osgi.service.component.annotations.Component;
48  import org.slf4j.Logger;
49  import org.slf4j.LoggerFactory;
50  
51  import java.io.IOException;
52  import java.io.InputStream;
53  import java.io.OutputStream;
54  import java.util.ArrayList;
55  import java.util.Calendar;
56  import java.util.HashSet;
57  import java.util.Iterator;
58  import java.util.List;
59  import java.util.Set;
60  import java.util.TimeZone;
61  
62  import javax.xml.parsers.ParserConfigurationException;
63  import javax.xml.transform.Transformer;
64  import javax.xml.transform.TransformerConfigurationException;
65  import javax.xml.transform.TransformerException;
66  import javax.xml.transform.TransformerFactoryConfigurationError;
67  import javax.xml.transform.dom.DOMSource;
68  import javax.xml.transform.stream.StreamResult;
69  
70  /**
71   * This is converter for Mpeg7 caption format.
72   */
73  @Component(
74      immediate = true,
75      service = { CaptionConverter.class },
76      property = {
77          "service.description=Mpeg7 caption converter",
78          "caption.format=mpeg7"
79      }
80  )
81  public class Mpeg7CaptionConverter implements CaptionConverter {
82  
83    /** File extension for mpeg 7 catalogs */
84    private static final String EXTENSION = "xml";
85  
86    /** The logger */
87    private static final Logger logger = LoggerFactory.getLogger(Mpeg7CaptionConverter.class);
88  
89    /**
90     * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
91     */
92    @SuppressWarnings("unchecked")
93    @Override
94    public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
95      List<Caption> captions = new ArrayList<Caption>();
96      Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
97      Iterator<Audio> audioContentIterator = catalog.audioContent();
98      if (audioContentIterator == null)
99        return captions;
100     content: while (audioContentIterator.hasNext()) {
101       Audio audioContent = audioContentIterator.next();
102       TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent
103               .getTemporalDecomposition();
104       Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
105       if (audioSegmentIterator == null)
106         continue content;
107       while (audioSegmentIterator.hasNext()) {
108         AudioSegment segment = audioSegmentIterator.next();
109         Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
110         if (annotationIterator == null)
111           continue content;
112         while (annotationIterator.hasNext()) {
113           TextAnnotation annotation = annotationIterator.next();
114           if (!annotation.getLanguage().equals(language)) {
115             logger.debug("Skipping audio content '{}' because of language mismatch", audioContent.getId());
116             continue content;
117           }
118 
119           List<String> captionLines = new ArrayList<String>();
120           Iterator<FreeTextAnnotation> freeTextAnnotationIterator = annotation.freeTextAnnotations();
121           if (freeTextAnnotationIterator == null)
122             continue;
123 
124           while (freeTextAnnotationIterator.hasNext()) {
125             FreeTextAnnotation freeTextAnnotation = freeTextAnnotationIterator.next();
126             captionLines.add(freeTextAnnotation.getText());
127           }
128 
129           MediaTime segmentTime = segment.getMediaTime();
130           MediaTimePoint stp = segmentTime.getMediaTimePoint();
131           MediaDuration d = segmentTime.getMediaDuration();
132 
133           Calendar startCalendar = Calendar.getInstance();
134           int millisAtStart = (int) (stp.getTimeInMilliseconds() - (((stp.getHour() * 60 + stp.getMinutes()) * 60 + stp
135                   .getSeconds()) * 1000));
136           int millisAtEnd = (int) (d.getDurationInMilliseconds() - (((d.getHours() * 60 + d.getMinutes()) * 60 + d
137                   .getSeconds()) * 1000));
138 
139           startCalendar.set(Calendar.HOUR, stp.getHour());
140           startCalendar.set(Calendar.MINUTE, stp.getMinutes());
141           startCalendar.set(Calendar.SECOND, stp.getSeconds());
142           startCalendar.set(Calendar.MILLISECOND, millisAtStart);
143 
144           startCalendar.add(Calendar.HOUR, d.getHours());
145           startCalendar.add(Calendar.MINUTE, d.getMinutes());
146           startCalendar.add(Calendar.SECOND, d.getSeconds());
147           startCalendar.set(Calendar.MILLISECOND, millisAtEnd);
148 
149           try {
150             Time startTime = new TimeImpl(stp.getHour(), stp.getMinutes(), stp.getSeconds(), millisAtStart);
151             Time endTime = new TimeImpl(startCalendar.get(Calendar.HOUR), startCalendar.get(Calendar.MINUTE),
152                     startCalendar.get(Calendar.SECOND), startCalendar.get(Calendar.MILLISECOND));
153             Caption caption = new CaptionImpl(startTime, endTime, captionLines.toArray(new String[captionLines.size()]));
154             captions.add(caption);
155           } catch (IllegalTimeFormatException e) {
156             logger.warn("Error setting caption time: {}", e.getMessage());
157           }
158         }
159       }
160     }
161 
162     return captions;
163   }
164 
165   @Override
166   public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
167 
168     Mpeg7Catalog mpeg7 = Mpeg7CatalogImpl.newInstance();
169 
170     MediaTime mediaTime = new MediaTimeImpl(0, 0);
171     Audio audioContent = mpeg7.addAudioContent("captions", mediaTime, null);
172     @SuppressWarnings("unchecked")
173     TemporalDecomposition<AudioSegment> captionDecomposition = (TemporalDecomposition<AudioSegment>) audioContent
174             .getTemporalDecomposition();
175 
176     int segmentCount = 0;
177     for (Caption caption : captions) {
178 
179       // Get all the words/parts for the transcript
180       String[] words = caption.getCaption();
181       if (words.length == 0)
182         continue;
183 
184       // Create a new segment
185       AudioSegment segment = captionDecomposition.createSegment("segment-" + segmentCount++);
186 
187       Time captionST = caption.getStartTime();
188       Time captionET = caption.getStopTime();
189 
190       // Calculate start time
191       Calendar startTime = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
192       startTime.setTimeInMillis(0);
193       startTime.add(Calendar.HOUR_OF_DAY, captionST.getHours());
194       startTime.add(Calendar.MINUTE, captionST.getMinutes());
195       startTime.add(Calendar.SECOND, captionST.getSeconds());
196       startTime.add(Calendar.MILLISECOND, captionST.getMilliseconds());
197 
198       // Calculate end time
199       Calendar endTime = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
200       endTime.setTimeInMillis(0);
201       endTime.add(Calendar.HOUR_OF_DAY, captionET.getHours());
202       endTime.add(Calendar.MINUTE, captionET.getMinutes());
203       endTime.add(Calendar.SECOND, captionET.getSeconds());
204       endTime.add(Calendar.MILLISECOND, captionET.getMilliseconds());
205 
206       long startTimeInMillis = startTime.getTimeInMillis();
207       long endTimeInMillis = endTime.getTimeInMillis();
208 
209       long duration = endTimeInMillis - startTimeInMillis;
210 
211       segment.setMediaTime(new MediaTimeImpl(startTimeInMillis, duration));
212       TextAnnotation textAnnotation = segment.createTextAnnotation(0, 0, language);
213 
214       // Collect all the words in the segment
215       StringBuffer captionLine = new StringBuffer();
216 
217       // Add each words/parts as segment to the catalog
218       for (String word : words) {
219         if (captionLine.length() > 0)
220           captionLine.append(' ');
221         captionLine.append(word);
222       }
223 
224       // Append the text to the annotation
225       textAnnotation.addFreeTextAnnotation(new FreeTextAnnotationImpl(captionLine.toString()));
226 
227     }
228 
229     Transformer tf = null;
230     try {
231       tf = XmlSafeParser.newTransformerFactory().newTransformer();
232       DOMSource xmlSource = new DOMSource(mpeg7.toXml());
233       tf.transform(xmlSource, new StreamResult(outputStream));
234     } catch (TransformerConfigurationException e) {
235       logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
236       throw new IOException(e);
237     } catch (TransformerFactoryConfigurationError e) {
238       logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
239       throw new IOException(e);
240     } catch (TransformerException e) {
241       logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
242       throw new IOException(e);
243     } catch (ParserConfigurationException e) {
244       logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
245       throw new IOException(e);
246     }
247   }
248 
249   /**
250    * @see org.opencastproject.caption.api.CaptionConverter#getLanguageList(java.io.InputStream)
251    */
252   @SuppressWarnings("unchecked")
253   @Override
254   public String[] getLanguageList(InputStream inputStream) throws CaptionConverterException {
255     Set<String> languages = new HashSet<String>();
256 
257     Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
258     Iterator<Audio> audioContentIterator = catalog.audioContent();
259     if (audioContentIterator == null)
260       return languages.toArray(new String[languages.size()]);
261     content: while (audioContentIterator.hasNext()) {
262       Audio audioContent = audioContentIterator.next();
263       TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent
264               .getTemporalDecomposition();
265       Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
266       if (audioSegmentIterator == null)
267         continue content;
268       while (audioSegmentIterator.hasNext()) {
269         AudioSegment segment = audioSegmentIterator.next();
270         Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
271         if (annotationIterator == null)
272           continue content;
273         while (annotationIterator.hasNext()) {
274           TextAnnotation annotation = annotationIterator.next();
275           String language = annotation.getLanguage();
276           if (language != null)
277             languages.add(language);
278         }
279       }
280     }
281 
282     return languages.toArray(new String[languages.size()]);
283   }
284 
285   /**
286    * @see org.opencastproject.caption.api.CaptionConverter#getExtension()
287    */
288   @Override
289   public String getExtension() {
290     return EXTENSION;
291   }
292 
293   @Override
294   public Type getElementType() {
295     return MediaPackageElement.Type.Catalog;
296   }
297 }