View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  
22  package org.opencastproject.caption.converters;
23  
24  import org.opencastproject.caption.api.Caption;
25  import org.opencastproject.caption.api.CaptionConverter;
26  import org.opencastproject.caption.api.CaptionConverterException;
27  import org.opencastproject.caption.api.IllegalTimeFormatException;
28  import org.opencastproject.caption.api.Time;
29  import org.opencastproject.caption.impl.CaptionImpl;
30  import org.opencastproject.caption.impl.TimeImpl;
31  import org.opencastproject.mediapackage.MediaPackageElement;
32  import org.opencastproject.mediapackage.MediaPackageElement.Type;
33  import org.opencastproject.metadata.mpeg7.Audio;
34  import org.opencastproject.metadata.mpeg7.AudioSegment;
35  import org.opencastproject.metadata.mpeg7.FreeTextAnnotation;
36  import org.opencastproject.metadata.mpeg7.FreeTextAnnotationImpl;
37  import org.opencastproject.metadata.mpeg7.MediaDuration;
38  import org.opencastproject.metadata.mpeg7.MediaTime;
39  import org.opencastproject.metadata.mpeg7.MediaTimeImpl;
40  import org.opencastproject.metadata.mpeg7.MediaTimePoint;
41  import org.opencastproject.metadata.mpeg7.Mpeg7Catalog;
42  import org.opencastproject.metadata.mpeg7.Mpeg7CatalogImpl;
43  import org.opencastproject.metadata.mpeg7.TemporalDecomposition;
44  import org.opencastproject.metadata.mpeg7.TextAnnotation;
45  import org.opencastproject.util.XmlSafeParser;
46  
47  import org.osgi.service.component.annotations.Component;
48  import org.slf4j.Logger;
49  import org.slf4j.LoggerFactory;
50  
51  import java.io.IOException;
52  import java.io.InputStream;
53  import java.io.OutputStream;
54  import java.util.ArrayList;
55  import java.util.Calendar;
56  import java.util.HashSet;
57  import java.util.Iterator;
58  import java.util.List;
59  import java.util.Set;
60  import java.util.TimeZone;
61  
62  import javax.xml.parsers.ParserConfigurationException;
63  import javax.xml.transform.Transformer;
64  import javax.xml.transform.TransformerConfigurationException;
65  import javax.xml.transform.TransformerException;
66  import javax.xml.transform.TransformerFactoryConfigurationError;
67  import javax.xml.transform.dom.DOMSource;
68  import javax.xml.transform.stream.StreamResult;
69  
70  /**
71   * This is converter for Mpeg7 caption format.
72   */
73  @Component(
74      immediate = true,
75      service = { CaptionConverter.class },
76      property = {
77          "service.description=Mpeg7 caption converter",
78          "caption.format=mpeg7"
79      }
80  )
81  public class Mpeg7CaptionConverter implements CaptionConverter {
82  
83    /** File extension for mpeg 7 catalogs */
84    private static final String EXTENSION = "xml";
85  
86    /** The logger */
87    private static final Logger logger = LoggerFactory.getLogger(Mpeg7CaptionConverter.class);
88  
89    /**
90     * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
91     */
92    @SuppressWarnings("unchecked")
93    @Override
94    public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
95      List<Caption> captions = new ArrayList<Caption>();
96      Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
97      Iterator<Audio> audioContentIterator = catalog.audioContent();
98      if (audioContentIterator == null) {
99        return captions;
100     }
101     content: while (audioContentIterator.hasNext()) {
102       Audio audioContent = audioContentIterator.next();
103       TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent
104               .getTemporalDecomposition();
105       Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
106       if (audioSegmentIterator == null) {
107         continue content;
108       }
109       while (audioSegmentIterator.hasNext()) {
110         AudioSegment segment = audioSegmentIterator.next();
111         Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
112         if (annotationIterator == null) {
113           continue content;
114         }
115         while (annotationIterator.hasNext()) {
116           TextAnnotation annotation = annotationIterator.next();
117           if (!annotation.getLanguage().equals(language)) {
118             logger.debug("Skipping audio content '{}' because of language mismatch", audioContent.getId());
119             continue content;
120           }
121 
122           List<String> captionLines = new ArrayList<String>();
123           Iterator<FreeTextAnnotation> freeTextAnnotationIterator = annotation.freeTextAnnotations();
124           if (freeTextAnnotationIterator == null) {
125             continue;
126           }
127 
128           while (freeTextAnnotationIterator.hasNext()) {
129             FreeTextAnnotation freeTextAnnotation = freeTextAnnotationIterator.next();
130             captionLines.add(freeTextAnnotation.getText());
131           }
132 
133           MediaTime segmentTime = segment.getMediaTime();
134           MediaTimePoint stp = segmentTime.getMediaTimePoint();
135           MediaDuration d = segmentTime.getMediaDuration();
136 
137           Calendar startCalendar = Calendar.getInstance();
138           int millisAtStart = (int) (stp.getTimeInMilliseconds() - (((stp.getHour() * 60 + stp.getMinutes()) * 60 + stp
139                   .getSeconds()) * 1000));
140           int millisAtEnd = (int) (d.getDurationInMilliseconds() - (((d.getHours() * 60 + d.getMinutes()) * 60 + d
141                   .getSeconds()) * 1000));
142 
143           startCalendar.set(Calendar.HOUR, stp.getHour());
144           startCalendar.set(Calendar.MINUTE, stp.getMinutes());
145           startCalendar.set(Calendar.SECOND, stp.getSeconds());
146           startCalendar.set(Calendar.MILLISECOND, millisAtStart);
147 
148           startCalendar.add(Calendar.HOUR, d.getHours());
149           startCalendar.add(Calendar.MINUTE, d.getMinutes());
150           startCalendar.add(Calendar.SECOND, d.getSeconds());
151           startCalendar.set(Calendar.MILLISECOND, millisAtEnd);
152 
153           try {
154             Time startTime = new TimeImpl(stp.getHour(), stp.getMinutes(), stp.getSeconds(), millisAtStart);
155             Time endTime = new TimeImpl(startCalendar.get(Calendar.HOUR), startCalendar.get(Calendar.MINUTE),
156                     startCalendar.get(Calendar.SECOND), startCalendar.get(Calendar.MILLISECOND));
157             Caption caption = new CaptionImpl(startTime, endTime, captionLines.toArray(
158                 new String[captionLines.size()]));
159             captions.add(caption);
160           } catch (IllegalTimeFormatException e) {
161             logger.warn("Error setting caption time: {}", e.getMessage());
162           }
163         }
164       }
165     }
166 
167     return captions;
168   }
169 
170   @Override
171   public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
172 
173     Mpeg7Catalog mpeg7 = Mpeg7CatalogImpl.newInstance();
174 
175     MediaTime mediaTime = new MediaTimeImpl(0, 0);
176     Audio audioContent = mpeg7.addAudioContent("captions", mediaTime, null);
177     @SuppressWarnings("unchecked")
178     TemporalDecomposition<AudioSegment> captionDecomposition = (TemporalDecomposition<AudioSegment>) audioContent
179             .getTemporalDecomposition();
180 
181     int segmentCount = 0;
182     for (Caption caption : captions) {
183 
184       // Get all the words/parts for the transcript
185       String[] words = caption.getCaption();
186       if (words.length == 0) {
187         continue;
188       }
189 
190       // Create a new segment
191       AudioSegment segment = captionDecomposition.createSegment("segment-" + segmentCount++);
192 
193       Time captionST = caption.getStartTime();
194       Time captionET = caption.getStopTime();
195 
196       // Calculate start time
197       Calendar startTime = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
198       startTime.setTimeInMillis(0);
199       startTime.add(Calendar.HOUR_OF_DAY, captionST.getHours());
200       startTime.add(Calendar.MINUTE, captionST.getMinutes());
201       startTime.add(Calendar.SECOND, captionST.getSeconds());
202       startTime.add(Calendar.MILLISECOND, captionST.getMilliseconds());
203 
204       // Calculate end time
205       Calendar endTime = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
206       endTime.setTimeInMillis(0);
207       endTime.add(Calendar.HOUR_OF_DAY, captionET.getHours());
208       endTime.add(Calendar.MINUTE, captionET.getMinutes());
209       endTime.add(Calendar.SECOND, captionET.getSeconds());
210       endTime.add(Calendar.MILLISECOND, captionET.getMilliseconds());
211 
212       long startTimeInMillis = startTime.getTimeInMillis();
213       long endTimeInMillis = endTime.getTimeInMillis();
214 
215       long duration = endTimeInMillis - startTimeInMillis;
216 
217       segment.setMediaTime(new MediaTimeImpl(startTimeInMillis, duration));
218       TextAnnotation textAnnotation = segment.createTextAnnotation(0, 0, language);
219 
220       // Collect all the words in the segment
221       StringBuffer captionLine = new StringBuffer();
222 
223       // Add each words/parts as segment to the catalog
224       for (String word : words) {
225         if (captionLine.length() > 0) {
226           captionLine.append(' ');
227         }
228         captionLine.append(word);
229       }
230 
231       // Append the text to the annotation
232       textAnnotation.addFreeTextAnnotation(new FreeTextAnnotationImpl(captionLine.toString()));
233 
234     }
235 
236     Transformer tf = null;
237     try {
238       tf = XmlSafeParser.newTransformerFactory().newTransformer();
239       DOMSource xmlSource = new DOMSource(mpeg7.toXml());
240       tf.transform(xmlSource, new StreamResult(outputStream));
241     } catch (TransformerConfigurationException e) {
242       logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
243       throw new IOException(e);
244     } catch (TransformerFactoryConfigurationError e) {
245       logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
246       throw new IOException(e);
247     } catch (TransformerException e) {
248       logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
249       throw new IOException(e);
250     } catch (ParserConfigurationException e) {
251       logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
252       throw new IOException(e);
253     }
254   }
255 
256   /**
257    * @see org.opencastproject.caption.api.CaptionConverter#getLanguageList(java.io.InputStream)
258    */
259   @SuppressWarnings("unchecked")
260   @Override
261   public String[] getLanguageList(InputStream inputStream) throws CaptionConverterException {
262     Set<String> languages = new HashSet<String>();
263 
264     Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
265     Iterator<Audio> audioContentIterator = catalog.audioContent();
266     if (audioContentIterator == null) {
267       return languages.toArray(new String[languages.size()]);
268     }
269     content: while (audioContentIterator.hasNext()) {
270       Audio audioContent = audioContentIterator.next();
271       TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent
272               .getTemporalDecomposition();
273       Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
274       if (audioSegmentIterator == null) {
275         continue content;
276       }
277       while (audioSegmentIterator.hasNext()) {
278         AudioSegment segment = audioSegmentIterator.next();
279         Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
280         if (annotationIterator == null) {
281           continue content;
282         }
283         while (annotationIterator.hasNext()) {
284           TextAnnotation annotation = annotationIterator.next();
285           String language = annotation.getLanguage();
286           if (language != null) {
287             languages.add(language);
288           }
289         }
290       }
291     }
292 
293     return languages.toArray(new String[languages.size()]);
294   }
295 
296   /**
297    * @see org.opencastproject.caption.api.CaptionConverter#getExtension()
298    */
299   @Override
300   public String getExtension() {
301     return EXTENSION;
302   }
303 
304   @Override
305   public Type getElementType() {
306     return MediaPackageElement.Type.Catalog;
307   }
308 }