View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  
22  package org.opencastproject.caption.converters;
23  
24  import org.opencastproject.caption.api.Caption;
25  import org.opencastproject.caption.api.CaptionConverter;
26  import org.opencastproject.caption.api.CaptionConverterException;
27  import org.opencastproject.caption.api.IllegalTimeFormatException;
28  import org.opencastproject.caption.api.Time;
29  import org.opencastproject.caption.impl.CaptionImpl;
30  import org.opencastproject.caption.impl.TimeImpl;
31  import org.opencastproject.caption.util.TimeUtil;
32  import org.opencastproject.mediapackage.MediaPackageElement;
33  import org.opencastproject.mediapackage.MediaPackageElement.Type;
34  
35  import org.osgi.service.component.annotations.Component;
36  import org.slf4j.Logger;
37  import org.slf4j.LoggerFactory;
38  
39  import java.io.BufferedWriter;
40  import java.io.IOException;
41  import java.io.InputStream;
42  import java.io.OutputStream;
43  import java.io.OutputStreamWriter;
44  import java.util.ArrayList;
45  import java.util.List;
46  import java.util.Scanner;
47  
48  /**
49   * Converter engine for SubRip srt caption format. It does not support advanced SubRip format (SubRip format with
50   * annotations). Advanced format will be parsed but all annotations will be stripped off.
51   *
52   */
53  @Component(
54      immediate = true,
55      service = { CaptionConverter.class },
56      property = {
57          "service.description=SubRip caption converter",
58          "caption.format=subrip"
59      }
60  )
61  public class SubRipCaptionConverter implements CaptionConverter {
62  
63    /** Logging utility */
64    private static final Logger logger = LoggerFactory.getLogger(SubRipCaptionConverter.class);
65  
66    private static final String EXTENSION = "srt";
67  
68    /** line ending used in srt - windows native in specification */
69    private static final String LINE_ENDING = "\r\n";
70  
71    /**
72     * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
73     *
74     * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
75     */
76    @Override
77    public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
78  
79      List<Caption> collection = new ArrayList<Caption>();
80  
81      // initialize scanner object
82      Scanner scanner = new Scanner(in, "UTF-8");
83      scanner.useDelimiter("[\n(\r\n)]{2}");
84  
85      // create initial time
86      Time time = null;
87      try {
88        time = new TimeImpl(0, 0, 0, 0);
89      } catch (IllegalTimeFormatException e1) {
90      }
91  
92      while (scanner.hasNext()) {
93        String captionString = scanner.next();
94        // convert line endings to \n
95        captionString = captionString.replace("\r\n", "\n");
96  
97        // split to number, time and caption
98        String[] captionParts = captionString.split("\n", 3);
99        // check for table length
100       if (captionParts.length != 3) {
101         throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
102       }
103 
104       // get time part
105       String[] timePart = captionParts[1].split("-->");
106 
107       // parse time
108       Time inTime;
109       Time outTime;
110       try {
111         inTime = TimeUtil.importSrt(timePart[0].trim());
112         outTime = TimeUtil.importSrt(timePart[1].trim());
113       } catch (IllegalTimeFormatException e) {
114         throw new CaptionConverterException(e.getMessage());
115       }
116 
117       // check for time validity
118       if (inTime.compareTo(time) < 0 || outTime.compareTo(inTime) <= 0) {
119         logger.warn("Caption with invalid time encountered. Skipping...");
120         continue;
121       }
122       time = outTime;
123 
124       // get text captions
125       String[] captionLines = createCaptionLines(captionParts[2]);
126       if (captionLines == null) {
127         throw new CaptionConverterException("Caption does not contain any caption text: " + captionString);
128       }
129 
130       // create caption object and add to caption collection
131       Caption caption = new CaptionImpl(inTime, outTime, captionLines);
132       collection.add(caption);
133     }
134 
135     return collection;
136   }
137 
138   /**
139    * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
140    */
141   @Override
142   public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
143 
144     if (language != null) {
145       logger.debug("SubRip format does not include language information. Ignoring language attribute.");
146     }
147 
148     // initialize stream writer
149     OutputStreamWriter osw = new OutputStreamWriter(outputStream, "UTF-8");
150     BufferedWriter bw = new BufferedWriter(osw);
151 
152     // initialize counter
153     int counter = 1;
154     for (Caption caption : captions) {
155       String captionString = String.format("%2$d%1$s%3$s --> %4$s%1$s%5$s%1$s%1$s", LINE_ENDING, counter,
156               TimeUtil.exportToSrt(caption.getStartTime()), TimeUtil.exportToSrt(caption.getStopTime()),
157               createCaptionText(caption.getCaption()));
158       bw.append(captionString);
159       counter++;
160     }
161 
162     bw.flush();
163     bw.close();
164     osw.close();
165   }
166 
167   /**
168    * Helper function that creates caption text.
169    *
170    * @param captionLines
171    *          array containing caption lines
172    * @return string representation of caption text
173    */
174   private String createCaptionText(String[] captionLines) {
175     StringBuilder builder = new StringBuilder(captionLines[0]);
176     for (int i = 1; i < captionLines.length; i++) {
177       builder.append(LINE_ENDING);
178       builder.append(captionLines[i]);
179     }
180     return builder.toString();
181   }
182 
183   /**
184    * Helper function that splits text into lines and remove any style annotation
185    *
186    * @param captionText
187    * @return array of caption's text lines
188    */
189   private String[] createCaptionLines(String captionText) {
190     String[] captionLines = captionText.split("\n");
191     if (captionLines.length == 0) {
192       return null;
193     }
194     for (int i = 0; i < captionLines.length; i++) {
195       captionLines[i] = captionLines[i].replaceAll("(<\\s*.\\s*>)|(</\\s*.\\s*>)", "").trim();
196     }
197     return captionLines;
198   }
199 
200   /**
201    * {@inheritDoc} Returns empty list since srt format does not store any information about language.
202    *
203    * @see org.opencastproject.caption.api.CaptionConverter#getLanguageList(java.io.InputStream)
204    */
205   @Override
206   public String[] getLanguageList(InputStream input) throws CaptionConverterException {
207     return new String[0];
208   }
209 
210   /**
211    * {@inheritDoc}
212    *
213    * @see org.opencastproject.caption.api.CaptionConverter#getExtension()
214    */
215   @Override
216   public String getExtension() {
217     return EXTENSION;
218   }
219 
220   @Override
221   public Type getElementType() {
222     return MediaPackageElement.Type.Attachment;
223   }
224 }