1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.opencastproject.caption.converters;
23
24 import org.opencastproject.caption.api.Caption;
25 import org.opencastproject.caption.api.CaptionConverter;
26 import org.opencastproject.caption.api.CaptionConverterException;
27 import org.opencastproject.caption.api.IllegalTimeFormatException;
28 import org.opencastproject.caption.api.Time;
29 import org.opencastproject.caption.impl.CaptionImpl;
30 import org.opencastproject.caption.impl.TimeImpl;
31 import org.opencastproject.caption.util.TimeUtil;
32 import org.opencastproject.mediapackage.MediaPackageElement;
33 import org.opencastproject.mediapackage.MediaPackageElement.Type;
34 import org.opencastproject.util.XmlSafeParser;
35
36 import org.apache.commons.io.IOUtils;
37 import org.osgi.service.component.annotations.Component;
38 import org.slf4j.Logger;
39 import org.slf4j.LoggerFactory;
40 import org.w3c.dom.Document;
41 import org.w3c.dom.Element;
42 import org.w3c.dom.Node;
43 import org.w3c.dom.NodeList;
44 import org.xml.sax.Attributes;
45 import org.xml.sax.SAXException;
46 import org.xml.sax.helpers.DefaultHandler;
47
48 import java.io.IOException;
49 import java.io.InputStream;
50 import java.io.OutputStream;
51 import java.io.OutputStreamWriter;
52 import java.util.ArrayList;
53 import java.util.LinkedList;
54 import java.util.List;
55
56 import javax.xml.parsers.DocumentBuilder;
57 import javax.xml.parsers.ParserConfigurationException;
58 import javax.xml.parsers.SAXParser;
59 import javax.xml.parsers.SAXParserFactory;
60 import javax.xml.transform.Transformer;
61 import javax.xml.transform.TransformerConfigurationException;
62 import javax.xml.transform.TransformerException;
63 import javax.xml.transform.TransformerFactory;
64 import javax.xml.transform.dom.DOMSource;
65 import javax.xml.transform.stream.StreamResult;
66
67
68
69
70
71 @Component(
72 immediate = true,
73 service = { CaptionConverter.class },
74 property = {
75 "service.description=DFXP caption converter",
76 "caption.format=dfxp"
77 }
78 )
79 public class DFXPCaptionConverter implements CaptionConverter {
80
81
82 private static final Logger logger = LoggerFactory.getLogger(DFXPCaptionConverter.class);
83
84 private static final String EXTENSION = "dfxp.xml";
85
86
87
88
89
90
91
92
93 @Override
94 public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
95
96
97 List<Caption> collection = new ArrayList<Caption>();
98
99 Document doc;
100 try {
101 DocumentBuilder builder = XmlSafeParser.newDocumentBuilderFactory().newDocumentBuilder();
102 doc = builder.parse(in);
103 doc.getDocumentElement().normalize();
104 } catch (ParserConfigurationException e) {
105 throw new CaptionConverterException("Could not parse captions", e);
106 } catch (SAXException e) {
107 throw new CaptionConverterException("Could not parse captions", e);
108 } catch (IOException e) {
109 throw new CaptionConverterException("Could not parse captions", e);
110 }
111
112
113 NodeList divElements = doc.getElementsByTagName("div");
114
115 Element targetDiv = null;
116 if (language != null) {
117
118 for (int i = 0; i < divElements.getLength(); i++) {
119 Element n = (Element) divElements.item(i);
120 if (n.getAttribute("xml:lang").equals(language)) {
121 targetDiv = n;
122 break;
123 }
124 }
125 } else {
126 if (divElements.getLength() > 1) {
127
128 logger.warn("More than one <div> element available. Parsing first one...");
129 }
130 if (divElements.getLength() != 0) {
131 targetDiv = (Element) divElements.item(0);
132 }
133 }
134
135
136 if (targetDiv == null) {
137 logger.warn("No suitable <div> element found for language {}", language);
138 } else {
139 NodeList pElements = targetDiv.getElementsByTagName("p");
140
141
142 Time time = null;
143 try {
144 time = new TimeImpl(0, 0, 0, 0);
145 } catch (IllegalTimeFormatException e1) {
146 }
147
148 for (int i = 0; i < pElements.getLength(); i++) {
149 try {
150 Caption caption = parsePElement((Element) pElements.item(i));
151
152 if (caption.getStartTime().compareTo(time) < 0
153 || caption.getStopTime().compareTo(caption.getStartTime()) <= 0) {
154 logger.warn("Caption with invalid time encountered. Skipping...");
155 continue;
156 }
157 collection.add(caption);
158 } catch (IllegalTimeFormatException e) {
159 logger.warn("Caption with invalid time format encountered. Skipping...");
160 }
161 }
162 }
163
164
165 return collection;
166 }
167
168
169
170
171
172
173
174
175
176
177 private Caption parsePElement(Element p) throws IllegalTimeFormatException {
178 Time begin = TimeUtil.importDFXP(p.getAttribute("begin").trim());
179 Time end = TimeUtil.importDFXP(p.getAttribute("end").trim());
180
181
182
183 String[] textArray = getTextCore(p).split("\n");
184
185 return new CaptionImpl(begin, end, textArray);
186 }
187
188
189
190
191
192
193
194
195 private String getTextCore(Node p) {
196 StringBuffer captionText = new StringBuffer();
197
198 NodeList list = p.getChildNodes();
199 for (int i = 0; i < list.getLength(); i++) {
200 if (list.item(i).getNodeType() == Node.TEXT_NODE) {
201 captionText.append(list.item(i).getTextContent());
202 } else if ("br".equals(list.item(i).getNodeName())) {
203 captionText.append("\n");
204 } else {
205 captionText.append(getTextCore(list.item(i)));
206 }
207 }
208 return captionText.toString().trim();
209 }
210
211
212
213
214 @Override
215 public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
216
217 Document doc = null;
218 InputStream is = null;
219 try {
220 DocumentBuilder builder = XmlSafeParser.newDocumentBuilderFactory().newDocumentBuilder();
221
222 is = DFXPCaptionConverter.class.getResourceAsStream("/templates/template.dfxp.xml");
223 doc = builder.parse(is);
224 } catch (ParserConfigurationException e) {
225
226 throw new RuntimeException(e);
227 } catch (SAXException e) {
228
229 throw new RuntimeException(e);
230 } catch (IOException e) {
231
232 throw new RuntimeException(e);
233 } finally {
234 IOUtils.closeQuietly(is);
235 }
236
237
238 Node bodyNode = doc.getElementsByTagName("body").item(0);
239
240
241 Element divNode = doc.createElement("div");
242 divNode.setAttribute("xml:lang", language != null ? language : "und");
243 bodyNode.appendChild(divNode);
244
245
246 for (Caption caption : captions) {
247 Element newNode = doc.createElement("p");
248 newNode.setAttribute("begin", TimeUtil.exportToDFXP(caption.getStartTime()));
249 newNode.setAttribute("end", TimeUtil.exportToDFXP(caption.getStopTime()));
250 String[] captionText = caption.getCaption();
251
252 newNode.appendChild(doc.createTextNode(captionText[0]));
253 for (int i = 1; i < captionText.length; i++) {
254 newNode.appendChild(doc.createElement("br"));
255 newNode.appendChild(doc.createTextNode(captionText[i]));
256 }
257 divNode.appendChild(newNode);
258 }
259
260
261 OutputStreamWriter osw = new OutputStreamWriter(outputStream, "UTF-8");
262 StreamResult result = new StreamResult(osw);
263 DOMSource source = new DOMSource(doc);
264 TransformerFactory tfactory = XmlSafeParser.newTransformerFactory();
265 Transformer transformer;
266 try {
267 transformer = tfactory.newTransformer();
268 transformer.transform(source, result);
269 osw.flush();
270 } catch (TransformerConfigurationException e) {
271
272 throw new RuntimeException(e);
273 } catch (TransformerException e) {
274
275 throw new RuntimeException(e);
276 } finally {
277 IOUtils.closeQuietly(osw);
278 }
279 }
280
281
282
283
284
285
286 @Override
287 public String[] getLanguageList(InputStream input) throws CaptionConverterException {
288
289
290 final List<String> langList = new LinkedList<String>();
291
292
293 SAXParserFactory factory = XmlSafeParser.newSAXParserFactory();
294 try {
295 SAXParser parser = factory.newSAXParser();
296
297 DefaultHandler handler = new DefaultHandler() {
298 @Override
299 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
300 if ("div".equals(qName)) {
301
302 String lang = attributes.getValue("xml:lang");
303 if (lang == null) {
304
305 logger.warn("Missing xml:lang attribute for div element.");
306 } else if (langList.contains(lang)) {
307 logger.warn("Multiple div elements with same language.");
308 } else {
309 langList.add(lang);
310 }
311 }
312 }
313 };
314
315
316 parser.parse(input, handler);
317 } catch (ParserConfigurationException e) {
318
319 throw new RuntimeException(e);
320 } catch (SAXException e) {
321 throw new CaptionConverterException("Could not parse captions", e);
322 } catch (IOException e) {
323 throw new RuntimeException(e);
324 }
325
326 return langList.toArray(new String[0]);
327 }
328
329
330
331
332
333
334 @Override
335 public String getExtension() {
336 return EXTENSION;
337 }
338
339 @Override
340 public Type getElementType() {
341 return MediaPackageElement.Type.Attachment;
342 }
343
344 }