1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.opencastproject.textanalyzer.impl;
23
24 import org.opencastproject.dictionary.api.DictionaryService;
25 import org.opencastproject.job.api.AbstractJobProducer;
26 import org.opencastproject.job.api.Job;
27 import org.opencastproject.mediapackage.Attachment;
28 import org.opencastproject.mediapackage.Catalog;
29 import org.opencastproject.mediapackage.MediaPackageElementBuilderFactory;
30 import org.opencastproject.mediapackage.MediaPackageElementParser;
31 import org.opencastproject.mediapackage.MediaPackageElements;
32 import org.opencastproject.mediapackage.MediaPackageException;
33 import org.opencastproject.metadata.mpeg7.MediaTime;
34 import org.opencastproject.metadata.mpeg7.MediaTimeImpl;
35 import org.opencastproject.metadata.mpeg7.Mpeg7CatalogImpl;
36 import org.opencastproject.metadata.mpeg7.Mpeg7CatalogService;
37 import org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition;
38 import org.opencastproject.metadata.mpeg7.TemporalDecomposition;
39 import org.opencastproject.metadata.mpeg7.Textual;
40 import org.opencastproject.metadata.mpeg7.Video;
41 import org.opencastproject.metadata.mpeg7.VideoSegment;
42 import org.opencastproject.metadata.mpeg7.VideoText;
43 import org.opencastproject.metadata.mpeg7.VideoTextImpl;
44 import org.opencastproject.security.api.OrganizationDirectoryService;
45 import org.opencastproject.security.api.SecurityService;
46 import org.opencastproject.security.api.UserDirectoryService;
47 import org.opencastproject.serviceregistry.api.ServiceRegistry;
48 import org.opencastproject.serviceregistry.api.ServiceRegistryException;
49 import org.opencastproject.textanalyzer.api.TextAnalyzerException;
50 import org.opencastproject.textanalyzer.api.TextAnalyzerService;
51 import org.opencastproject.textextractor.api.TextExtractor;
52 import org.opencastproject.textextractor.api.TextExtractorException;
53 import org.opencastproject.util.LoadUtil;
54 import org.opencastproject.util.NotFoundException;
55 import org.opencastproject.util.ReadinessIndicator;
56 import org.opencastproject.workspace.api.Workspace;
57
58 import org.osgi.service.cm.ConfigurationException;
59 import org.osgi.service.cm.ManagedService;
60 import org.osgi.service.component.ComponentContext;
61 import org.osgi.service.component.annotations.Activate;
62 import org.osgi.service.component.annotations.Component;
63 import org.osgi.service.component.annotations.Reference;
64 import org.slf4j.Logger;
65 import org.slf4j.LoggerFactory;
66
67 import java.io.File;
68 import java.io.IOException;
69 import java.io.InputStream;
70 import java.net.URI;
71 import java.util.ArrayList;
72 import java.util.Arrays;
73 import java.util.Dictionary;
74 import java.util.List;
75
76
77
78
79 @Component(
80 immediate = true,
81 service = { TextAnalyzerService.class,ManagedService.class },
82 property = {
83 "service.description=Text Analysis Service",
84 "service.pid=org.opencastproject.textanalyzer.impl.TextAnalyzerServiceImpl"
85 }
86 )
87 public class TextAnalyzerServiceImpl extends AbstractJobProducer implements TextAnalyzerService, ManagedService {
88
89
90 private static final Logger logger = LoggerFactory.getLogger(TextAnalyzerServiceImpl.class);
91
92
93 private enum Operation {
94 Extract
95 };
96
97
98 public static final String COLLECTION_ID = "ocrtext";
99
100
101 public static final float DEFAULT_ANALYSIS_JOB_LOAD = 0.2f;
102
103
104 public static final String ANALYSIS_JOB_LOAD_KEY = "job.load.analysis";
105
106
107 private float analysisJobLoad = DEFAULT_ANALYSIS_JOB_LOAD;
108
109
110 private TextExtractor textExtractor = null;
111
112
113 private ServiceRegistry serviceRegistry = null;
114
115
116 private Workspace workspace = null;
117
118
119 protected Mpeg7CatalogService mpeg7CatalogService;
120
121
122 protected DictionaryService dictionaryService;
123
124
125 protected SecurityService securityService = null;
126
127
128 protected UserDirectoryService userDirectoryService = null;
129
130
131 protected OrganizationDirectoryService organizationDirectoryService = null;
132
133
134
135
136 public TextAnalyzerServiceImpl() {
137 super(JOB_TYPE);
138 }
139
140
141
142
143
144
145
146 @Override
147 @Activate
148 public void activate(ComponentContext cc) {
149 logger.info("Activating Text analyser service");
150 super.activate(cc);
151 }
152
153
154
155
156
157
158 @Override
159 public Job extract(Attachment image) throws TextAnalyzerException, MediaPackageException {
160 try {
161 return serviceRegistry.createJob(JOB_TYPE, Operation.Extract.toString(),
162 Arrays.asList(MediaPackageElementParser.getAsXml(image)), analysisJobLoad);
163 } catch (ServiceRegistryException e) {
164 throw new TextAnalyzerException("Unable to create job", e);
165 }
166 }
167
168
169
170
171
172
173
174
175
176
177
178
179 private Catalog extract(Job job, Attachment image) throws TextAnalyzerException, MediaPackageException {
180
181 final Attachment attachment = image;
182 final URI imageUrl = attachment.getURI();
183
184 File imageFile = null;
185 try {
186 Mpeg7CatalogImpl mpeg7 = Mpeg7CatalogImpl.newInstance();
187
188 logger.info("Starting text extraction from {}", imageUrl);
189 try {
190 imageFile = workspace.get(imageUrl);
191 } catch (NotFoundException e) {
192 throw new TextAnalyzerException("Image " + imageUrl + " not found in workspace", e);
193 } catch (IOException e) {
194 throw new TextAnalyzerException("Unable to access " + imageUrl + " in workspace", e);
195 }
196 VideoText[] videoTexts = analyze(imageFile, image.getIdentifier());
197
198
199 MediaTime mediaTime = new MediaTimeImpl(0, 0);
200 Video avContent = mpeg7.addVideoContent(image.getIdentifier(), mediaTime, null);
201 TemporalDecomposition<VideoSegment> temporalDecomposition = (TemporalDecomposition<VideoSegment>) avContent
202 .getTemporalDecomposition();
203
204
205 VideoSegment videoSegment = temporalDecomposition.createSegment("segment-0");
206 videoSegment.setMediaTime(mediaTime);
207
208
209 SpatioTemporalDecomposition spatioTemporalDecomposition = videoSegment.createSpatioTemporalDecomposition(true,
210 false);
211 for (VideoText videoText : videoTexts) {
212 spatioTemporalDecomposition.addVideoText(videoText);
213 }
214
215 logger.info("Text extraction of {} finished, {} lines found", attachment.getURI(), videoTexts.length);
216
217 URI uri;
218 InputStream in;
219 try {
220 in = mpeg7CatalogService.serialize(mpeg7);
221 } catch (IOException e) {
222 throw new TextAnalyzerException("Error serializing mpeg7", e);
223 }
224 try {
225 uri = workspace.putInCollection(COLLECTION_ID, job.getId() + ".xml", in);
226 } catch (IOException e) {
227 throw new TextAnalyzerException("Unable to put mpeg7 into the workspace", e);
228 }
229 Catalog catalog = (Catalog) MediaPackageElementBuilderFactory.newInstance().newElementBuilder()
230 .newElement(Catalog.TYPE, MediaPackageElements.TEXTS);
231 catalog.setURI(uri);
232
233 logger.debug("Created MPEG7 catalog for {}", imageUrl);
234
235 return catalog;
236 } catch (Exception e) {
237 logger.warn("Error extracting text from " + imageUrl, e);
238 if (e instanceof TextAnalyzerException) {
239 throw (TextAnalyzerException) e;
240 } else {
241 throw new TextAnalyzerException(e);
242 }
243 } finally {
244 try {
245 workspace.delete(imageUrl);
246 } catch (Exception e) {
247 logger.warn("Unable to delete temporary text analysis image {}", imageUrl, e);
248 }
249 }
250 }
251
252
253
254
255
256
257 @Override
258 protected String process(Job job) throws Exception {
259 Operation op = null;
260 String operation = job.getOperation();
261 List<String> arguments = job.getArguments();
262 try {
263 op = Operation.valueOf(operation);
264 switch (op) {
265 case Extract:
266 Attachment element = (Attachment) MediaPackageElementParser.getFromXml(arguments.get(0));
267 Catalog catalog = extract(job, element);
268 return MediaPackageElementParser.getAsXml(catalog);
269 default:
270 throw new IllegalStateException("Don't know how to handle operation '" + operation + "'");
271 }
272 } catch (IllegalArgumentException e) {
273 throw new ServiceRegistryException("This service can't handle operations of type '" + op + "'", e);
274 } catch (IndexOutOfBoundsException e) {
275 throw new ServiceRegistryException("This argument list for operation '" + op + "' does not meet expectations", e);
276 } catch (Exception e) {
277 throw new ServiceRegistryException("Error handling operation '" + op + "'", e);
278 }
279 }
280
281
282
283
284
285
286
287
288
289
290
291
292 protected VideoText[] analyze(File imageFile, String id) throws TextAnalyzerException {
293
294
295
296 List<VideoText> videoTexts = new ArrayList<VideoText>();
297 List<String> extractedText;
298 try {
299 extractedText = textExtractor.extract(imageFile);
300 } catch (IOException | TextExtractorException e) {
301 logger.warn("Error extracting text from {}", imageFile, e);
302 throw new TextAnalyzerException(e);
303 }
304
305
306 int i = 1;
307 for (String line : extractedText) {
308 VideoText videoText = new VideoTextImpl(id + "-" + i++);
309 Textual text = dictionaryService.cleanUpText(line);
310 if (text != null) {
311 videoText.setText(text);
312 videoTexts.add(videoText);
313 }
314 }
315
316
317 return videoTexts.toArray(new VideoText[0]);
318 }
319
320
321
322
323
324
325
326 @Reference
327 protected void setServiceRegistry(ServiceRegistry serviceRegistry) {
328 this.serviceRegistry = serviceRegistry;
329 }
330
331
332
333
334
335
336 @Override
337 protected ServiceRegistry getServiceRegistry() {
338 return serviceRegistry;
339 }
340
341
342
343
344
345
346
347 @Reference
348 protected void setTextExtractor(TextExtractor textExtractor) {
349 this.textExtractor = textExtractor;
350 }
351
352
353
354
355
356
357
358 @Reference
359 protected void setWorkspace(Workspace workspace) {
360 this.workspace = workspace;
361 }
362
363
364
365
366
367
368
369 @Reference(name = "mpeg7service")
370 protected void setMpeg7CatalogService(Mpeg7CatalogService mpeg7CatalogService) {
371 this.mpeg7CatalogService = mpeg7CatalogService;
372 }
373
374
375
376
377
378
379
380 @Reference
381 protected void setDictionaryService(DictionaryService dictionaryService) {
382 this.dictionaryService = dictionaryService;
383 }
384
385
386
387
388
389
390
391 @Reference
392 public void setSecurityService(SecurityService securityService) {
393 this.securityService = securityService;
394 }
395
396
397
398
399
400
401
402 @Reference
403 public void setUserDirectoryService(UserDirectoryService userDirectoryService) {
404 this.userDirectoryService = userDirectoryService;
405 }
406
407
408
409
410
411
412
413 @Reference
414 public void setOrganizationDirectoryService(OrganizationDirectoryService organizationDirectory) {
415 this.organizationDirectoryService = organizationDirectory;
416 }
417
418
419
420
421
422
423 @Override
424 protected SecurityService getSecurityService() {
425 return securityService;
426 }
427
428
429
430
431
432
433 @Override
434 protected UserDirectoryService getUserDirectoryService() {
435 return userDirectoryService;
436 }
437
438
439
440
441
442
443 @Override
444 protected OrganizationDirectoryService getOrganizationDirectoryService() {
445 return organizationDirectoryService;
446 }
447
448 @Override
449 public void updated(@SuppressWarnings("rawtypes") Dictionary properties) throws ConfigurationException {
450 analysisJobLoad = LoadUtil.getConfiguredLoadValue(properties, ANALYSIS_JOB_LOAD_KEY, DEFAULT_ANALYSIS_JOB_LOAD,
451 serviceRegistry);
452 }
453
454 @Reference(target = "(artifact=dictionary)")
455 public void setReadinessIndicator(ReadinessIndicator readinessIndicator) {
456
457 }
458 }