View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  
22  package org.opencastproject.tobira.impl;
23  
24  import static org.opencastproject.metadata.dublincore.DublinCore.PROPERTY_CREATED;
25  import static org.opencastproject.metadata.dublincore.DublinCore.PROPERTY_DESCRIPTION;
26  import static org.opencastproject.metadata.dublincore.DublinCore.PROPERTY_TITLE;
27  
28  import org.opencastproject.mediapackage.MediaPackage;
29  import org.opencastproject.mediapackage.MediaPackageElement;
30  import org.opencastproject.mediapackage.Track;
31  import org.opencastproject.mediapackage.TrackSupport;
32  import org.opencastproject.mediapackage.VideoStream;
33  import org.opencastproject.metadata.dublincore.DublinCore;
34  import org.opencastproject.metadata.dublincore.DublinCoreCatalog;
35  import org.opencastproject.metadata.dublincore.DublinCoreUtil;
36  import org.opencastproject.metadata.dublincore.EncodingSchemeUtils;
37  import org.opencastproject.metadata.mpeg7.MediaTimePointImpl;
38  import org.opencastproject.playlists.Playlist;
39  import org.opencastproject.search.api.SearchResult;
40  import org.opencastproject.security.api.AccessControlEntry;
41  import org.opencastproject.security.api.AccessControlList;
42  import org.opencastproject.security.api.AccessControlParser;
43  import org.opencastproject.security.api.AclScope;
44  import org.opencastproject.security.api.AuthorizationService;
45  import org.opencastproject.series.api.Series;
46  import org.opencastproject.util.Jsons;
47  import org.opencastproject.util.MimeType;
48  import org.opencastproject.workspace.api.Workspace;
49  
50  import org.slf4j.Logger;
51  import org.slf4j.LoggerFactory;
52  
53  import java.util.ArrayList;
54  import java.util.Arrays;
55  import java.util.Date;
56  import java.util.HashMap;
57  import java.util.LinkedHashSet;
58  import java.util.List;
59  import java.util.Objects;
60  import java.util.Optional;
61  import java.util.Set;
62  import java.util.function.Function;
63  import java.util.stream.Collectors;
64  
65  
/**
 * An item of the harvesting API, basically as a JSON object. Can be "event", "series",
 * "playlist", "event-deleted", "series-deleted" or "playlist-deleted". Also contains the
 * modified date, used for sorting.
 */
70  class Item {
71    private static final Logger logger = LoggerFactory.getLogger(Item.class);
72  
73    private Date modifiedDate;
74    private Jsons.Val obj;
75  
/**
 * Converts an event into the corresponding JSON representation.
 *
 * If the event has a deletion date, only a small "event-deleted" object (ID and update
 * timestamp) is produced. Otherwise the full "event" object is assembled from the media
 * package, its episode Dublin Core catalogs and the merged ACL.
 *
 * @param event the search result to convert
 * @param authorizationService used to look up the merged ACL of the media package
 * @param workspace used to load the Dublin Core catalogs of the media package
 * @throws RuntimeException if a non-deleted event has no title
 */
Item(SearchResult event, AuthorizationService authorizationService, Workspace workspace) {
  this.modifiedDate = event.getModifiedDate();

  if (event.getDeletionDate() != null) {
    this.obj = Jsons.obj(
        Jsons.p("kind", "event-deleted"),
        Jsons.p("id", event.getId()),
        Jsons.p("updated", event.getModifiedDate().getTime())
    );
  } else {
    final var mp = event.getMediaPackage();
    final var dccs = getDccsFromMp(mp, workspace);

    // Figure out whether this is a live event: a single live track is enough.
    final var isLive = Arrays.stream(mp.getTracks()).anyMatch(Track::isLive);

    // Obtain creators from the DCCs. We collect into `LinkedHashSet` to deduplicate entries
    // while keeping the order in which they were encountered.
    final var creators = dccs.stream()
            .flatMap(dcc -> dcc.get(DublinCore.PROPERTY_CREATOR).stream())
            .filter(Objects::nonNull)
            .map(creator -> Jsons.v(creator.getValue()))
            .collect(Collectors.toCollection(LinkedHashSet::new));

    // Get start and end time from the first DCC that has a `temporal` field.
    final var period = dccs.stream()
            .map(dcc -> dcc.getFirst(DublinCore.PROPERTY_TEMPORAL))
            .filter(Objects::nonNull)
            .findFirst()
            .flatMap(str -> {
              try {
                return Optional.of(EncodingSchemeUtils.decodeMandatoryPeriod(str));
              } catch (Exception e) {
                // Deliberately best effort: an undecodable period just means that start and
                // end time are emitted as `null`.
                return Optional.empty();
              }
            });

    // Get title. We require a title: the DCCs are consulted first, and the title stored in
    // the media package is the fallback.
    var title = dccs.stream()
            .map(dcc -> dcc.getFirst(DublinCore.PROPERTY_TITLE))
            .filter(Objects::nonNull)
            .findFirst()
            .orElse(mp.getTitle());
    if (title == null) {
      // If there is no title to be found, we throw an exception to skip this event.
      throw new RuntimeException("Event has no title");
    }

    final var captions = findCaptions(mp);

    // Get the generated slide text. Elements without a flavor are skipped instead of
    // triggering a `NullPointerException`.
    final var slideText = Arrays.stream(mp.getElements())
        .filter(mpe -> mpe.getFlavor() != null && mpe.getFlavor().eq("mpeg-7/text"))
        .map(element -> element.getURI())
        .findFirst();

    // Obtain duration from tracks, as that's usually more accurate (stores information from
    // inspect operations). Fall back to the `extent` field of the event's Dublin Core.
    final var duration = Arrays.stream(mp.getTracks())
        .filter(track -> track.hasVideo() || track.hasAudio())
        .map(Track::getDuration)
        .filter(d -> d != null && d > 0)
        .mapToLong(Long::longValue)
        // Not entirely clear how to combine different track durations. Taking the max is not
        // worse than any other thing that I can think of. And usually all durations are basically
        // the same.
        .max()
        // Only reached if no track had a usable duration.
        .orElseGet(() -> {
          String dcExtent = event.getDublinCore().getFirst(DublinCore.PROPERTY_EXTENT);
          return Math.max(0L, EncodingSchemeUtils.decodeMandatoryDuration(dcExtent));
        });

    this.obj = Jsons.obj(
        Jsons.p("kind", "event"),
        Jsons.p("id", event.getId()),
        Jsons.p("title", title),
        Jsons.p("partOf", event.getDublinCore().getFirst(DublinCore.PROPERTY_IS_PART_OF)),
        Jsons.p("description", event.getDublinCore().getFirst(PROPERTY_DESCRIPTION)),
        Jsons.p("created", event.getCreatedDate().toEpochMilli()),
        Jsons.p("startTime", period.map(p -> p.getStart().getTime()).orElse(null)),
        Jsons.p("endTime", period.map(p -> p.getEnd().getTime()).orElse(null)),
        Jsons.p("creators", Jsons.arr(new ArrayList<>(creators))),
        Jsons.p("duration", duration),
        Jsons.p("thumbnail", findThumbnail(mp)),
        Jsons.p("timelinePreview", findTimelinePreview(mp)),
        Jsons.p("tracks", Jsons.arr(assembleTracks(event, mp))),
        Jsons.p("acl", assembleAcl(authorizationService.getAcl(mp, AclScope.Merged).getA().getEntries())),
        Jsons.p("isLive", isLive),
        // The listed dcterms fields are already part of the object above.
        Jsons.p("metadata", dccToMetadata(dccs, Set.of(
            "created", "creator", "title", "extent", "isPartOf", "description", "identifier"
        ))),
        Jsons.p("captions", Jsons.arr(captions)),
        Jsons.p("slideText", slideText.map(t -> t.toString()).orElse(null)),
        Jsons.p("segments", Jsons.arr(findSegments(mp))),
        Jsons.p("updated", event.getModifiedDate().getTime())
    );
  }
}
177 
178   private static List<DublinCoreCatalog> getDccsFromMp(MediaPackage mp, Workspace workspace) {
179     return Arrays.stream(mp.getElements())
180             .filter(mpe -> {
181               final var flavor = mpe.getFlavor();
182               if (flavor == null) {
183                 return false;
184               }
185               final var isForEpisode = Objects.equals(flavor.getSubtype(), "episode");
186               final var isCatalog = Objects.equals(mpe.getElementType(), MediaPackageElement.Type.Catalog);
187               final var isXml = Objects.equals(mpe.getMimeType(), MimeType.mimeType("text", "xml"));
188               return isCatalog && isForEpisode && isXml;
189             })
190             .map(mpe -> DublinCoreUtil.loadDublinCore(workspace, mpe))
191             .collect(Collectors.toCollection(ArrayList::new));
192   }
193 
194   /**
195    * Assembles the object containing all additional metadata.
196    *
197    * The second argument is a list of dcterms metadata fields that is already included elsewhere in
198    * the response. They will be ignored here.
199    */
200   private static Jsons.Obj dccToMetadata(List<DublinCoreCatalog> dccs, Set<String> ignoredDcFields) {
201     final var namespaces = new HashMap<String, ArrayList<Jsons.Prop>>();
202 
203     for (final var dcc : (Iterable<DublinCoreCatalog>) dccs::iterator) {
204       for (final var e : dcc.getValues().entrySet()) {
205         final var key = e.getKey();
206 
207         // We special case dcterms here to get a smaller, easier to read JSON. In most cases, this
208         // will be the only namespace.
209         final var ns = key.getNamespaceURI().equals("http://purl.org/dc/terms/")
210             ? "dcterms"
211             : key.getNamespaceURI();
212 
213         // We skip fields that we already include elsewhere.
214         if (ns.equals("dcterms") && ignoredDcFields.contains(key.getLocalName())) {
215           continue;
216         }
217 
218         final var fields = namespaces.computeIfAbsent(ns, k -> new ArrayList<>());
219         final var values = e.getValue().stream()
220             .map(v -> Jsons.v(v.getValue()))
221             .collect(Collectors.toCollection(ArrayList::new));
222         final var field = Jsons.p(e.getKey().getLocalName(), Jsons.arr(values));
223         fields.add(field);
224       }
225     }
226 
227     final var fields = namespaces.entrySet().stream()
228         .map(e -> {
229           final var obj = Jsons.obj(e.getValue().toArray(new Jsons.Prop[0]));
230           return Jsons.p(e.getKey(), obj);
231         })
232         .toArray(Jsons.Prop[]::new);
233 
234     return Jsons.obj(fields);
235   }
236 
237   private static Jsons.Obj assembleAcl(List<AccessControlEntry> acl) {
238     // We just transform the ACL into a map with one field per action, and the
239     // value being a list of roles, e.g.
240     // `{ "read": ["ROLE_USER", "ROLE_FOO"], "write": [...] }`
241     final var actionToRoles = new HashMap<String, ArrayList<Jsons.Val>>();
242     acl.stream().filter(AccessControlEntry::isAllow).forEach(entry -> {
243       final var action = entry.getAction();
244       actionToRoles.putIfAbsent(action, new ArrayList<>());
245       actionToRoles.get(action).add(Jsons.v(entry.getRole()));
246     });
247 
248     final var props = actionToRoles.entrySet().stream()
249         .map(e -> Jsons.p(e.getKey(), Jsons.arr(e.getValue())))
250         .toArray(Jsons.Prop[]::new);
251 
252     return Jsons.obj(props);
253   }
254 
255   private static List<Jsons.Val> assembleTracks(SearchResult event, MediaPackage mp) {
256     return Arrays.stream(mp.getTracks())
257         .filter(track -> track.hasAudio() || track.hasVideo())
258         .map(track -> {
259           var videoStreams = TrackSupport.byType(track.getStreams(), VideoStream.class);
260           var resolution = Jsons.NULL;
261           if (videoStreams.length > 0) {
262             final var stream = videoStreams[0];
263             resolution = Jsons.arr(Jsons.v(stream.getFrameWidth()), Jsons.v(stream.getFrameHeight()));
264 
265             if (videoStreams.length > 1) {
266               logger.warn(
267                   "Track of event {} has more than one video stream; we will ignore all but the first",
268                   event.getId()
269               );
270             }
271           }
272 
273           return Jsons.obj(
274               Jsons.p("uri", track.getURI().toString()),
275               Jsons.p("mimetype", track.getMimeType().toString()),
276               Jsons.p("flavor", track.getFlavor().toString()),
277               Jsons.p("resolution", resolution),
278               Jsons.p("isMaster", track.isMaster())
279           );
280         })
281         .collect(Collectors.toCollection(ArrayList::new));
282   }
283 
284   private static List<Jsons.Val> findCaptions(MediaPackage mp) {
285     return Arrays.stream(mp.getElements())
286         .filter(element -> {
287           final var isVTT = element.getFlavor().toString().startsWith("captions/vtt")
288                 || element.getMimeType().eq("text", "vtt");
289           final var isCorrectType = element.getElementType() == MediaPackageElement.Type.Attachment
290                 || element.getElementType() == MediaPackageElement.Type.Track;
291 
292           return isVTT && isCorrectType;
293         })
294         .map(track -> {
295           final var tags = track.getTags();
296           final Function<String, Optional<String>> findTag = (String prefix) -> Arrays.stream(tags)
297                 .map(tag -> tag.split(":", 2))
298                 .filter(tagArray -> (tagArray.length == 2 && tagArray[0].equals(prefix)))
299                 .map(tagArray -> tagArray[1])
300                 .findFirst();
301 
302           // Try to get a language for this subtitle track. We first check the proper tag.
303           var lang = findTag.apply("lang");
304           if (lang.isEmpty()) {
305             // But for compatibility, we also check in the flavor.
306             final var subflavor = track.getFlavor().getSubtype();
307             if (subflavor.startsWith("vtt+")) {
308               final var suffix = subflavor.substring("vtt+".length());
309               if (suffix.length() > 0) {
310                 lang = Optional.of(suffix);
311               }
312             }
313           }
314 
315           return Jsons.obj(
316             Jsons.p("uri", track.getURI().toString()),
317             Jsons.p("lang", lang.orElse(null)),
318             Jsons.p("generatorType", findTag.apply("generator-type").orElse(null)),
319             Jsons.p("generator", findTag.apply("generator").orElse(null)),
320             Jsons.p("type", findTag.apply("type").orElse(null))
321           );
322         })
323         .collect(Collectors.toCollection(ArrayList::new));
324   }
325 
326   private static String findThumbnail(MediaPackage mp) {
327     // Find a suitable thumbnail.
328     // TODO: This certainly has to be improved in the future.
329     return Arrays.stream(mp.getAttachments())
330         .filter(a -> a.getFlavor().getSubtype().equals("player+preview"))
331         .map(a -> a.getURI().toString())
332         .findFirst()
333         .orElse(null);
334   }
335 
336   private static List<Jsons.Val> findSegments(MediaPackage mp) {
337     return Arrays.stream(mp.getAttachments())
338       .filter(a -> a.getFlavor().getSubtype().equals("segment+preview"))
339       .map(s -> Jsons.obj(
340           Jsons.p("uri", s.getURI().toString()),
341           Jsons.p("startTime", MediaTimePointImpl.parseTimePoint(
342               s.getReference().getProperty("time")
343           ).getTimeInMilliseconds())
344       ))
345       .collect(Collectors.toCollection(ArrayList::new));
346   }
347 
348   private static Jsons.Val findTimelinePreview(MediaPackage mp) {
349     return Arrays.stream(mp.getAttachments())
350         .filter(a -> a.getFlavor().getSubtype().equals("timeline+preview"))
351         .map(a -> {
352           final var props = a.getProperties();
353           final var imageCountX = props.get("imageSizeX");
354           final var imageCountY = props.get("imageSizeY");
355           final var resolutionX = props.get("resolutionX");
356           final var resolutionY = props.get("resolutionY");
357 
358           final var anyNull = imageCountX == null
359               || imageCountY == null
360               || resolutionX == null
361               || resolutionY == null;
362 
363           if (anyNull) {
364             return null;
365           }
366 
367           return (Jsons.Val) Jsons.obj(
368             Jsons.p("url", a.getURI().toString()),
369             Jsons.p("imageCountX", imageCountX),
370             Jsons.p("imageCountY", imageCountY),
371             Jsons.p("resolutionX", resolutionX),
372             Jsons.p("resolutionY", resolutionY)
373           );
374         })
375         .filter(o -> o != null)
376         .findFirst()
377         .orElse(Jsons.NULL);
378   }
379 
380   /** Converts a series into the corresponding JSON representation */
381   Item(Series series) {
382     this.modifiedDate = series.getModifiedDate();
383 
384     var serializedACL = series.getAccessControl();
385     var acl = new AccessControlList();
386     if (serializedACL != null) {
387       try {
388         acl = AccessControlParser.parseAcl(serializedACL);
389       } catch (Exception e) {
390         throw new RuntimeException(e);
391       }
392     }
393 
394     if (series.isDeleted()) {
395       this.obj = Jsons.obj(
396         Jsons.p("kind", "series-deleted"),
397         Jsons.p("id", series.getId()),
398         Jsons.p("updated", series.getModifiedDate().getTime())
399       );
400     } else {
401       // Created date
402       var createdDateString = series.getDublinCore().getFirst(PROPERTY_CREATED);
403       var created = Jsons.NULL;
404       var date = EncodingSchemeUtils.decodeDate(createdDateString);
405       if (date != null) {
406         created = Jsons.v(date.getTime());
407       } else {
408         logger.warn("Series {} has unparsable created-date: {}", series.getId(), createdDateString);
409       }
410 
411       var additionalMetadata = dccToMetadata(Arrays.asList(series.getDublinCore()), Set.of(new String[] {
412           "created", "title", "description", "identifier",
413       }));
414 
415       this.obj = Jsons.obj(
416         Jsons.p("kind", "series"),
417         Jsons.p("id", series.getId()),
418         Jsons.p("title", series.getDublinCore().getFirst(PROPERTY_TITLE)),
419         Jsons.p("description", series.getDublinCore().getFirst(PROPERTY_DESCRIPTION)),
420         Jsons.p("acl", assembleAcl(acl.getEntries())),
421         Jsons.p("metadata", additionalMetadata),
422         Jsons.p("created", created),
423         Jsons.p("updated", series.getModifiedDate().getTime())
424       );
425     }
426   }
427 
/**
 * Converts a playlist into the corresponding JSON representation: either a small
 * "playlist-deleted" object or the full "playlist" object including entries and ACL.
 */
Item(Playlist playlist) {
  this.modifiedDate = playlist.getUpdated();

  if (playlist.isDeleted()) {
    // Deleted playlists only carry ID and timestamp, so neither the ACL nor the entries
    // need to be assembled.
    this.obj = Jsons.obj(
      Jsons.p("kind", "playlist-deleted"),
      Jsons.p("id", playlist.getId()),
      Jsons.p("updated", playlist.getUpdated().getTime())
    );
    return;
  }

  final var acl = assembleAcl(
      playlist.getAccessControlEntries()
          .stream()
          .map(entry -> entry.toAccessControlEntry())
          .collect(Collectors.toList())
  );

  // Assemble entries
  final List<Jsons.Val> entries = playlist.getEntries().stream().map(entry -> Jsons.obj(
        Jsons.p("id", entry.getId()),
        Jsons.p("contentId", entry.getContentId()),
        Jsons.p("type", entry.getType().getCode())
  )).collect(Collectors.toCollection(ArrayList::new));

  this.obj = Jsons.obj(
    Jsons.p("kind", "playlist"),
    Jsons.p("id", playlist.getId()),
    Jsons.p("title", playlist.getTitle()),
    Jsons.p("description", playlist.getDescription()),
    Jsons.p("creator", playlist.getCreator()),
    Jsons.p("entries", Jsons.arr(entries)),
    Jsons.p("acl", acl),
    Jsons.p("updated", this.modifiedDate.getTime())
  );
}
465 
466   Date getModifiedDate() {
467     return this.modifiedDate;
468   }
469 
470   Jsons.Val getJson() {
471     return this.obj;
472   }
473 }