View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  
22  
23  package org.opencastproject.metadata.dublincore;
24  
import com.entwinemedia.fn.data.Opt;

import org.joda.time.Duration;
import org.joda.time.format.ISODateTimeFormat;
import org.joda.time.format.ISOPeriodFormat;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
38  
39  /**
40   * Utility class to facilitate the work with DCMI encoding schemes.
41   */
42  public final class EncodingSchemeUtils {
43  
44    private static final Map<Precision, String> formats = new HashMap<Precision, String>();
45  
46    static {
47      formats.put(Precision.Year, "yyyy");
48      formats.put(Precision.Month, "yyyy-MM");
49      formats.put(Precision.Day, "yyyy-MM-dd");
50      formats.put(Precision.Minute, "yyyy-MM-dd'T'HH:mm'Z'");
51      formats.put(Precision.Second, "yyyy-MM-dd'T'HH:mm:ss'Z'");
52      formats.put(Precision.Fraction, "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
53    }
54  
55    /** Disable construction of this utility class */
56    private EncodingSchemeUtils() {
57    }
58  
59    /**
60     * Encode a date with the given precision into a Dublin Core string value, using the recommended W3C-DTF scheme. The
61     * UTC timezone is used for all precisions from {@link Precision#Minute} to {@link Precision#Fraction}. For years,
62     * months and days the local timezone is used instead to ensure that the given date enters the DublinCore as is. If
63     * UTC was used it may happen that you get the previous or next day, month or year respectively
64     * <p>
65     * The language of the returned value is {@link DublinCore#LANGUAGE_UNDEFINED}.
66     * <p>
67     * See <a href="http://www.w3.org/TR/NOTE-datetime">http://www.w3.org/TR/NOTE-datetime</a> for more information about
68     * W3C-DTF.
69     *
70     * @param date
71     *          the date to encode
72     * @param precision
73     *          the precision to use
74     */
75    public static DublinCoreValue encodeDate(Date date, Precision precision) {
76      if (date == null)
77        throw new IllegalArgumentException("The date must not be null");
78      if (precision == null)
79        throw new IllegalArgumentException("The precision must not be null");
80  
81      return DublinCoreValue.mk(formatDate(date, precision), DublinCore.LANGUAGE_UNDEFINED, Opt.some(DublinCore.ENC_SCHEME_W3CDTF));
82    }
83  
84    public static String formatDate(Date date, Precision precision) {
85      SimpleDateFormat f = new SimpleDateFormat(formats.get(precision));
86      if (precision == Precision.Minute || precision == Precision.Second || precision == Precision.Fraction)
87        f.setTimeZone(TimeZone.getTimeZone("UTC"));
88      return f.format(date);
89    }
90  
91    /**
92     * Encode a period with the given precision into a Dublin Core string value using the recommended DCMI Period scheme.
93     * For the usage of the UTC timezone please refer to {@link #encodeDate(Date, Precision)} for further information.
94     * <p>
95     * One of the dates may be null to create an open interval.
96     * <p>
97     * The language of the returned value is {@link DublinCore#LANGUAGE_UNDEFINED}.
98     * <p>
99     * See <a href="http://dublincore.org/documents/dcmi-period/">http://dublincore.org/documents/dcmi-period/</a> for
100    * more information about DCMI Period.
101    *
102    * @param period
103    *          the period
104    * @param precision
105    *          the precision
106    */
107   public static DublinCoreValue encodePeriod(DCMIPeriod period, Precision precision) {
108     if (period == null)
109       throw new IllegalArgumentException("The period must not be null");
110     if (precision == null)
111       throw new IllegalArgumentException("The precision must not be null");
112 
113     StringBuilder b = new StringBuilder();
114     if (period.hasStart()) {
115       b.append("start=").append(formatDate(period.getStart(), precision)).append(";");
116     }
117     if (period.hasEnd()) {
118       if (b.length() > 0)
119         b.append(" ");
120       b.append("end=").append(formatDate(period.getEnd(), precision)).append(";");
121     }
122     if (period.hasName()) {
123       b.append(" ").append("name=").append(period.getName().replace(";", "")).append(";");
124     }
125     b.append(" ").append("scheme=W3C-DTF;");
126     return DublinCoreValue.mk(b.toString(), DublinCore.LANGUAGE_UNDEFINED, Opt.some(DublinCore.ENC_SCHEME_PERIOD));
127   }
128 
129   /**
130    * Encode a duration measured in milliseconds into a Dublin Core string using the
131    * {@link DublinCore#ENC_SCHEME_ISO8601} encoding scheme <code>PTnHnMnS</code>.
132    * <p>
133    * The language of the returned value is {@link DublinCore#LANGUAGE_UNDEFINED}.
134    * <p>
135    * See <a href="http://en.wikipedia.org/wiki/ISO_8601#Durations"> ISO8601 Durations</a> for details.
136    *
137    * @param duration
138    *          the duration in milliseconds
139    */
140   public static DublinCoreValue encodeDuration(long duration) {
141     return DublinCoreValue.mk(ISOPeriodFormat.standard().print(new Duration(duration).toPeriod()),
142             DublinCore.LANGUAGE_UNDEFINED, Opt.some(DublinCore.ENC_SCHEME_ISO8601));
143   }
144 
145   /**
146    * Decode a string encoded in the ISO8601 encoding scheme.
147    * <p>
148    * Also supports the REPLAY legacy format <code>hh:mm:ss</code>.
149    * <p>
150    * See <a href="http://en.wikipedia.org/wiki/ISO_8601#Durations"> ISO8601 Durations</a> for details.
151    *
152    * @param value
153    *          the ISO encoded string
154    * @return the duration in milliseconds or null, if the value cannot be parsed
155    */
156   public static Long decodeDuration(String value) {
157     try {
158       return ISOPeriodFormat.standard().parsePeriod(value).toStandardDuration().getMillis();
159     } catch (IllegalArgumentException ignore) {
160     }
161     // also support the legacy format hh:mm:ss
162     String[] parts = value.split(":");
163     try {
164       if (parts.length == 1)
165         return Long.parseLong(parts[0]) * 1000;
166       if (parts.length == 2)
167         return Long.parseLong(parts[0]) * 1000 * 60 + Long.parseLong(parts[1]) * 1000;
168       if (parts.length == 3)
169         return Long.parseLong(parts[0]) * 1000 * 60 * 60 + Long.parseLong(parts[1]) * 1000 * 60
170                 + Long.parseLong(parts[2]) * 1000;
171     } catch (NumberFormatException ignore) {
172     }
173     return null;
174   }
175 
176   /**
177    * Decode a string encoded in the ISO8601 encoding scheme.
178    *
179    * @param value
180    *          the Dublin Core value
181    * @return the duration in milliseconds or null, if the value cannot be parsed or is in a different encoding scheme
182    */
183   public static Long decodeDuration(DublinCoreValue value) {
184     if (!value.hasEncodingScheme() || value.getEncodingScheme().get().equals(DublinCore.ENC_SCHEME_ISO8601)) {
185       return decodeDuration(value.getValue());
186     }
187     return null;
188   }
189 
190   public static Long decodeMandatoryDuration(String value) {
191     Long l = decodeDuration(value);
192     if (l == null)
193       throw new IllegalArgumentException("Cannot decode duration: " + value);
194     return l;
195   }
196 
197   /**
198    * Tries to decode the given value as a W3C-DTF encoded date. If decoding fails, null is returned.
199    *
200    * @return the date or null if decoding fails
201    */
202   public static Date decodeDate(DublinCoreValue value) {
203     if (!value.hasEncodingScheme() || value.getEncodingScheme().get().equals(DublinCore.ENC_SCHEME_W3CDTF)) {
204       try {
205         return parseW3CDTF(value.getValue());
206       } catch (IllegalArgumentException ignore) {
207       }
208     }
209 
210     // Try unixtime in milliseconds (backwards-compatibility with older mediapackages)
211     try {
212       long timestamp = Long.parseLong(value.getValue());
213       Date decoded = new java.util.Date(timestamp);
214       return decoded;
215     } catch (NumberFormatException nfe) {
216     }
217 
218     return null;
219   }
220 
221   /**
222    * Tries to decode the given value as a W3C-DTF encoded date. If decoding fails, null is returned.
223    *
224    * @return the date or null if decoding fails
225    */
226   public static Date decodeDate(String value) {
227     try {
228       return parseW3CDTF(value);
229     } catch (IllegalArgumentException ignore) {
230     }
231 
232     // Try unixtime in milliseconds (backwards-compatibility with older mediapackages)
233     try {
234       long timestamp = Long.parseLong(value);
235       Date decoded = new java.util.Date(timestamp);
236       return decoded;
237     } catch (NumberFormatException nfe) {
238     }
239 
240     return null;
241   }
242 
243   /**
244    * Like {@link #decodeDate(String)}, but throws an {@link IllegalArgumentException} if the value cannot be decoded.
245    *
246    * @param value
247    *          the value
248    * @return the date
249    * @throws IllegalArgumentException
250    *           if the value cannot be decoded
251    */
252   public static Date decodeMandatoryDate(DublinCoreValue value) {
253     Date date = decodeDate(value);
254     if (date == null)
255       throw new IllegalArgumentException("Cannot decode to Date: " + value);
256     return date;
257   }
258 
259   /**
260    * Like {@link #decodeDate(String)}, but throws an {@link IllegalArgumentException} if the value cannot be decoded.
261    *
262    * @return the date
263    * @throws IllegalArgumentException
264    *           if the value cannot be decoded
265    */
266   public static Date decodeMandatoryDate(String value) {
267     Date date = decodeDate(value);
268     if (date == null)
269       throw new IllegalArgumentException("Cannot decode to Date: " + value);
270     return date;
271   }
272 
273   private static final Pattern DCMI_PERIOD = Pattern.compile("(start|end|name)\\s*=\\s*(.*?)(?:;|\\s*$)");
274   private static final Pattern DCMI_PERIOD_SCHEME = Pattern.compile("scheme\\s*=\\s*(.*?)(?:;|\\s*$)");
275 
276   /**
277    * Tries to decode a string in the DCMI period format, using W3C-DTF for the encoding of the individual dates. If
278    * parsing fails at any point, null will be returned.
279    *
280    * @return the period or null if decoding fails
281    */
282   public static DCMIPeriod decodePeriod(DublinCoreValue value) {
283     return decodePeriod(value.getValue());
284   }
285 
286   /**
287    * Tries to decode a string in the DCMI period format, using W3C-DTF for the encoding of the individual dates. If
288    * parsing fails at any point, null will be returned.
289    *
290    * @return the period or null if decoding fails
291    */
292   public static DCMIPeriod decodePeriod(String value) {
293     // Parse value
294     Matcher schemeMatcher = DCMI_PERIOD_SCHEME.matcher(value);
295     boolean mayBeW3CDTFEncoded = true;
296     if (schemeMatcher.find()) {
297       String schemeString = schemeMatcher.group(1);
298       if (!"W3C-DTF".equalsIgnoreCase(schemeString) && !"W3CDTF".equalsIgnoreCase(schemeString)) {
299         mayBeW3CDTFEncoded = false;
300       }
301     }
302     try {
303       if (mayBeW3CDTFEncoded) {
304         // Declare fields
305         Date start = null;
306         Date end = null;
307         String name = null;
308         // Parse
309         Matcher m = DCMI_PERIOD.matcher(value);
310         while (m.find()) {
311           String field = m.group(1);
312           String fieldValue = m.group(2);
313           if ("start".equals(field)) {
314             if (start != null)
315               return null;
316             start = parseW3CDTF(fieldValue);
317           } else if ("end".equals(field)) {
318             if (end != null)
319               return null;
320             end = parseW3CDTF(fieldValue);
321           } else if ("name".equals(field)) {
322             if (name != null)
323               return null;
324             name = fieldValue;
325           }
326         }
327         if (start == null && end == null)
328           return null;
329         return new DCMIPeriod(start, end, name);
330       }
331     } catch (IllegalArgumentException ignore) {
332       // Parse error
333     }
334     return null;
335   }
336 
337   /**
338    * Like {@link #decodePeriod(String)}, but throws an {@link IllegalArgumentException} if the value cannot be decoded.
339    *
340    * @return the period
341    * @throws IllegalArgumentException
342    *           if the value cannot be decoded
343    */
344   public static DCMIPeriod decodeMandatoryPeriod(DublinCoreValue value) {
345     return decodeMandatoryPeriod(value.getValue());
346   }
347 
348   /**
349    * Like {@link #decodePeriod(DublinCoreValue)}, but throws an {@link IllegalArgumentException} if the value cannot be
350    * decoded.
351    *
352    * @return the period
353    * @throws IllegalArgumentException
354    *           if the value cannot be decoded
355    */
356   public static DCMIPeriod decodeMandatoryPeriod(String value) {
357     DCMIPeriod period = decodePeriod(value);
358     if (period == null)
359       throw new IllegalArgumentException("Cannot decode to DCMIPeriod: " + value);
360 
361     return period;
362   }
363 
364   /**
365    * Tries to decode the value to a temporal object. For now, supported types are {@link java.util.Date},
366    * {@link DCMIPeriod} and Long for a duration.
367    *
368    * @param value
369    *          the value to decode
370    * @return a temporal object of the said types or null if decoding fails
371    */
372   public static Temporal decodeTemporal(DublinCoreValue value) {
373     // First try Date
374     Date instant = decodeDate(value);
375     if (instant != null)
376       return Temporal.instant(instant);
377     DCMIPeriod period = decodePeriod(value);
378     if (period != null)
379       return Temporal.period(period);
380     Long duration = decodeDuration(value);
381     if (duration != null)
382       return Temporal.duration(duration);
383     return null;
384   }
385 
386   /**
387    * Like {@link #decodeTemporal(DublinCoreValue)}, but throws an {@link IllegalArgumentException} if the value cannot
388    * be decoded.
389    *
390    * @return the temporal object of type {@link java.util.Date} or {@link DCMIPeriod}
391    * @throws IllegalArgumentException
392    *           if the value cannot be decoded
393    */
394   public static Temporal decodeMandatoryTemporal(DublinCoreValue value) {
395     Temporal temporal = decodeTemporal(value);
396     if (value == null)
397       throw new IllegalArgumentException("Cannot decode to either Date or DCMIPeriod: " + value);
398 
399     return temporal;
400   }
401 
402   /**
403    * @throws IllegalArgumentException
404    *           if the value cannot be parsed
405    */
406   private static Date parseW3CDTF(String value) {
407     return ISODateTimeFormat.dateTimeParser().parseDateTime(value).toDate();
408   }
409 
410 }