View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  
22  
23  package org.opencastproject.metadata.dublincore;
24  
25  import org.joda.time.Duration;
26  import org.joda.time.format.ISODateTimeFormat;
27  import org.joda.time.format.ISOPeriodFormat;
28  
29  import java.text.SimpleDateFormat;
30  import java.util.Date;
31  import java.util.HashMap;
32  import java.util.Map;
33  import java.util.Optional;
34  import java.util.TimeZone;
35  import java.util.regex.Matcher;
36  import java.util.regex.Pattern;
37  
38  /**
39   * Utility class to facilitate the work with DCMI encoding schemes.
40   */
41  public final class EncodingSchemeUtils {
42  
43    private static final Map<Precision, String> formats = new HashMap<Precision, String>();
44  
45    static {
46      formats.put(Precision.Year, "yyyy");
47      formats.put(Precision.Month, "yyyy-MM");
48      formats.put(Precision.Day, "yyyy-MM-dd");
49      formats.put(Precision.Minute, "yyyy-MM-dd'T'HH:mm'Z'");
50      formats.put(Precision.Second, "yyyy-MM-dd'T'HH:mm:ss'Z'");
51      formats.put(Precision.Fraction, "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
52    }
53  
54    /** Disable construction of this utility class */
55    private EncodingSchemeUtils() {
56    }
57  
58    /**
59     * Encode a date with the given precision into a Dublin Core string value, using the recommended W3C-DTF scheme. The
60     * UTC timezone is used for all precisions from {@link Precision#Minute} to {@link Precision#Fraction}. For years,
61     * months and days the local timezone is used instead to ensure that the given date enters the DublinCore as is. If
62     * UTC was used it may happen that you get the previous or next day, month or year respectively
63     * <p>
64     * The language of the returned value is {@link DublinCore#LANGUAGE_UNDEFINED}.
65     * <p>
66     * See <a href="http://www.w3.org/TR/NOTE-datetime">http://www.w3.org/TR/NOTE-datetime</a> for more information about
67     * W3C-DTF.
68     *
69     * @param date
70     *          the date to encode
71     * @param precision
72     *          the precision to use
73     */
74    public static DublinCoreValue encodeDate(Date date, Precision precision) {
75      if (date == null) {
76        throw new IllegalArgumentException("The date must not be null");
77      }
78      if (precision == null) {
79        throw new IllegalArgumentException("The precision must not be null");
80      }
81  
82      return DublinCoreValue.mk(formatDate(date, precision), DublinCore.LANGUAGE_UNDEFINED,
83          Optional.of(DublinCore.ENC_SCHEME_W3CDTF));
84    }
85  
86    public static String formatDate(Date date, Precision precision) {
87      SimpleDateFormat f = new SimpleDateFormat(formats.get(precision));
88      if (precision == Precision.Minute || precision == Precision.Second || precision == Precision.Fraction) {
89        f.setTimeZone(TimeZone.getTimeZone("UTC"));
90      }
91      return f.format(date);
92    }
93  
94    /**
95     * Encode a period with the given precision into a Dublin Core string value using the recommended DCMI Period scheme.
96     * For the usage of the UTC timezone please refer to {@link #encodeDate(Date, Precision)} for further information.
97     * <p>
98     * One of the dates may be null to create an open interval.
99     * <p>
100    * The language of the returned value is {@link DublinCore#LANGUAGE_UNDEFINED}.
101    * <p>
102    * See <a href="http://dublincore.org/documents/dcmi-period/">http://dublincore.org/documents/dcmi-period/</a> for
103    * more information about DCMI Period.
104    *
105    * @param period
106    *          the period
107    * @param precision
108    *          the precision
109    */
110   public static DublinCoreValue encodePeriod(DCMIPeriod period, Precision precision) {
111     if (period == null) {
112       throw new IllegalArgumentException("The period must not be null");
113     }
114     if (precision == null) {
115       throw new IllegalArgumentException("The precision must not be null");
116     }
117 
118     StringBuilder b = new StringBuilder();
119     if (period.hasStart()) {
120       b.append("start=").append(formatDate(period.getStart(), precision)).append(";");
121     }
122     if (period.hasEnd()) {
123       if (b.length() > 0) {
124         b.append(" ");
125       }
126       b.append("end=").append(formatDate(period.getEnd(), precision)).append(";");
127     }
128     if (period.hasName()) {
129       b.append(" ").append("name=").append(period.getName().replace(";", "")).append(";");
130     }
131     b.append(" ").append("scheme=W3C-DTF;");
132     return DublinCoreValue.mk(b.toString(), DublinCore.LANGUAGE_UNDEFINED, Optional.of(DublinCore.ENC_SCHEME_PERIOD));
133   }
134 
135   /**
136    * Encode a duration measured in milliseconds into a Dublin Core string using the
137    * {@link DublinCore#ENC_SCHEME_ISO8601} encoding scheme <code>PTnHnMnS</code>.
138    * <p>
139    * The language of the returned value is {@link DublinCore#LANGUAGE_UNDEFINED}.
140    * <p>
141    * See <a href="http://en.wikipedia.org/wiki/ISO_8601#Durations"> ISO8601 Durations</a> for details.
142    *
143    * @param duration
144    *          the duration in milliseconds
145    */
146   public static DublinCoreValue encodeDuration(long duration) {
147     return DublinCoreValue.mk(ISOPeriodFormat.standard().print(new Duration(duration).toPeriod()),
148             DublinCore.LANGUAGE_UNDEFINED, Optional.of(DublinCore.ENC_SCHEME_ISO8601));
149   }
150 
151   /**
152    * Decode a string encoded in the ISO8601 encoding scheme.
153    * <p>
154    * Also supports the REPLAY legacy format <code>hh:mm:ss</code>.
155    * <p>
156    * See <a href="http://en.wikipedia.org/wiki/ISO_8601#Durations"> ISO8601 Durations</a> for details.
157    *
158    * @param value
159    *          the ISO encoded string
160    * @return the duration in milliseconds or null, if the value cannot be parsed
161    */
162   public static Long decodeDuration(String value) {
163     try {
164       return ISOPeriodFormat.standard().parsePeriod(value).toStandardDuration().getMillis();
165     } catch (IllegalArgumentException ignore) {
166     }
167     // also support the legacy format hh:mm:ss
168     String[] parts = value.split(":");
169     try {
170       if (parts.length == 1) {
171         return Long.parseLong(parts[0]) * 1000;
172       }
173       if (parts.length == 2) {
174         return Long.parseLong(parts[0]) * 1000 * 60 + Long.parseLong(parts[1]) * 1000;
175       }
176       if (parts.length == 3) {
177         return Long.parseLong(parts[0]) * 1000 * 60 * 60
178             + Long.parseLong(parts[1]) * 1000 * 60
179             + Long.parseLong(parts[2]) * 1000;
180       }
181     } catch (NumberFormatException ignore) {
182     }
183     return null;
184   }
185 
186   /**
187    * Decode a string encoded in the ISO8601 encoding scheme.
188    *
189    * @param value
190    *          the Dublin Core value
191    * @return the duration in milliseconds or null, if the value cannot be parsed or is in a different encoding scheme
192    */
193   public static Long decodeDuration(DublinCoreValue value) {
194     if (!value.hasEncodingScheme() || value.getEncodingScheme().get().equals(DublinCore.ENC_SCHEME_ISO8601)) {
195       return decodeDuration(value.getValue());
196     }
197     return null;
198   }
199 
200   public static Long decodeMandatoryDuration(String value) {
201     Long l = decodeDuration(value);
202     if (l == null) {
203       throw new IllegalArgumentException("Cannot decode duration: " + value);
204     }
205     return l;
206   }
207 
208   /**
209    * Tries to decode the given value as a W3C-DTF encoded date. If decoding fails, null is returned.
210    *
211    * @return the date or null if decoding fails
212    */
213   public static Date decodeDate(DublinCoreValue value) {
214     if (!value.hasEncodingScheme() || value.getEncodingScheme().get().equals(DublinCore.ENC_SCHEME_W3CDTF)) {
215       try {
216         return parseW3CDTF(value.getValue());
217       } catch (IllegalArgumentException ignore) {
218       }
219     }
220 
221     // Try unixtime in milliseconds (backwards-compatibility with older mediapackages)
222     try {
223       long timestamp = Long.parseLong(value.getValue());
224       Date decoded = new java.util.Date(timestamp);
225       return decoded;
226     } catch (NumberFormatException nfe) {
227     }
228 
229     return null;
230   }
231 
232   /**
233    * Tries to decode the given value as a W3C-DTF encoded date. If decoding fails, null is returned.
234    *
235    * @return the date or null if decoding fails
236    */
237   public static Date decodeDate(String value) {
238     try {
239       return parseW3CDTF(value);
240     } catch (IllegalArgumentException ignore) {
241     }
242 
243     // Try unixtime in milliseconds (backwards-compatibility with older mediapackages)
244     try {
245       long timestamp = Long.parseLong(value);
246       Date decoded = new java.util.Date(timestamp);
247       return decoded;
248     } catch (NumberFormatException nfe) {
249     }
250 
251     return null;
252   }
253 
254   /**
255    * Like {@link #decodeDate(String)}, but throws an {@link IllegalArgumentException} if the value cannot be decoded.
256    *
257    * @param value
258    *          the value
259    * @return the date
260    * @throws IllegalArgumentException
261    *           if the value cannot be decoded
262    */
263   public static Date decodeMandatoryDate(DublinCoreValue value) {
264     Date date = decodeDate(value);
265     if (date == null) {
266       throw new IllegalArgumentException("Cannot decode to Date: " + value);
267     }
268     return date;
269   }
270 
271   /**
272    * Like {@link #decodeDate(String)}, but throws an {@link IllegalArgumentException} if the value cannot be decoded.
273    *
274    * @return the date
275    * @throws IllegalArgumentException
276    *           if the value cannot be decoded
277    */
278   public static Date decodeMandatoryDate(String value) {
279     Date date = decodeDate(value);
280     if (date == null) {
281       throw new IllegalArgumentException("Cannot decode to Date: " + value);
282     }
283     return date;
284   }
285 
286   private static final Pattern DCMI_PERIOD = Pattern.compile("(start|end|name)\\s*=\\s*(.*?)(?:;|\\s*$)");
287   private static final Pattern DCMI_PERIOD_SCHEME = Pattern.compile("scheme\\s*=\\s*(.*?)(?:;|\\s*$)");
288 
289   /**
290    * Tries to decode a string in the DCMI period format, using W3C-DTF for the encoding of the individual dates. If
291    * parsing fails at any point, null will be returned.
292    *
293    * @return the period or null if decoding fails
294    */
295   public static DCMIPeriod decodePeriod(DublinCoreValue value) {
296     return decodePeriod(value.getValue());
297   }
298 
299   /**
300    * Tries to decode a string in the DCMI period format, using W3C-DTF for the encoding of the individual dates. If
301    * parsing fails at any point, null will be returned.
302    *
303    * @return the period or null if decoding fails
304    */
305   public static DCMIPeriod decodePeriod(String value) {
306     // Parse value
307     Matcher schemeMatcher = DCMI_PERIOD_SCHEME.matcher(value);
308     boolean mayBeW3CDTFEncoded = true;
309     if (schemeMatcher.find()) {
310       String schemeString = schemeMatcher.group(1);
311       if (!"W3C-DTF".equalsIgnoreCase(schemeString) && !"W3CDTF".equalsIgnoreCase(schemeString)) {
312         mayBeW3CDTFEncoded = false;
313       }
314     }
315     try {
316       if (mayBeW3CDTFEncoded) {
317         // Declare fields
318         Date start = null;
319         Date end = null;
320         String name = null;
321         // Parse
322         Matcher m = DCMI_PERIOD.matcher(value);
323         while (m.find()) {
324           String field = m.group(1);
325           String fieldValue = m.group(2);
326           if ("start".equals(field)) {
327             if (start != null) {
328               return null;
329             }
330             start = parseW3CDTF(fieldValue);
331           } else if ("end".equals(field)) {
332             if (end != null) {
333               return null;
334             }
335             end = parseW3CDTF(fieldValue);
336           } else if ("name".equals(field)) {
337             if (name != null) {
338               return null;
339             }
340             name = fieldValue;
341           }
342         }
343         if (start == null && end == null) {
344           return null;
345         }
346         return new DCMIPeriod(start, end, name);
347       }
348     } catch (IllegalArgumentException ignore) {
349       // Parse error
350     }
351     return null;
352   }
353 
354   /**
355    * Like {@link #decodePeriod(String)}, but throws an {@link IllegalArgumentException} if the value cannot be decoded.
356    *
357    * @return the period
358    * @throws IllegalArgumentException
359    *           if the value cannot be decoded
360    */
361   public static DCMIPeriod decodeMandatoryPeriod(DublinCoreValue value) {
362     return decodeMandatoryPeriod(value.getValue());
363   }
364 
365   /**
366    * Like {@link #decodePeriod(DublinCoreValue)}, but throws an {@link IllegalArgumentException} if the value cannot be
367    * decoded.
368    *
369    * @return the period
370    * @throws IllegalArgumentException
371    *           if the value cannot be decoded
372    */
373   public static DCMIPeriod decodeMandatoryPeriod(String value) {
374     DCMIPeriod period = decodePeriod(value);
375     if (period == null) {
376       throw new IllegalArgumentException("Cannot decode to DCMIPeriod: " + value);
377     }
378 
379     return period;
380   }
381 
382   /**
383    * Tries to decode the value to a temporal object. For now, supported types are {@link java.util.Date},
384    * {@link DCMIPeriod} and Long for a duration.
385    *
386    * @param value
387    *          the value to decode
388    * @return a temporal object of the said types or null if decoding fails
389    */
390   public static Temporal decodeTemporal(DublinCoreValue value) {
391     // First try Date
392     Date instant = decodeDate(value);
393     if (instant != null) {
394       return Temporal.instant(instant);
395     }
396     DCMIPeriod period = decodePeriod(value);
397     if (period != null) {
398       return Temporal.period(period);
399     }
400     Long duration = decodeDuration(value);
401     if (duration != null) {
402       return Temporal.duration(duration);
403     }
404     return null;
405   }
406 
407   /**
408    * Like {@link #decodeTemporal(DublinCoreValue)}, but throws an {@link IllegalArgumentException} if the value cannot
409    * be decoded.
410    *
411    * @return the temporal object of type {@link java.util.Date} or {@link DCMIPeriod}
412    * @throws IllegalArgumentException
413    *           if the value cannot be decoded
414    */
415   public static Temporal decodeMandatoryTemporal(DublinCoreValue value) {
416     Temporal temporal = decodeTemporal(value);
417     if (value == null) {
418       throw new IllegalArgumentException("Cannot decode to either Date or DCMIPeriod: " + value);
419     }
420 
421     return temporal;
422   }
423 
424   /**
425    * @throws IllegalArgumentException
426    *           if the value cannot be parsed
427    */
428   private static Date parseW3CDTF(String value) {
429     return ISODateTimeFormat.dateTimeParser().parseDateTime(value).toDate();
430   }
431 
432 }