View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  
22  
23  package org.opencastproject.metadata.dublincore;
24  
25  import org.joda.time.Duration;
26  import org.joda.time.format.ISODateTimeFormat;
27  import org.joda.time.format.ISOPeriodFormat;
28  
29  import java.text.SimpleDateFormat;
30  import java.util.Date;
31  import java.util.HashMap;
32  import java.util.Map;
33  import java.util.Optional;
34  import java.util.TimeZone;
35  import java.util.regex.Matcher;
36  import java.util.regex.Pattern;
37  
38  /**
39   * Utility class to facilitate the work with DCMI encoding schemes.
40   */
41  public final class EncodingSchemeUtils {
42  
43    private static final Map<Precision, String> formats = new HashMap<Precision, String>();
44  
45    static {
46      formats.put(Precision.Year, "yyyy");
47      formats.put(Precision.Month, "yyyy-MM");
48      formats.put(Precision.Day, "yyyy-MM-dd");
49      formats.put(Precision.Minute, "yyyy-MM-dd'T'HH:mm'Z'");
50      formats.put(Precision.Second, "yyyy-MM-dd'T'HH:mm:ss'Z'");
51      formats.put(Precision.Fraction, "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
52    }
53  
54    /** Disable construction of this utility class */
55    private EncodingSchemeUtils() {
56    }
57  
58    /**
59     * Encode a date with the given precision into a Dublin Core string value, using the recommended W3C-DTF scheme. The
60     * UTC timezone is used for all precisions from {@link Precision#Minute} to {@link Precision#Fraction}. For years,
61     * months and days the local timezone is used instead to ensure that the given date enters the DublinCore as is. If
62     * UTC was used it may happen that you get the previous or next day, month or year respectively
63     * <p>
64     * The language of the returned value is {@link DublinCore#LANGUAGE_UNDEFINED}.
65     * <p>
66     * See <a href="http://www.w3.org/TR/NOTE-datetime">http://www.w3.org/TR/NOTE-datetime</a> for more information about
67     * W3C-DTF.
68     *
69     * @param date
70     *          the date to encode
71     * @param precision
72     *          the precision to use
73     */
74    public static DublinCoreValue encodeDate(Date date, Precision precision) {
75      if (date == null)
76        throw new IllegalArgumentException("The date must not be null");
77      if (precision == null)
78        throw new IllegalArgumentException("The precision must not be null");
79  
80      return DublinCoreValue.mk(formatDate(date, precision), DublinCore.LANGUAGE_UNDEFINED, Optional.of(DublinCore.ENC_SCHEME_W3CDTF));
81    }
82  
83    public static String formatDate(Date date, Precision precision) {
84      SimpleDateFormat f = new SimpleDateFormat(formats.get(precision));
85      if (precision == Precision.Minute || precision == Precision.Second || precision == Precision.Fraction)
86        f.setTimeZone(TimeZone.getTimeZone("UTC"));
87      return f.format(date);
88    }
89  
90    /**
91     * Encode a period with the given precision into a Dublin Core string value using the recommended DCMI Period scheme.
92     * For the usage of the UTC timezone please refer to {@link #encodeDate(Date, Precision)} for further information.
93     * <p>
94     * One of the dates may be null to create an open interval.
95     * <p>
96     * The language of the returned value is {@link DublinCore#LANGUAGE_UNDEFINED}.
97     * <p>
98     * See <a href="http://dublincore.org/documents/dcmi-period/">http://dublincore.org/documents/dcmi-period/</a> for
99     * more information about DCMI Period.
100    *
101    * @param period
102    *          the period
103    * @param precision
104    *          the precision
105    */
106   public static DublinCoreValue encodePeriod(DCMIPeriod period, Precision precision) {
107     if (period == null)
108       throw new IllegalArgumentException("The period must not be null");
109     if (precision == null)
110       throw new IllegalArgumentException("The precision must not be null");
111 
112     StringBuilder b = new StringBuilder();
113     if (period.hasStart()) {
114       b.append("start=").append(formatDate(period.getStart(), precision)).append(";");
115     }
116     if (period.hasEnd()) {
117       if (b.length() > 0)
118         b.append(" ");
119       b.append("end=").append(formatDate(period.getEnd(), precision)).append(";");
120     }
121     if (period.hasName()) {
122       b.append(" ").append("name=").append(period.getName().replace(";", "")).append(";");
123     }
124     b.append(" ").append("scheme=W3C-DTF;");
125     return DublinCoreValue.mk(b.toString(), DublinCore.LANGUAGE_UNDEFINED, Optional.of(DublinCore.ENC_SCHEME_PERIOD));
126   }
127 
128   /**
129    * Encode a duration measured in milliseconds into a Dublin Core string using the
130    * {@link DublinCore#ENC_SCHEME_ISO8601} encoding scheme <code>PTnHnMnS</code>.
131    * <p>
132    * The language of the returned value is {@link DublinCore#LANGUAGE_UNDEFINED}.
133    * <p>
134    * See <a href="http://en.wikipedia.org/wiki/ISO_8601#Durations"> ISO8601 Durations</a> for details.
135    *
136    * @param duration
137    *          the duration in milliseconds
138    */
139   public static DublinCoreValue encodeDuration(long duration) {
140     return DublinCoreValue.mk(ISOPeriodFormat.standard().print(new Duration(duration).toPeriod()),
141             DublinCore.LANGUAGE_UNDEFINED, Optional.of(DublinCore.ENC_SCHEME_ISO8601));
142   }
143 
144   /**
145    * Decode a string encoded in the ISO8601 encoding scheme.
146    * <p>
147    * Also supports the REPLAY legacy format <code>hh:mm:ss</code>.
148    * <p>
149    * See <a href="http://en.wikipedia.org/wiki/ISO_8601#Durations"> ISO8601 Durations</a> for details.
150    *
151    * @param value
152    *          the ISO encoded string
153    * @return the duration in milliseconds or null, if the value cannot be parsed
154    */
155   public static Long decodeDuration(String value) {
156     try {
157       return ISOPeriodFormat.standard().parsePeriod(value).toStandardDuration().getMillis();
158     } catch (IllegalArgumentException ignore) {
159     }
160     // also support the legacy format hh:mm:ss
161     String[] parts = value.split(":");
162     try {
163       if (parts.length == 1)
164         return Long.parseLong(parts[0]) * 1000;
165       if (parts.length == 2)
166         return Long.parseLong(parts[0]) * 1000 * 60 + Long.parseLong(parts[1]) * 1000;
167       if (parts.length == 3)
168         return Long.parseLong(parts[0]) * 1000 * 60 * 60 + Long.parseLong(parts[1]) * 1000 * 60
169                 + Long.parseLong(parts[2]) * 1000;
170     } catch (NumberFormatException ignore) {
171     }
172     return null;
173   }
174 
175   /**
176    * Decode a string encoded in the ISO8601 encoding scheme.
177    *
178    * @param value
179    *          the Dublin Core value
180    * @return the duration in milliseconds or null, if the value cannot be parsed or is in a different encoding scheme
181    */
182   public static Long decodeDuration(DublinCoreValue value) {
183     if (!value.hasEncodingScheme() || value.getEncodingScheme().get().equals(DublinCore.ENC_SCHEME_ISO8601)) {
184       return decodeDuration(value.getValue());
185     }
186     return null;
187   }
188 
189   public static Long decodeMandatoryDuration(String value) {
190     Long l = decodeDuration(value);
191     if (l == null)
192       throw new IllegalArgumentException("Cannot decode duration: " + value);
193     return l;
194   }
195 
196   /**
197    * Tries to decode the given value as a W3C-DTF encoded date. If decoding fails, null is returned.
198    *
199    * @return the date or null if decoding fails
200    */
201   public static Date decodeDate(DublinCoreValue value) {
202     if (!value.hasEncodingScheme() || value.getEncodingScheme().get().equals(DublinCore.ENC_SCHEME_W3CDTF)) {
203       try {
204         return parseW3CDTF(value.getValue());
205       } catch (IllegalArgumentException ignore) {
206       }
207     }
208 
209     // Try unixtime in milliseconds (backwards-compatibility with older mediapackages)
210     try {
211       long timestamp = Long.parseLong(value.getValue());
212       Date decoded = new java.util.Date(timestamp);
213       return decoded;
214     } catch (NumberFormatException nfe) {
215     }
216 
217     return null;
218   }
219 
220   /**
221    * Tries to decode the given value as a W3C-DTF encoded date. If decoding fails, null is returned.
222    *
223    * @return the date or null if decoding fails
224    */
225   public static Date decodeDate(String value) {
226     try {
227       return parseW3CDTF(value);
228     } catch (IllegalArgumentException ignore) {
229     }
230 
231     // Try unixtime in milliseconds (backwards-compatibility with older mediapackages)
232     try {
233       long timestamp = Long.parseLong(value);
234       Date decoded = new java.util.Date(timestamp);
235       return decoded;
236     } catch (NumberFormatException nfe) {
237     }
238 
239     return null;
240   }
241 
242   /**
243    * Like {@link #decodeDate(String)}, but throws an {@link IllegalArgumentException} if the value cannot be decoded.
244    *
245    * @param value
246    *          the value
247    * @return the date
248    * @throws IllegalArgumentException
249    *           if the value cannot be decoded
250    */
251   public static Date decodeMandatoryDate(DublinCoreValue value) {
252     Date date = decodeDate(value);
253     if (date == null)
254       throw new IllegalArgumentException("Cannot decode to Date: " + value);
255     return date;
256   }
257 
258   /**
259    * Like {@link #decodeDate(String)}, but throws an {@link IllegalArgumentException} if the value cannot be decoded.
260    *
261    * @return the date
262    * @throws IllegalArgumentException
263    *           if the value cannot be decoded
264    */
265   public static Date decodeMandatoryDate(String value) {
266     Date date = decodeDate(value);
267     if (date == null)
268       throw new IllegalArgumentException("Cannot decode to Date: " + value);
269     return date;
270   }
271 
272   private static final Pattern DCMI_PERIOD = Pattern.compile("(start|end|name)\\s*=\\s*(.*?)(?:;|\\s*$)");
273   private static final Pattern DCMI_PERIOD_SCHEME = Pattern.compile("scheme\\s*=\\s*(.*?)(?:;|\\s*$)");
274 
275   /**
276    * Tries to decode a string in the DCMI period format, using W3C-DTF for the encoding of the individual dates. If
277    * parsing fails at any point, null will be returned.
278    *
279    * @return the period or null if decoding fails
280    */
281   public static DCMIPeriod decodePeriod(DublinCoreValue value) {
282     return decodePeriod(value.getValue());
283   }
284 
285   /**
286    * Tries to decode a string in the DCMI period format, using W3C-DTF for the encoding of the individual dates. If
287    * parsing fails at any point, null will be returned.
288    *
289    * @return the period or null if decoding fails
290    */
291   public static DCMIPeriod decodePeriod(String value) {
292     // Parse value
293     Matcher schemeMatcher = DCMI_PERIOD_SCHEME.matcher(value);
294     boolean mayBeW3CDTFEncoded = true;
295     if (schemeMatcher.find()) {
296       String schemeString = schemeMatcher.group(1);
297       if (!"W3C-DTF".equalsIgnoreCase(schemeString) && !"W3CDTF".equalsIgnoreCase(schemeString)) {
298         mayBeW3CDTFEncoded = false;
299       }
300     }
301     try {
302       if (mayBeW3CDTFEncoded) {
303         // Declare fields
304         Date start = null;
305         Date end = null;
306         String name = null;
307         // Parse
308         Matcher m = DCMI_PERIOD.matcher(value);
309         while (m.find()) {
310           String field = m.group(1);
311           String fieldValue = m.group(2);
312           if ("start".equals(field)) {
313             if (start != null)
314               return null;
315             start = parseW3CDTF(fieldValue);
316           } else if ("end".equals(field)) {
317             if (end != null)
318               return null;
319             end = parseW3CDTF(fieldValue);
320           } else if ("name".equals(field)) {
321             if (name != null)
322               return null;
323             name = fieldValue;
324           }
325         }
326         if (start == null && end == null)
327           return null;
328         return new DCMIPeriod(start, end, name);
329       }
330     } catch (IllegalArgumentException ignore) {
331       // Parse error
332     }
333     return null;
334   }
335 
336   /**
337    * Like {@link #decodePeriod(String)}, but throws an {@link IllegalArgumentException} if the value cannot be decoded.
338    *
339    * @return the period
340    * @throws IllegalArgumentException
341    *           if the value cannot be decoded
342    */
343   public static DCMIPeriod decodeMandatoryPeriod(DublinCoreValue value) {
344     return decodeMandatoryPeriod(value.getValue());
345   }
346 
347   /**
348    * Like {@link #decodePeriod(DublinCoreValue)}, but throws an {@link IllegalArgumentException} if the value cannot be
349    * decoded.
350    *
351    * @return the period
352    * @throws IllegalArgumentException
353    *           if the value cannot be decoded
354    */
355   public static DCMIPeriod decodeMandatoryPeriod(String value) {
356     DCMIPeriod period = decodePeriod(value);
357     if (period == null)
358       throw new IllegalArgumentException("Cannot decode to DCMIPeriod: " + value);
359 
360     return period;
361   }
362 
363   /**
364    * Tries to decode the value to a temporal object. For now, supported types are {@link java.util.Date},
365    * {@link DCMIPeriod} and Long for a duration.
366    *
367    * @param value
368    *          the value to decode
369    * @return a temporal object of the said types or null if decoding fails
370    */
371   public static Temporal decodeTemporal(DublinCoreValue value) {
372     // First try Date
373     Date instant = decodeDate(value);
374     if (instant != null)
375       return Temporal.instant(instant);
376     DCMIPeriod period = decodePeriod(value);
377     if (period != null)
378       return Temporal.period(period);
379     Long duration = decodeDuration(value);
380     if (duration != null)
381       return Temporal.duration(duration);
382     return null;
383   }
384 
385   /**
386    * Like {@link #decodeTemporal(DublinCoreValue)}, but throws an {@link IllegalArgumentException} if the value cannot
387    * be decoded.
388    *
389    * @return the temporal object of type {@link java.util.Date} or {@link DCMIPeriod}
390    * @throws IllegalArgumentException
391    *           if the value cannot be decoded
392    */
393   public static Temporal decodeMandatoryTemporal(DublinCoreValue value) {
394     Temporal temporal = decodeTemporal(value);
395     if (value == null)
396       throw new IllegalArgumentException("Cannot decode to either Date or DCMIPeriod: " + value);
397 
398     return temporal;
399   }
400 
401   /**
402    * @throws IllegalArgumentException
403    *           if the value cannot be parsed
404    */
405   private static Date parseW3CDTF(String value) {
406     return ISODateTimeFormat.dateTimeParser().parseDateTime(value).toDate();
407   }
408 
409 }