View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  package org.opencastproject.caption.converters;
22  
23  import org.opencastproject.caption.api.Caption;
24  import org.opencastproject.caption.api.CaptionConverter;
25  import org.opencastproject.caption.api.CaptionConverterException;
26  import org.opencastproject.caption.api.IllegalTimeFormatException;
27  import org.opencastproject.caption.api.Time;
28  import org.opencastproject.caption.impl.CaptionImpl;
29  import org.opencastproject.caption.impl.TimeImpl;
30  import org.opencastproject.mediapackage.MediaPackageElement;
31  import org.opencastproject.mediapackage.MediaPackageElement.Type;
32  
33  import org.json.simple.JSONArray;
34  import org.json.simple.JSONObject;
35  import org.json.simple.parser.JSONParser;
36  import org.osgi.service.component.annotations.Component;
37  import org.slf4j.Logger;
38  import org.slf4j.LoggerFactory;
39  
40  import java.io.IOException;
41  import java.io.InputStream;
42  import java.io.InputStreamReader;
43  import java.io.OutputStream;
44  import java.text.NumberFormat;
45  import java.util.ArrayList;
46  import java.util.List;
47  import java.util.Locale;
48  
49  @Component(
50      immediate = true,
51      service = { CaptionConverter.class },
52      property = {
53          "service.description=Google speech caption converter",
54          "caption.format=google-speech"
55      }
56  )
57  public class GoogleSpeechCaptionConverter implements CaptionConverter {
58  
59    /**
60     * The logger
61     */
62    private static final Logger logger = LoggerFactory.getLogger(GoogleSpeechCaptionConverter.class);
63  
64    // Default transcription text line size
65    private static final int LINE_SIZE = 100;
66  
67    @Override
68    public List<Caption> importCaption(InputStream inputStream, String languageLineSize)
69            throws CaptionConverterException {
70      List<Caption> captionList = new ArrayList<Caption>();
71      JSONParser jsonParser = new JSONParser();
72      int transcriptionLineSize = 0;
73      try {
74        // No language to specify so define size of a transcripts line
75        transcriptionLineSize = Integer.parseInt(languageLineSize.trim());
76        logger.info("Transcripts line size {} used", transcriptionLineSize);
77      } catch (NumberFormatException nfe) {
78        transcriptionLineSize = LINE_SIZE;
79        logger.info("Default transcripts line size {} used", transcriptionLineSize);
80      }
81  
82      try {
83        JSONObject outputObj = (JSONObject) jsonParser.parse(new InputStreamReader(inputStream));
84        String jobId = "Unknown";
85        if (outputObj.get("name") != null) {
86          jobId = (String) outputObj.get("name");
87        }
88  
89        JSONObject responseObj = (JSONObject) outputObj.get("response");
90        JSONArray resultsArray = (JSONArray) responseObj.get("results");
91  
92        resultsLoop:
93        for (int i = 0; i < resultsArray.size(); i++) {
94          JSONObject resultElement = (JSONObject) resultsArray.get(i);
95          JSONArray alternativesArray = (JSONArray) resultElement.get("alternatives");
96          if (alternativesArray != null && alternativesArray.size() > 0) {
97            JSONObject alternativeElement = (JSONObject) alternativesArray.get(0);
98            if (!alternativeElement.isEmpty()) {
99              // remove trailing space in order to have correct transcript length
100             String transcript = ((String) alternativeElement.get("transcript")).trim();
101             if (transcript != null) {
102               JSONArray timestampsArray = (JSONArray) alternativeElement.get("words");
103               if (timestampsArray == null || timestampsArray.isEmpty()) {
104                 logger.warn("Could not build caption object for job {}, result index {}: timestamp data not found",
105                     jobId, i);
106                 continue;
107               }
108               // Force a maximum line size of transcriptionLineSize + one word
109               String[] words = transcript.split("\\s+");
110               StringBuffer line = new StringBuffer();
111               int indexFirst = -1;
112               int indexLast = -1;
113               for (int j = 0; j < words.length; j++) {
114                 if (indexFirst == -1) {
115                   indexFirst = j;
116                 }
117                 line.append(words[j]);
118                 line.append(" ");
119                 if (line.length() >= transcriptionLineSize || j == words.length - 1) {
120                   indexLast = j;
121                   // Create a caption
122                   double start = -1;
123                   double end = -1;
124                   if (indexLast < timestampsArray.size()) {
125                     // Get start time of first element
126                     JSONObject wordTSList = (JSONObject) timestampsArray.get(indexFirst);
127                     if (wordTSList.size() == 3) {
128                       // Remove 's' at the end
129                       Number startNumber = NumberFormat.getInstance(Locale.US)
130                           .parse(removeEndCharacter((wordTSList.get("startTime").toString()), "s"));
131                       start = startNumber.doubleValue();
132                     }
133                     // Get end time of last element
134                     wordTSList = (JSONObject) timestampsArray.get(indexLast);
135                     if (wordTSList.size() == 3) {
136                       Number endNumber = NumberFormat.getInstance(Locale.US)
137                           .parse(removeEndCharacter((wordTSList.get("endTime").toString()), "s"));
138                       end = endNumber.doubleValue();
139                     }
140                   }
141                   if (start == -1 || end == -1) {
142                     logger.warn("Could not build caption object for job {}, result index {}: start/end times not found",
143                         jobId, i);
144                     continue resultsLoop;
145                   }
146 
147                   String[] captionLines = new String[1];
148                   captionLines[0] = line.toString().replace("%HESITATION", "...");
149                   captionList.add(new CaptionImpl(buildTime((long) (start * 1000)), buildTime((long) (end * 1000)),
150                       captionLines));
151                   indexFirst = -1;
152                   indexLast = -1;
153                   line.setLength(0);
154                 }
155               }
156             }
157           }
158         }
159       }
160     } catch (Exception e) {
161       logger.warn("Error when parsing Google transcriptions result", e);
162       throw new CaptionConverterException(e);
163     }
164 
165     return captionList;
166   }
167 
168   @Override
169   public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
170     throw new UnsupportedOperationException();
171   }
172 
173   @Override
174   public String[] getLanguageList(InputStream inputStream) throws CaptionConverterException {
175     throw new UnsupportedOperationException();
176   }
177 
178   @Override
179   public String getExtension() {
180     return "json";
181   }
182 
183   @Override
184   public Type getElementType() {
185     return MediaPackageElement.Type.Attachment;
186   }
187 
188   private Time buildTime(long ms) throws IllegalTimeFormatException {
189     int h = (int) (ms / 3600000L);
190     int m = (int) ((ms % 3600000L) / 60000L);
191     int s = (int) ((ms % 60000L) / 1000L);
192     ms = (int) (ms % 1000);
193 
194     return new TimeImpl(h, m, s, (int) ms);
195   }
196 
197   private String removeEndCharacter(String str, String end) {
198     if (str.endsWith(end)) {
199       str = str.replace(end, "");
200     }
201     return str;
202   }
203 
204 }