View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  package org.opencastproject.caption.converters;
22  
import org.opencastproject.caption.api.Caption;
import org.opencastproject.caption.api.CaptionConverter;
import org.opencastproject.caption.api.CaptionConverterException;
import org.opencastproject.caption.api.IllegalTimeFormatException;
import org.opencastproject.caption.api.Time;
import org.opencastproject.caption.impl.CaptionImpl;
import org.opencastproject.caption.impl.TimeImpl;
import org.opencastproject.mediapackage.MediaPackageElement;
import org.opencastproject.mediapackage.MediaPackageElement.Type;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.osgi.service.component.annotations.Component;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
48  
49  @Component(
50      immediate = true,
51      service = { CaptionConverter.class },
52      property = {
53          "service.description=Google speech caption converter",
54          "caption.format=google-speech"
55      }
56  )
57  public class GoogleSpeechCaptionConverter implements CaptionConverter {
58  
59    /**
60     * The logger
61     */
62    private static final Logger logger = LoggerFactory.getLogger(GoogleSpeechCaptionConverter.class);
63  
64    // Default transcription text line size
65    private static final int LINE_SIZE = 100;
66  
67    @Override
68    public List<Caption> importCaption(InputStream inputStream, String languageLineSize) throws CaptionConverterException {
69      List<Caption> captionList = new ArrayList<Caption>();
70      JSONParser jsonParser = new JSONParser();
71      int transcriptionLineSize = 0;
72      try {
73        // No language to specify so define size of a transcripts line
74        transcriptionLineSize = Integer.parseInt(languageLineSize.trim());
75        logger.info("Transcripts line size {} used", transcriptionLineSize);
76      } catch (NumberFormatException nfe) {
77        transcriptionLineSize = LINE_SIZE;
78        logger.info("Default transcripts line size {} used", transcriptionLineSize);
79      }
80  
81      try {
82        JSONObject outputObj = (JSONObject) jsonParser.parse(new InputStreamReader(inputStream));
83        String jobId = "Unknown";
84        if (outputObj.get("name") != null) {
85          jobId = (String) outputObj.get("name");
86        }
87  
88        JSONObject responseObj = (JSONObject) outputObj.get("response");
89        JSONArray resultsArray = (JSONArray) responseObj.get("results");
90  
91        resultsLoop:
92        for (int i = 0; i < resultsArray.size(); i++) {
93          JSONObject resultElement = (JSONObject) resultsArray.get(i);
94          JSONArray alternativesArray = (JSONArray) resultElement.get("alternatives");
95          if (alternativesArray != null && alternativesArray.size() > 0) {
96            JSONObject alternativeElement = (JSONObject) alternativesArray.get(0);
97            if (!alternativeElement.isEmpty()) {
98              // remove trailing space in order to have correct transcript length
99              String transcript = ((String) alternativeElement.get("transcript")).trim();
100             if (transcript != null) {
101               JSONArray timestampsArray = (JSONArray) alternativeElement.get("words");
102               if (timestampsArray == null || timestampsArray.isEmpty()) {
103                 logger.warn("Could not build caption object for job {}, result index {}: timestamp data not found", jobId, i);
104                 continue;
105               }
106               // Force a maximum line size of transcriptionLineSize + one word
107               String[] words = transcript.split("\\s+");
108               StringBuffer line = new StringBuffer();
109               int indexFirst = -1;
110               int indexLast = -1;
111               for (int j = 0; j < words.length; j++) {
112                 if (indexFirst == -1) {
113                   indexFirst = j;
114                 }
115                 line.append(words[j]);
116                 line.append(" ");
117                 if (line.length() >= transcriptionLineSize || j == words.length - 1) {
118                   indexLast = j;
119                   // Create a caption
120                   double start = -1;
121                   double end = -1;
122                   if (indexLast < timestampsArray.size()) {
123                     // Get start time of first element
124                     JSONObject wordTSList = (JSONObject) timestampsArray.get(indexFirst);
125                     if (wordTSList.size() == 3) {
126                       // Remove 's' at the end
127                       Number startNumber = NumberFormat.getInstance(Locale.US).parse(removeEndCharacter((wordTSList.get("startTime").toString()), "s"));
128                       start = startNumber.doubleValue();
129                     }
130                     // Get end time of last element
131                     wordTSList = (JSONObject) timestampsArray.get(indexLast);
132                     if (wordTSList.size() == 3) {
133                       Number endNumber = NumberFormat.getInstance(Locale.US).parse(removeEndCharacter((wordTSList.get("endTime").toString()), "s"));
134                       end = endNumber.doubleValue();
135                     }
136                   }
137                   if (start == -1 || end == -1) {
138                     logger.warn("Could not build caption object for job {}, result index {}: start/end times not found", jobId, i);
139                     continue resultsLoop;
140                   }
141 
142                   String[] captionLines = new String[1];
143                   captionLines[0] = line.toString().replace("%HESITATION", "...");
144                   captionList.add(new CaptionImpl(buildTime((long) (start * 1000)), buildTime((long) (end * 1000)), captionLines));
145                   indexFirst = -1;
146                   indexLast = -1;
147                   line.setLength(0);
148                 }
149               }
150             }
151           }
152         }
153       }
154     } catch (Exception e) {
155       logger.warn("Error when parsing Google transcriptions result", e);
156       throw new CaptionConverterException(e);
157     }
158 
159     return captionList;
160   }
161 
162   @Override
163   public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
164     throw new UnsupportedOperationException();
165   }
166 
167   @Override
168   public String[] getLanguageList(InputStream inputStream) throws CaptionConverterException {
169     throw new UnsupportedOperationException();
170   }
171 
172   @Override
173   public String getExtension() {
174     return "json";
175   }
176 
177   @Override
178   public Type getElementType() {
179     return MediaPackageElement.Type.Attachment;
180   }
181 
182   private Time buildTime(long ms) throws IllegalTimeFormatException {
183     int h = (int) (ms / 3600000L);
184     int m = (int) ((ms % 3600000L) / 60000L);
185     int s = (int) ((ms % 60000L) / 1000L);
186     ms = (int) (ms % 1000);
187 
188     return new TimeImpl(h, m, s, (int) ms);
189   }
190 
191   private String removeEndCharacter(String str, String end) {
192     if (str.endsWith(end)) {
193       str = str.replace(end, "");
194     }
195     return str;
196   }
197 
198 }