1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.opencastproject.caption.converters;
22
import org.opencastproject.caption.api.Caption;
import org.opencastproject.caption.api.CaptionConverter;
import org.opencastproject.caption.api.CaptionConverterException;
import org.opencastproject.caption.api.IllegalTimeFormatException;
import org.opencastproject.caption.api.Time;
import org.opencastproject.caption.impl.CaptionImpl;
import org.opencastproject.caption.impl.TimeImpl;
import org.opencastproject.mediapackage.MediaPackageElement;
import org.opencastproject.mediapackage.MediaPackageElement.Type;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.osgi.service.component.annotations.Component;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
48
49 @Component(
50 immediate = true,
51 service = { CaptionConverter.class },
52 property = {
53 "service.description=Google speech caption converter",
54 "caption.format=google-speech"
55 }
56 )
57 public class GoogleSpeechCaptionConverter implements CaptionConverter {
58
59
60
61
62 private static final Logger logger = LoggerFactory.getLogger(GoogleSpeechCaptionConverter.class);
63
64
65 private static final int LINE_SIZE = 100;
66
67 @Override
68 public List<Caption> importCaption(InputStream inputStream, String languageLineSize)
69 throws CaptionConverterException {
70 List<Caption> captionList = new ArrayList<Caption>();
71 JSONParser jsonParser = new JSONParser();
72 int transcriptionLineSize = 0;
73 try {
74
75 transcriptionLineSize = Integer.parseInt(languageLineSize.trim());
76 logger.info("Transcripts line size {} used", transcriptionLineSize);
77 } catch (NumberFormatException nfe) {
78 transcriptionLineSize = LINE_SIZE;
79 logger.info("Default transcripts line size {} used", transcriptionLineSize);
80 }
81
82 try {
83 JSONObject outputObj = (JSONObject) jsonParser.parse(new InputStreamReader(inputStream));
84 String jobId = "Unknown";
85 if (outputObj.get("name") != null) {
86 jobId = (String) outputObj.get("name");
87 }
88
89 JSONObject responseObj = (JSONObject) outputObj.get("response");
90 JSONArray resultsArray = (JSONArray) responseObj.get("results");
91
92 resultsLoop:
93 for (int i = 0; i < resultsArray.size(); i++) {
94 JSONObject resultElement = (JSONObject) resultsArray.get(i);
95 JSONArray alternativesArray = (JSONArray) resultElement.get("alternatives");
96 if (alternativesArray != null && alternativesArray.size() > 0) {
97 JSONObject alternativeElement = (JSONObject) alternativesArray.get(0);
98 if (!alternativeElement.isEmpty()) {
99
100 String transcript = ((String) alternativeElement.get("transcript")).trim();
101 if (transcript != null) {
102 JSONArray timestampsArray = (JSONArray) alternativeElement.get("words");
103 if (timestampsArray == null || timestampsArray.isEmpty()) {
104 logger.warn("Could not build caption object for job {}, result index {}: timestamp data not found",
105 jobId, i);
106 continue;
107 }
108
109 String[] words = transcript.split("\\s+");
110 StringBuffer line = new StringBuffer();
111 int indexFirst = -1;
112 int indexLast = -1;
113 for (int j = 0; j < words.length; j++) {
114 if (indexFirst == -1) {
115 indexFirst = j;
116 }
117 line.append(words[j]);
118 line.append(" ");
119 if (line.length() >= transcriptionLineSize || j == words.length - 1) {
120 indexLast = j;
121
122 double start = -1;
123 double end = -1;
124 if (indexLast < timestampsArray.size()) {
125
126 JSONObject wordTSList = (JSONObject) timestampsArray.get(indexFirst);
127 if (wordTSList.size() == 3) {
128
129 Number startNumber = NumberFormat.getInstance(Locale.US)
130 .parse(removeEndCharacter((wordTSList.get("startTime").toString()), "s"));
131 start = startNumber.doubleValue();
132 }
133
134 wordTSList = (JSONObject) timestampsArray.get(indexLast);
135 if (wordTSList.size() == 3) {
136 Number endNumber = NumberFormat.getInstance(Locale.US)
137 .parse(removeEndCharacter((wordTSList.get("endTime").toString()), "s"));
138 end = endNumber.doubleValue();
139 }
140 }
141 if (start == -1 || end == -1) {
142 logger.warn("Could not build caption object for job {}, result index {}: start/end times not found",
143 jobId, i);
144 continue resultsLoop;
145 }
146
147 String[] captionLines = new String[1];
148 captionLines[0] = line.toString().replace("%HESITATION", "...");
149 captionList.add(new CaptionImpl(buildTime((long) (start * 1000)), buildTime((long) (end * 1000)),
150 captionLines));
151 indexFirst = -1;
152 indexLast = -1;
153 line.setLength(0);
154 }
155 }
156 }
157 }
158 }
159 }
160 } catch (Exception e) {
161 logger.warn("Error when parsing Google transcriptions result", e);
162 throw new CaptionConverterException(e);
163 }
164
165 return captionList;
166 }
167
168 @Override
169 public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
170 throw new UnsupportedOperationException();
171 }
172
173 @Override
174 public String[] getLanguageList(InputStream inputStream) throws CaptionConverterException {
175 throw new UnsupportedOperationException();
176 }
177
178 @Override
179 public String getExtension() {
180 return "json";
181 }
182
183 @Override
184 public Type getElementType() {
185 return MediaPackageElement.Type.Attachment;
186 }
187
188 private Time buildTime(long ms) throws IllegalTimeFormatException {
189 int h = (int) (ms / 3600000L);
190 int m = (int) ((ms % 3600000L) / 60000L);
191 int s = (int) ((ms % 60000L) / 1000L);
192 ms = (int) (ms % 1000);
193
194 return new TimeImpl(h, m, s, (int) ms);
195 }
196
197 private String removeEndCharacter(String str, String end) {
198 if (str.endsWith(end)) {
199 str = str.replace(end, "");
200 }
201 return str;
202 }
203
204 }