1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.opencastproject.caption.converters;
22
23 import org.opencastproject.caption.api.Caption;
24 import org.opencastproject.caption.api.CaptionConverter;
25 import org.opencastproject.caption.api.CaptionConverterException;
26 import org.opencastproject.caption.api.IllegalTimeFormatException;
27 import org.opencastproject.caption.api.Time;
28 import org.opencastproject.caption.impl.CaptionImpl;
29 import org.opencastproject.caption.impl.TimeImpl;
30 import org.opencastproject.mediapackage.MediaPackageElement;
31 import org.opencastproject.mediapackage.MediaPackageElement.Type;
32
33 import org.json.simple.JSONArray;
34 import org.json.simple.JSONObject;
35 import org.json.simple.parser.JSONParser;
36 import org.osgi.service.component.annotations.Component;
37 import org.slf4j.Logger;
38 import org.slf4j.LoggerFactory;
39
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
48
49 @Component(
50 immediate = true,
51 service = { CaptionConverter.class },
52 property = {
53 "service.description=Google speech caption converter",
54 "caption.format=google-speech"
55 }
56 )
57 public class GoogleSpeechCaptionConverter implements CaptionConverter {
58
59
60
61
62 private static final Logger logger = LoggerFactory.getLogger(GoogleSpeechCaptionConverter.class);
63
64
65 private static final int LINE_SIZE = 100;
66
67 @Override
68 public List<Caption> importCaption(InputStream inputStream, String languageLineSize) throws CaptionConverterException {
69 List<Caption> captionList = new ArrayList<Caption>();
70 JSONParser jsonParser = new JSONParser();
71 int transcriptionLineSize = 0;
72 try {
73
74 transcriptionLineSize = Integer.parseInt(languageLineSize.trim());
75 logger.info("Transcripts line size {} used", transcriptionLineSize);
76 } catch (NumberFormatException nfe) {
77 transcriptionLineSize = LINE_SIZE;
78 logger.info("Default transcripts line size {} used", transcriptionLineSize);
79 }
80
81 try {
82 JSONObject outputObj = (JSONObject) jsonParser.parse(new InputStreamReader(inputStream));
83 String jobId = "Unknown";
84 if (outputObj.get("name") != null) {
85 jobId = (String) outputObj.get("name");
86 }
87
88 JSONObject responseObj = (JSONObject) outputObj.get("response");
89 JSONArray resultsArray = (JSONArray) responseObj.get("results");
90
91 resultsLoop:
92 for (int i = 0; i < resultsArray.size(); i++) {
93 JSONObject resultElement = (JSONObject) resultsArray.get(i);
94 JSONArray alternativesArray = (JSONArray) resultElement.get("alternatives");
95 if (alternativesArray != null && alternativesArray.size() > 0) {
96 JSONObject alternativeElement = (JSONObject) alternativesArray.get(0);
97 if (!alternativeElement.isEmpty()) {
98
99 String transcript = ((String) alternativeElement.get("transcript")).trim();
100 if (transcript != null) {
101 JSONArray timestampsArray = (JSONArray) alternativeElement.get("words");
102 if (timestampsArray == null || timestampsArray.isEmpty()) {
103 logger.warn("Could not build caption object for job {}, result index {}: timestamp data not found", jobId, i);
104 continue;
105 }
106
107 String[] words = transcript.split("\\s+");
108 StringBuffer line = new StringBuffer();
109 int indexFirst = -1;
110 int indexLast = -1;
111 for (int j = 0; j < words.length; j++) {
112 if (indexFirst == -1) {
113 indexFirst = j;
114 }
115 line.append(words[j]);
116 line.append(" ");
117 if (line.length() >= transcriptionLineSize || j == words.length - 1) {
118 indexLast = j;
119
120 double start = -1;
121 double end = -1;
122 if (indexLast < timestampsArray.size()) {
123
124 JSONObject wordTSList = (JSONObject) timestampsArray.get(indexFirst);
125 if (wordTSList.size() == 3) {
126
127 Number startNumber = NumberFormat.getInstance(Locale.US).parse(removeEndCharacter((wordTSList.get("startTime").toString()), "s"));
128 start = startNumber.doubleValue();
129 }
130
131 wordTSList = (JSONObject) timestampsArray.get(indexLast);
132 if (wordTSList.size() == 3) {
133 Number endNumber = NumberFormat.getInstance(Locale.US).parse(removeEndCharacter((wordTSList.get("endTime").toString()), "s"));
134 end = endNumber.doubleValue();
135 }
136 }
137 if (start == -1 || end == -1) {
138 logger.warn("Could not build caption object for job {}, result index {}: start/end times not found", jobId, i);
139 continue resultsLoop;
140 }
141
142 String[] captionLines = new String[1];
143 captionLines[0] = line.toString().replace("%HESITATION", "...");
144 captionList.add(new CaptionImpl(buildTime((long) (start * 1000)), buildTime((long) (end * 1000)), captionLines));
145 indexFirst = -1;
146 indexLast = -1;
147 line.setLength(0);
148 }
149 }
150 }
151 }
152 }
153 }
154 } catch (Exception e) {
155 logger.warn("Error when parsing Google transcriptions result", e);
156 throw new CaptionConverterException(e);
157 }
158
159 return captionList;
160 }
161
162 @Override
163 public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
164 throw new UnsupportedOperationException();
165 }
166
167 @Override
168 public String[] getLanguageList(InputStream inputStream) throws CaptionConverterException {
169 throw new UnsupportedOperationException();
170 }
171
172 @Override
173 public String getExtension() {
174 return "json";
175 }
176
177 @Override
178 public Type getElementType() {
179 return MediaPackageElement.Type.Attachment;
180 }
181
182 private Time buildTime(long ms) throws IllegalTimeFormatException {
183 int h = (int) (ms / 3600000L);
184 int m = (int) ((ms % 3600000L) / 60000L);
185 int s = (int) ((ms % 60000L) / 1000L);
186 ms = (int) (ms % 1000);
187
188 return new TimeImpl(h, m, s, (int) ms);
189 }
190
191 private String removeEndCharacter(String str, String end) {
192 if (str.endsWith(end)) {
193 str = str.replace(end, "");
194 }
195 return str;
196 }
197
198 }