1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.opencastproject.caption.converters;
22
23 import org.opencastproject.caption.api.Caption;
24 import org.opencastproject.caption.api.CaptionConverter;
25 import org.opencastproject.caption.api.CaptionConverterException;
26 import org.opencastproject.caption.api.IllegalTimeFormatException;
27 import org.opencastproject.caption.api.Time;
28 import org.opencastproject.caption.impl.CaptionImpl;
29 import org.opencastproject.caption.impl.TimeImpl;
30 import org.opencastproject.mediapackage.MediaPackageElement;
31 import org.opencastproject.mediapackage.MediaPackageElement.Type;
32
33 import org.json.simple.JSONArray;
34 import org.json.simple.JSONObject;
35 import org.json.simple.parser.JSONParser;
36 import org.osgi.service.component.annotations.Component;
37 import org.slf4j.Logger;
38 import org.slf4j.LoggerFactory;
39
40 import java.io.IOException;
41 import java.io.InputStream;
42 import java.io.InputStreamReader;
43 import java.io.OutputStream;
44 import java.text.NumberFormat;
45 import java.util.ArrayList;
46 import java.util.List;
47 import java.util.Locale;
48
49 @Component(
50 immediate = true,
51 service = { CaptionConverter.class },
52 property = {
53 "service.description=IBM Watson caption converter",
54 "caption.format=ibm-watson"
55 }
56 )
57 public class IBMWatsonCaptionConverter implements CaptionConverter {
58
59
60 private static final Logger logger = LoggerFactory.getLogger(IBMWatsonCaptionConverter.class);
61
62 private static final int LINE_SIZE = 100;
63
64 @Override
65 public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
66 List<Caption> captionList = new ArrayList<Caption>();
67 JSONParser jsonParser = new JSONParser();
68
69 try {
70 JSONObject resultsObj = (JSONObject) jsonParser.parse(new InputStreamReader(inputStream));
71 String jobId = "Unknown";
72 if (resultsObj.get("id") != null) {
73 jobId = (String) resultsObj.get("id");
74 }
75
76
77 if (resultsObj.get("warnings") != null) {
78 JSONArray warningsArray = (JSONArray) resultsObj.get("warnings");
79 if (warningsArray != null) {
80 for (Object w : warningsArray) {
81 logger.warn("Warning from Speech-To-Text service: {}", w);
82 }
83 }
84 }
85
86 JSONArray outerResultsArray = (JSONArray) resultsObj.get("results");
87 JSONObject obj = (JSONObject) outerResultsArray.get(0);
88 JSONArray resultsArray = (JSONArray) obj.get("results");
89
90 resultsLoop:
91 for (int i = 0; i < resultsArray.size(); i++) {
92 JSONObject resultElement = (JSONObject) resultsArray.get(i);
93
94 if (!(Boolean) resultElement.get("final")) {
95 continue;
96 }
97
98 JSONArray alternativesArray = (JSONArray) resultElement.get("alternatives");
99 if (alternativesArray != null && alternativesArray.size() > 0) {
100 JSONObject alternativeElement = (JSONObject) alternativesArray.get(0);
101 String transcript = (String) alternativeElement.get("transcript");
102 if (transcript != null) {
103 JSONArray timestampsArray = (JSONArray) alternativeElement.get("timestamps");
104 if (timestampsArray == null || timestampsArray.size() == 0) {
105 logger.warn("Could not build caption object for job {}, result index {}: timestamp data not found",
106 jobId, i);
107 continue;
108 }
109
110 String[] words = transcript.split("\\s+");
111 StringBuffer line = new StringBuffer();
112 int indexFirst = -1;
113 int indexLast = -1;
114 for (int j = 0; j < words.length; j++) {
115 if (indexFirst == -1) {
116 indexFirst = j;
117 }
118 line.append(words[j]);
119 line.append(" ");
120 if (line.length() >= LINE_SIZE || j == words.length - 1) {
121 indexLast = j;
122
123 double start = -1;
124 double end = -1;
125 if (indexLast < timestampsArray.size()) {
126
127 JSONArray wordTsArray = (JSONArray) timestampsArray.get(indexFirst);
128 if (wordTsArray.size() == 3) {
129 start = NumberFormat.getInstance(Locale.US)
130 .parse(removeEndCharacter((wordTsArray.get(1).toString()), "s")).doubleValue();
131 }
132
133 wordTsArray = (JSONArray) timestampsArray.get(indexLast);
134 if (wordTsArray.size() == 3) {
135 end = NumberFormat.getInstance(Locale.US)
136 .parse(removeEndCharacter((wordTsArray.get(2).toString()), "s")).doubleValue();
137 }
138 }
139 if (start == -1 || end == -1) {
140 logger.warn("Could not build caption object for job {}, result index {}: start/end times not found",
141 jobId, i);
142 continue resultsLoop;
143 }
144
145 String[] captionLines = new String[1];
146 captionLines[0] = line.toString().replace("%HESITATION", "...");
147 captionList.add(new CaptionImpl(buildTime((long) (start * 1000)), buildTime((long) (end * 1000)),
148 captionLines));
149 indexFirst = -1;
150 indexLast = -1;
151 line.setLength(0);
152 }
153 }
154 }
155 }
156 }
157 } catch (Exception e) {
158 logger.warn("Error when parsing IBM Watson transcriptions result", e);
159 throw new CaptionConverterException(e);
160 }
161
162 return captionList;
163 }
164
165 @Override
166 public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
167 throw new UnsupportedOperationException();
168 }
169
170 @Override
171 public String[] getLanguageList(InputStream inputStream) throws CaptionConverterException {
172 throw new UnsupportedOperationException();
173 }
174
175 @Override
176 public String getExtension() {
177 return "json";
178 }
179
180 @Override
181 public Type getElementType() {
182 return MediaPackageElement.Type.Attachment;
183 }
184
185 private Time buildTime(long ms) throws IllegalTimeFormatException {
186 int h = (int) (ms / 3600000L);
187 int m = (int) ((ms % 3600000L) / 60000L);
188 int s = (int) ((ms % 60000L) / 1000L);
189 ms = (int) (ms % 1000);
190
191 return new TimeImpl(h, m, s, (int) ms);
192 }
193
194 private String removeEndCharacter(String str, String end) {
195 if (str.endsWith(end)) {
196 str = str.replace(end, "");
197 }
198 return str;
199 }
200
201 }