View Javadoc
1   /*
2    * Licensed to The Apereo Foundation under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional
4    * information regarding copyright ownership.
5    *
6    *
7    * The Apereo Foundation licenses this file to you under the Educational
8    * Community License, Version 2.0 (the "License"); you may not use this file
9    * except in compliance with the License. You may obtain a copy of the License
10   * at:
11   *
12   *   http://opensource.org/licenses/ecl2.txt
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
17   * License for the specific language governing permissions and limitations under
18   * the License.
19   *
20   */
21  
22  package org.opencastproject.silencedetection.ffmpeg;
23  
24  import org.opencastproject.mediapackage.MediaPackageException;
25  import org.opencastproject.mediapackage.Track;
26  import org.opencastproject.silencedetection.api.MediaSegment;
27  import org.opencastproject.silencedetection.api.MediaSegments;
28  import org.opencastproject.silencedetection.api.SilenceDetectionFailedException;
29  import org.opencastproject.silencedetection.impl.SilenceDetectionProperties;
30  import org.opencastproject.util.NotFoundException;
31  import org.opencastproject.workspace.api.Workspace;
32  
33  import org.apache.commons.lang3.StringUtils;
34  import org.osgi.framework.BundleContext;
35  import org.slf4j.Logger;
36  import org.slf4j.LoggerFactory;
37  
38  import java.io.BufferedReader;
39  import java.io.File;
40  import java.io.IOException;
41  import java.io.InputStreamReader;
42  import java.text.DecimalFormat;
43  import java.text.DecimalFormatSymbols;
44  import java.util.LinkedList;
45  import java.util.List;
46  import java.util.Locale;
47  import java.util.Properties;
48  import java.util.regex.Matcher;
49  import java.util.regex.Pattern;
50  
51  /**
52   * Find silent sequences in audio stream using Gstreamer.
53   */
54  public class FFmpegSilenceDetector {
55  
56    private static final Logger logger = LoggerFactory.getLogger(FFmpegSilenceDetector.class);
57  
58    public static final String FFMPEG_BINARY_CONFIG = "org.opencastproject.composer.ffmpeg.path";
59    public static final String FFMPEG_BINARY_DEFAULT = "ffmpeg";
60  
61    private static final Long DEFAULT_SILENCE_MIN_LENGTH = 5000L;
62    private static final Long DEFAULT_SILENCE_PRE_LENGTH = 2000L;
63    private static final String DEFAULT_THRESHOLD_DB = "-40dB";
64    private static final Long DEFAULT_VOICE_MIN_LENGTH = 60000L;
65  
66    private static String binary = FFMPEG_BINARY_DEFAULT;
67    private String filePath;
68    private String trackId;
69  
70    private List<MediaSegment> segments = null;
71  
72    /**
73     * Update FFMPEG binary path if set in configuration.
74     *
75     * @param bundleContext
76     */
77    public static void init(BundleContext bundleContext) {
78      String binaryPath = bundleContext.getProperty(FFMPEG_BINARY_CONFIG);
79      try {
80        if (StringUtils.isNotBlank(binaryPath)) {
81          File binaryFile = new File(StringUtils.trim(binaryPath));
82          if (binaryFile.exists()) {
83            binary = binaryFile.getAbsolutePath();
84          } else {
85            logger.warn("FFmpeg binary file {} does not exist", StringUtils.trim(binaryPath));
86          }
87        }
88      } catch (Exception ex) {
89        logger.error("Failed to set ffmpeg binary path", ex);
90      }
91    }
92  
93  
94    /**
95     * Create nonsilent sequences detection pipeline.
96     * Parse audio stream and store all positions, where the volume level fall under the threshold.
97     *
98     * @param properties
99     * @param track source track
100    */
101   public FFmpegSilenceDetector(Properties properties, Track track, Workspace workspace)
102           throws SilenceDetectionFailedException, MediaPackageException, IOException {
103 
104     long minSilenceLength;
105     long minVoiceLength;
106     long preSilenceLength;
107     String thresholdDB;
108 
109     //Ensure properties is not null, avoids null checks later
110     if (null == properties) {
111       properties = new Properties();
112     }
113 
114     minSilenceLength = parseLong(properties, SilenceDetectionProperties.SILENCE_MIN_LENGTH, DEFAULT_SILENCE_MIN_LENGTH);
115     minVoiceLength = parseLong(properties, SilenceDetectionProperties.VOICE_MIN_LENGTH, DEFAULT_VOICE_MIN_LENGTH);
116     preSilenceLength = parseLong(properties, SilenceDetectionProperties.SILENCE_PRE_LENGTH, DEFAULT_SILENCE_PRE_LENGTH);
117     thresholdDB = properties.getProperty(SilenceDetectionProperties.SILENCE_THRESHOLD_DB, DEFAULT_THRESHOLD_DB);
118 
119     trackId = track.getIdentifier();
120 
121     /* Make sure the element can be analyzed using this analysis implementation */
122     if (!track.hasAudio()) {
123       logger.warn("Track {} has no audio stream to run a silece detection on", trackId);
124       throw new SilenceDetectionFailedException("Element has no audio stream");
125     }
126 
127     /* Make sure we are not allowed to move the beginning of a segment into the last segment */
128     if (preSilenceLength > minSilenceLength) {
129       logger.error("Pre silence length ({}) is configured to be greater than minimun silence length ({})",
130           preSilenceLength, minSilenceLength);
131       throw new SilenceDetectionFailedException("preSilenceLength > minSilenceLength");
132     }
133 
134     try {
135       File mediaFile = workspace.get(track.getURI());
136       filePath = mediaFile.getAbsolutePath();
137     } catch (NotFoundException e) {
138       throw new SilenceDetectionFailedException("Error finding the media file in workspace", e);
139     } catch (IOException e) {
140       throw new SilenceDetectionFailedException("Error reading media file in workspace", e);
141     }
142 
143     if (track.getDuration() == null) {
144       throw new MediaPackageException("Track " + trackId + " does not have a duration");
145     }
146     logger.debug("Track {} loaded, duration is {} s", filePath, track.getDuration() / 1000);
147     logger.info("Starting silence detection of {}", filePath);
148     DecimalFormat decimalFmt = new DecimalFormat("0.000", new DecimalFormatSymbols(Locale.US));
149     String minSilenceLengthInSeconds = decimalFmt.format((double) minSilenceLength / 1000.0);
150     String filter = "silencedetect=noise=" + thresholdDB + ":duration=" + minSilenceLengthInSeconds;
151     String[] command = new String[] {
152         binary, "-nostats", "-nostdin", "-i", filePath, "-vn", "-filter:a", filter, "-f", "null", "-"};
153 
154     logger.info("Running {}", (Object) command);
155 
156     ProcessBuilder pbuilder = new ProcessBuilder(command);
157     List<String> segmentsStrings = new LinkedList<String>();
158     Process process = pbuilder.start();
159     try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getErrorStream()))) {
160       String line = reader.readLine();
161       while (null != line) {
162         /* We want only lines from the silence detection filter */
163         logger.debug("FFmpeg output: {}", line);
164         if (line.startsWith("[silencedetect ") // FFmpeg <= 8.0
165             || line.startsWith("[Parsed_silencedetect_")) { // FFmpeg > 8.0
166           segmentsStrings.add(line);
167         }
168         line = reader.readLine();
169       }
170     } catch (IOException e) {
171       logger.error("Error executing ffmpeg", e);
172     }
173 
174     /*
175      * Example output:
176      * [silencedetect @ 0x2968e40] silence_start: 466.486
177      * [silencedetect @ 0x2968e40] silence_end: 469.322 | silence_duration: 2.83592
178      */
179 
180     LinkedList<MediaSegment> segmentsTmp = new LinkedList<>();
181     if (!segmentsStrings.isEmpty()) {
182       long lastSilenceEnd = 0;
183       long lastSilenceStart = 0;
184       Pattern patternStart = Pattern.compile("silence_start\\:\\ \\d+\\.\\d+");
185       Pattern patternEnd = Pattern.compile("silence_end\\:\\ \\d+\\.\\d+");
186       for (String seginfo : segmentsStrings) {
187         /* Match silence ends */
188         Matcher matcher = patternEnd.matcher(seginfo);
189         String time = "";
190         while (matcher.find()) {
191           time = matcher.group().substring(13);
192         }
193         if (!"".equals(time)) {
194           long silenceEnd = (long) (Double.parseDouble(time) * 1000);
195           if (silenceEnd > lastSilenceEnd) {
196             logger.debug("Found silence end at {}", silenceEnd);
197             lastSilenceEnd = silenceEnd;
198           }
199           continue;
200         }
201 
202         /* Match silence start -> End of segments */
203         matcher = patternStart.matcher(seginfo);
204         time = "";
205         while (matcher.find()) {
206           time = matcher.group().substring(15);
207         }
208         if (!"".equals(time)) {
209           lastSilenceStart = (long) (Double.parseDouble(time) * 1000);
210           logger.debug("Found silence start at {}", lastSilenceStart);
211           if (lastSilenceStart - lastSilenceEnd > minVoiceLength) {
212             /* Found a valid segment */
213             long segmentStart = java.lang.Math.max(0, lastSilenceEnd - preSilenceLength);
214             logger.info("Adding segment from {} to {}", segmentStart, lastSilenceStart);
215             segmentsTmp.add(new MediaSegment(segmentStart, lastSilenceStart));
216           }
217         }
218       }
219       /* Add last segment if it is no silence and the segment is long enough */
220       if (lastSilenceStart < lastSilenceEnd && track.getDuration() - lastSilenceEnd > minVoiceLength) {
221         long segmentStart = java.lang.Math.max(0, lastSilenceEnd - preSilenceLength);
222         logger.info("Adding final segment from {} to {}", segmentStart, track.getDuration());
223         segmentsTmp.add(new MediaSegment(segmentStart, track.getDuration()));
224       }
225     }
226 
227     logger.info("Segmentation of track {} yielded {} segments", trackId, segmentsTmp.size());
228     segments = segmentsTmp;
229 
230   }
231 
232   private Long parseLong(Properties properties, String key, Long defaultValue) {
233     try {
234       return Long.parseLong(properties.getProperty(key, defaultValue.toString()));
235     } catch (NumberFormatException e) {
236       logger.warn("Configuration value for {} is invalid, using default value of {} instead", key, defaultValue);
237       return defaultValue;
238     }
239   }
240 
241   /**
242    * Returns found media segments.
243    * @return nonsilent media segments
244    */
245   public MediaSegments getMediaSegments() {
246     if (segments == null) {
247       return null;
248     }
249 
250     return new MediaSegments(trackId, filePath, segments);
251   }
252 }