| /* |
| * Copyright (C) 2011 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "sola_time_scaler.h" |
| |
| #include <math.h> |
| #include <hlogging.h> |
| #include <algorithm> |
| |
| #include "ring_buffer.h" |
| |
| #define FLAGS_sola_ring_buffer 2.0 |
| #define FLAGS_sola_enable_correlation true |
| |
| using std::max; |
| using std::min; |
| |
| namespace video_editing { |
| |
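| // SOLA (synchronized overlap-add) time scaling changes playback speed |
| // without altering pitch: input is consumed in fixed-size windows, and each |
| // window is cross-faded into the output at the offset (within an overlap |
| // region) where it best correlates with the audio already there. |
| |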
| // Returns a cross-correlation score for the specified buffers. |
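| // The score counts samples whose sign bits agree, so a perfect match |
| // scores num_frames * num_channels_. |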
| int SolaAnalyzer::Correlate(const float* buffer1, const float* buffer2, |
| int num_frames) { |
| CHECK(initialized_); |
| |
| int score = 0; |
| num_frames *= num_channels_; |
| while (num_frames-- > 0) { |
| // Increment the score if the sign bits match. |
| score += ((bit_cast<int32>(*buffer1++) ^ bit_cast<int32>(*buffer2++)) >= 0) |
| ? 1 : 0; |
| } |
| return score; |
| } |
| |
| // Trivial SolaAnalyzer subclass that bypasses correlation by always |
| // returning a perfect score. |
| class SolaBypassAnalyzer : public SolaAnalyzer { |
| public: |
| SolaBypassAnalyzer() { } |
| virtual int Correlate(const float*, const float*, int num_frames) { |
| return num_frames * num_channels_; |
| } |
| }; |
| |
| |
| // Default constructor. |
| SolaTimeScaler::SolaTimeScaler() |
| : input_buffer_(NULL), output_buffer_(NULL), analyzer_(NULL) { |
| sample_rate_ = 0; |
| num_channels_ = 0; |
| |
| draining_ = false; |
| initialized_ = false; |
| } |
| |
| SolaTimeScaler::~SolaTimeScaler() { |
| delete input_buffer_; |
| delete output_buffer_; |
| delete analyzer_; |
| } |
| |
| // Injects a SolaAnalyzer instance for analyzing signal frames. |
| // The scaler takes ownership of the analyzer. |
| void SolaTimeScaler::set_analyzer(SolaAnalyzer* analyzer) { |
| MutexLock lock(&mutex_); // lock out processing while updating |
| delete analyzer_; |
| analyzer_ = analyzer; |
| } |
| |
| // Initializes a SOLA timescaler. |
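| // Typical call sequence (parameter values below are illustrative only): |
| //   SolaTimeScaler scaler; |
| //   scaler.Init(44100.0, 2, 1.0, 0.040, 0.012); |
| //   scaler.set_speed(2.0); |
| //   scaler.InjectSamples(input, num_frames); |
| //   scaler.RetrieveSamples(output, scaler.available()); |
| //   scaler.Drain();  // then keep retrieving until available() returns 0 |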
| void SolaTimeScaler::Init(double sample_rate, |
| int num_channels, |
| double initial_speed, |
| double window_duration, |
| double overlap_duration) { |
| MutexLock lock(&mutex_); // lock out processing while updating |
| |
| sample_rate_ = sample_rate; |
| num_channels_ = num_channels; |
| speed_ = initial_speed; |
| window_duration_ = window_duration; |
| overlap_duration_ = overlap_duration; |
| |
| initialized_ = true; |
| GenerateParameters(); |
| Reset(); |
| } |
| |
| // Adjusts the rate scaling factor. |
| void SolaTimeScaler::set_speed(double speed) { |
| MutexLock lock(&mutex_); // lock out processing while updating |
| |
| speed_ = speed; |
| GenerateParameters(); |
| } |
| |
| // Generates processing parameters from the current settings. |
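| // For example (illustrative values): at 44100 Hz with a 40 ms window, a |
| // 12 ms overlap, and speed 2.0, this produces num_window_frames_ = 1764, |
| // num_overlap_frames_ = 529, half_overlap_frames_ = 264, |
| // input_window_offset_ = 1764, target_merge_offset_ = 882, |
| // max_frames_to_merge_ = 1940, and min_output_to_hold_ = 1587. |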
| void SolaTimeScaler::GenerateParameters() { |
| if (speed_ < 0.1) { |
| LOGE("Requested speed %fx limited to 0.1x", speed_); |
| speed_ = 0.1; |
| } else if (speed_ > 8.0) { |
| LOGE("Requested speed %fx limited to 8.0x", speed_); |
| speed_ = 8.0; |
| } |
| |
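| // ratio_ is the output-to-input duration ratio: values below 1.0 compress |
| // (speed up), values above 1.0 expand (slow down). |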
| ratio_ = 1.0 / speed_; |
| |
| num_window_frames_ = nearbyint(sample_rate_ * window_duration_); |
| |
| // Limit the overlap to at most half the window size, and force the overlap |
| // frame count to be odd. Half the overlap (rounded down) is precomputed |
| // because it is used repeatedly when locating the merge point. |
| overlap_duration_ = min(overlap_duration_, window_duration_ / 2.0); |
| num_overlap_frames_ = nearbyint(sample_rate_ * overlap_duration_); |
| num_overlap_frames_ |= 1; |
| half_overlap_frames_ = num_overlap_frames_ >> 1; |
| |
| if (speed_ >= 1.) { |
| // For compression (speed up), adjacent input windows overlap in the output. |
| input_window_offset_ = num_window_frames_; |
| target_merge_offset_ = nearbyint(num_window_frames_ * ratio_); |
| } else { |
| // For expansion (slow down), each input window start point overlaps the |
| // previous, and they are placed adjacently in the output |
| // (+/- half the overlap size). |
| input_window_offset_ = nearbyint(num_window_frames_ * speed_); |
| target_merge_offset_ = num_window_frames_; |
| } |
| |
| // Make sure we copy enough extra data to be able to perform a |
| // frame correlation over the range of target merge point +/- half overlap, |
| // even when the previous merge point was adjusted backwards a half overlap. |
| max_frames_to_merge_ = max(num_window_frames_, |
| target_merge_offset_ + (2 * num_overlap_frames_)); |
| min_output_to_hold_ = |
| max_frames_to_merge_ + num_overlap_frames_ - target_merge_offset_; |
| } |
| |
| // The input buffer has one writer and reader. |
| // The output buffer has one reader/updater, and one reader/consumer. |
| static const int kInputReader = 0; |
| static const int kOutputAnalysis = 0; |
| static const int kOutputConsumer = 1; |
| |
| void SolaTimeScaler::Reset() { |
| CHECK(initialized_); |
| double duration = max(FLAGS_sola_ring_buffer, 20. * window_duration_); |
| draining_ = false; |
| |
| delete input_buffer_; |
| input_buffer_ = new RingBuffer(); |
| input_buffer_->Init(static_cast<int>(sample_rate_ * duration), |
| num_channels_, 1); |
| |
| delete output_buffer_; |
| output_buffer_ = new RingBuffer(); |
| output_buffer_->Init(static_cast<int>(sample_rate_ * ratio_ * duration), |
| num_channels_, 2); |
| |
| if (analyzer_ == NULL) { |
| if (FLAGS_sola_enable_correlation) { |
| analyzer_ = new SolaAnalyzer(); |
| } else { |
| analyzer_ = new SolaBypassAnalyzer(); |
| } |
| } |
| analyzer_->Init(sample_rate_, num_channels_); |
| } |
| |
| // Returns the number of frames that the input buffer can accept. |
| int SolaTimeScaler::input_limit() const { |
| CHECK(initialized_); |
| return input_buffer_->overhead(); |
| } |
| |
| // Returns the number of available output frames. |
| int SolaTimeScaler::available() { |
| CHECK(initialized_); |
| |
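| // Hold back min_output_to_hold_ frames so the next window can still be |
| // cross-faded into the tail of the output; when draining, flush the |
| // remaining input and release the tail once nothing more can be merged. |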
| int available = output_buffer_->available(kOutputConsumer); |
| if (available > min_output_to_hold_) { |
| available -= min_output_to_hold_; |
| } else if (draining_) { |
| Process(); |
| available = output_buffer_->available(kOutputConsumer); |
| if (available > min_output_to_hold_) { |
| available -= min_output_to_hold_; |
| } |
| } else { |
| available = 0; |
| } |
| return available; |
| } |
| |
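| // Signals that no more input will be injected; remaining buffered audio is |
| // flushed by subsequent Process() / available() calls. |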
| void SolaTimeScaler::Drain() { |
| CHECK(initialized_); |
| |
| draining_ = true; |
| } |
| |
| |
| // Feeds audio to the timescaler, and processes as much data as possible. |
| int SolaTimeScaler::InjectSamples(float* buffer, int num_frames) { |
| CHECK(initialized_); |
| |
| // Do not write more frames than the buffer can accept. |
| num_frames = min(input_limit(), num_frames); |
| if (!num_frames) { |
| return 0; |
| } |
| |
| // Copy samples to the input buffer and then process whatever can be consumed. |
| input_buffer_->Write(buffer, num_frames); |
| Process(); |
| return num_frames; |
| } |
| |
| // Retrieves audio data from the timescaler. |
| int SolaTimeScaler::RetrieveSamples(float* buffer, int num_frames) { |
| CHECK(initialized_); |
| |
| // Do not read more frames than available. |
| num_frames = min(available(), num_frames); |
| if (!num_frames) { |
| return 0; |
| } |
| |
| output_buffer_->Copy(kOutputConsumer, buffer, num_frames); |
| output_buffer_->Seek(kOutputConsumer, |
| output_buffer_->Tell(kOutputConsumer) + num_frames); |
| |
| return num_frames; |
| } |
| |
| // Consumes buffered input windows and merges them into the output, |
| // producing time-scaled audio. |
| bool SolaTimeScaler::Process() { |
| CHECK(initialized_); |
| bool generated_data = false; |
| |
| // We can only process data if there is sufficient input available |
| // (or we are draining the latency), and there is sufficient room |
| // for output to be merged. |
| while (((input_buffer_->available(kInputReader) > max_frames_to_merge_) || |
| draining_) && (output_buffer_->overhead() >= max_frames_to_merge_)) { |
| MutexLock lock(&mutex_); // lock out updates while processing each window |
| |
| // Determine the number of samples to merge into the output. |
| int input_count = |
| min(input_buffer_->available(kInputReader), max_frames_to_merge_); |
| if (input_count == 0) { |
| break; |
| } |
| // The input reader always points to the next window to process. |
| float* input_pointer = input_buffer_->GetPointer(kInputReader, input_count); |
| |
| // The analysis reader always points to the ideal target merge point, |
| // minus half an overlap window (i.e., the starting point for correlation). |
| // That means the available data from that point equals the number |
| // of samples that must be cross-faded. |
| int output_merge_cnt = output_buffer_->available(kOutputAnalysis); |
| float* output_pointer = |
| output_buffer_->GetPointer(kOutputAnalysis, output_merge_cnt); |
| |
| // If there is not enough data to do a proper correlation, |
| // just merge at the ideal target point. Otherwise, |
| // find the best correlation score, working from the center out. |
| int merge_offset = min(output_merge_cnt, half_overlap_frames_); |
| |
| if ((output_merge_cnt >= (2 * num_overlap_frames_)) && |
| (input_count >= num_overlap_frames_)) { |
| int best_offset = merge_offset; |
| int best_score = 0; |
| int score; |
| for (int i = 0; i <= half_overlap_frames_; ++i) { |
| score = analyzer_->Correlate(input_pointer, |
| output_pointer + ((merge_offset + i) * num_channels_), |
| num_overlap_frames_); |
| if (score > best_score) { |
| best_score = score; |
| best_offset = merge_offset + i; |
| if (score == (num_overlap_frames_ * num_channels_)) { |
| break; // It doesn't get better than perfect. |
| } |
| } |
| if (i > 0) { |
| score = analyzer_->Correlate(input_pointer, |
| output_pointer + ((merge_offset - i) * num_channels_), |
| num_overlap_frames_); |
| if (score > best_score) { |
| best_score = score; |
| best_offset = merge_offset - i; |
| if (score == (num_overlap_frames_ * num_channels_)) { |
| break; // It doesn't get better than perfect. |
| } |
| } |
| } |
| } |
| merge_offset = best_offset; |
| } else if ((output_merge_cnt > 0) && !draining_) { |
| LOGE("no correlation performed"); |
| } |
| |
| // Crossfade the overlap between input and output, and then |
| // copy in the remaining input. |
| int crossfade_count = max(0, (output_merge_cnt - merge_offset)); |
| crossfade_count = min(crossfade_count, input_count); |
| int remaining_count = input_count - crossfade_count; |
| |
| float* merge_pointer = output_pointer + (merge_offset * num_channels_); |
| float flt_count = static_cast<float>(crossfade_count); |
| for (int i = 0; i < crossfade_count; ++i) { |
| // Linear cross-fade, for now. |
| float input_scale = static_cast<float>(i) / flt_count; |
| float output_scale = 1. - input_scale; |
| for (int j = 0; j < num_channels_; ++j) { |
| *merge_pointer = (*merge_pointer * output_scale) + |
| (*input_pointer++ * input_scale); |
| ++merge_pointer; |
| } |
| } |
| // Copy the merged buffer back into the output, if necessary, and |
| // append the rest of the window. |
| output_buffer_->MergeBack(kOutputAnalysis, |
| output_pointer, output_merge_cnt); |
| output_buffer_->Write(input_pointer, remaining_count); |
| |
| // Advance the output analysis pointer to the next target merge point, |
| // minus half an overlap window. The target merge point is always |
| // calculated as a delta from the previous ideal target, not the actual |
| // target, to avoid drift. |
| int output_advance = target_merge_offset_; |
| if (output_merge_cnt < half_overlap_frames_) { |
| // On the first window, back up the pointer for the next correlation. |
| // Thereafter, that compensation is preserved. |
| output_advance -= half_overlap_frames_; |
| } |
| |
| // Don't advance beyond the available data, when finishing up. |
| if (draining_) { |
| output_advance = |
| min(output_advance, output_buffer_->available(kOutputAnalysis)); |
| } |
| output_buffer_->Seek(kOutputAnalysis, |
| output_buffer_->Tell(kOutputAnalysis) + output_advance); |
| |
| // Advance the input pointer beyond the frames that are no longer needed. |
| input_buffer_->Seek(kInputReader, input_buffer_->Tell(kInputReader) + |
| min(input_count, input_window_offset_)); |
| |
| if ((crossfade_count + remaining_count) > 0) { |
| generated_data = true; |
| } |
| } // while (more to process) |
| return generated_data; |
| } |
| |
| } // namespace video_editing |