/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* Rubber Band An audio time-stretching and pitch-shifting library. Copyright 2007-2008 Chris Cannam. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ #include "StretchCalculator.h" #include #include #include #include #include #include #include #include #include "sysutils.h" namespace RubberBand { StretchCalculator::StretchCalculator(size_t sampleRate, size_t inputIncrement, bool useHardPeaks) : m_sampleRate(sampleRate), m_increment(inputIncrement), m_prevDf(0), m_divergence(0), m_recovery(0), m_prevRatio(1.0), m_transientAmnesty(0), m_useHardPeaks(useHardPeaks) { // std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl; } StretchCalculator::~StretchCalculator() { } std::vector StretchCalculator::calculate(double ratio, size_t inputDuration, const std::vector &phaseResetDf, const std::vector &stretchDf) { assert(phaseResetDf.size() == stretchDf.size()); m_lastPeaks = findPeaks(phaseResetDf); std::vector &peaks = m_lastPeaks; size_t totalCount = phaseResetDf.size(); std::vector increments; size_t outputDuration = lrint(inputDuration * ratio); if (m_debugLevel > 0) { std::cerr << "StretchCalculator::calculate(): inputDuration " << inputDuration << ", ratio " << ratio << ", outputDuration " << outputDuration; } outputDuration = lrint((phaseResetDf.size() * m_increment) * ratio); if (m_debugLevel > 0) { std::cerr << " (rounded up to " << outputDuration << ")"; std::cerr << ", df size " << phaseResetDf.size() << std::endl; } std::vector fixedAudioChunks; for (size_t i = 0; i < peaks.size(); ++i) { fixedAudioChunks.push_back (lrint((double(peaks[i].chunk) * outputDuration) / totalCount)); } if (m_debugLevel > 1) { std::cerr << "have " << peaks.size() << " fixed positions" << std::endl; } size_t totalInput = 0, totalOutput = 0; // For each region between two consecutive time sync points, we // want to take the number of output chunks to be allocated and // the detection function values within the range, and produce a // series of increments that sum to the number of output chunks, // such that each increment is displaced from the input increment // by an amount inversely proportional to the magnitude of the // stretch detection function at that input step. size_t regionTotalChunks = 0; for (size_t i = 0; i <= peaks.size(); ++i) { size_t regionStart, regionStartChunk, regionEnd, regionEndChunk; bool phaseReset = false; if (i == 0) { regionStartChunk = 0; regionStart = 0; } else { regionStartChunk = peaks[i-1].chunk; regionStart = fixedAudioChunks[i-1]; phaseReset = peaks[i-1].hard; } if (i == peaks.size()) { regionEndChunk = totalCount; regionEnd = outputDuration; } else { regionEndChunk = peaks[i].chunk; regionEnd = fixedAudioChunks[i]; } size_t regionDuration = regionEnd - regionStart; regionTotalChunks += regionDuration; std::vector dfRegion; for (size_t j = regionStartChunk; j != regionEndChunk; ++j) { dfRegion.push_back(stretchDf[j]); } if (m_debugLevel > 1) { std::cerr << "distributeRegion from " << regionStartChunk << " to " << regionEndChunk << " (chunks " << regionStart << " to " << regionEnd << ")" << std::endl; } dfRegion = smoothDF(dfRegion); std::vector regionIncrements = distributeRegion (dfRegion, regionDuration, ratio, phaseReset); size_t totalForRegion = 0; for (size_t j = 0; j < regionIncrements.size(); ++j) { int incr = regionIncrements[j]; if (j == 0 && phaseReset) increments.push_back(-incr); else increments.push_back(incr); if (incr > 0) totalForRegion += incr; else totalForRegion += -incr; totalInput += m_increment; } if (totalForRegion != regionDuration) { std::cerr << "*** WARNING: distributeRegion returned wrong duration " << totalForRegion << ", expected " << regionDuration << std::endl; } totalOutput += totalForRegion; } if (m_debugLevel > 0) { std::cerr << "total input increment = " << totalInput << " (= " << totalInput / m_increment << " chunks), output = " << totalOutput << ", ratio = " << double(totalOutput)/double(totalInput) << ", ideal output " << size_t(ceil(totalInput * ratio)) << std::endl; std::cerr << "(region total = " << regionTotalChunks << ")" << std::endl; } return increments; } int StretchCalculator::calculateSingle(double ratio, float df, size_t increment) { if (increment == 0) increment = m_increment; bool isTransient = false; // We want to ensure, as close as possible, that the phase reset // points appear at _exactly_ the right audio frame numbers. // In principle, the threshold depends on chunk size: larger chunk // sizes need higher thresholds. Since chunk size depends on // ratio, I suppose we could in theory calculate the threshold // from the ratio directly. For the moment we're happy if it // works well in common situations. float transientThreshold = 0.35f; if (ratio > 1) transientThreshold = 0.25f; if (m_useHardPeaks && df > m_prevDf * 1.1f && df > transientThreshold) { isTransient = true; } if (m_debugLevel > 2) { std::cerr << "df = " << df << ", prevDf = " << m_prevDf << ", thresh = " << transientThreshold << std::endl; } m_prevDf = df; bool ratioChanged = (ratio != m_prevRatio); m_prevRatio = ratio; if (isTransient && m_transientAmnesty == 0) { if (m_debugLevel > 1) { std::cerr << "StretchCalculator::calculateSingle: transient" << std::endl; } m_divergence += increment - (increment * ratio); // as in offline mode, 0.05 sec approx min between transients m_transientAmnesty = lrint(ceil(double(m_sampleRate) / (20 * double(increment)))); m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment); return -int(increment); } if (ratioChanged) { m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment); } if (m_transientAmnesty > 0) --m_transientAmnesty; int incr = lrint(increment * ratio - m_recovery); if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) { std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", "; } if (incr < lrint((increment * ratio) / 2)) { incr = lrint((increment * ratio) / 2); } else if (incr > lrint(increment * ratio * 2)) { incr = lrint(increment * ratio * 2); } double divdiff = (increment * ratio) - incr; if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) { std::cerr << "divdiff = " << divdiff << std::endl; } double prevDivergence = m_divergence; m_divergence -= divdiff; if ((prevDivergence < 0 && m_divergence > 0) || (prevDivergence > 0 && m_divergence < 0)) { m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment); } return incr; } void StretchCalculator::reset() { m_prevDf = 0; m_divergence = 0; } std::vector StretchCalculator::findPeaks(const std::vector &rawDf) { std::vector df = smoothDF(rawDf); // We distinguish between "soft" and "hard" peaks. A soft peak is // simply the result of peak-picking on the smoothed onset // detection function, and it represents any (strong-ish) onset. // We aim to ensure always that soft peaks are placed at the // correct position in time. A hard peak is where there is a very // rapid rise in detection function, and it presumably represents // a more broadband, noisy transient. For these we perform a // phase reset (if in the appropriate mode), and we locate the // reset at the first point where we notice enough of a rapid // rise, rather than necessarily at the peak itself, in order to // preserve the shape of the transient. std::set hardPeakCandidates; std::set softPeakCandidates; if (m_useHardPeaks) { // 0.05 sec approx min between hard peaks size_t hardPeakAmnesty = lrint(ceil(double(m_sampleRate) / (20 * double(m_increment)))); size_t prevHardPeak = 0; if (m_debugLevel > 1) { std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl; } for (size_t i = 1; i + 1 < df.size(); ++i) { if (df[i] < 0.1) continue; if (df[i] <= df[i-1] * 1.1) continue; if (df[i] < 0.22) continue; if (!hardPeakCandidates.empty() && i < prevHardPeak + hardPeakAmnesty) { continue; } bool hard = (df[i] > 0.4); if (hard && (m_debugLevel > 1)) { std::cerr << "hard peak at " < absolute " << 0.4 << std::endl; } if (!hard) { hard = (df[i] > df[i-1] * 1.4); if (hard && (m_debugLevel > 1)) { std::cerr << "hard peak at " < prev " << df[i-1] << " * 1.4" << std::endl; } } if (!hard && i > 1) { hard = (df[i] > df[i-1] * 1.2 && df[i-1] > df[i-2] * 1.2); if (hard && (m_debugLevel > 1)) { std::cerr << "hard peak at " < prev " << df[i-1] << " * 1.2 and " << df[i-1] << " > prev " << df[i-2] << " * 1.2" << std::endl; } } if (!hard && i > 2) { // have already established that df[i] > df[i-1] * 1.1 hard = (df[i] > 0.3 && df[i-1] > df[i-2] * 1.1 && df[i-2] > df[i-3] * 1.1); if (hard && (m_debugLevel > 1)) { std::cerr << "hard peak at " < prev " << df[i-1] << " * 1.1 and " << df[i-1] << " > prev " << df[i-2] << " * 1.1 and " << df[i-2] << " > prev " << df[i-3] << " * 1.1" << std::endl; } } if (!hard) continue; // (df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) || // df[i] > 0.4) { size_t peakLocation = i; if (i + 1 < rawDf.size() && rawDf[i + 1] > rawDf[i] * 1.4) { ++peakLocation; if (m_debugLevel > 1) { std::cerr << "pushing hard peak forward to " << peakLocation << ": " << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << std::endl; } } hardPeakCandidates.insert(peakLocation); prevHardPeak = peakLocation; } } size_t medianmaxsize = lrint(ceil(double(m_sampleRate) / double(m_increment))); // 1 sec ish if (m_debugLevel > 1) { std::cerr << "mediansize = " << medianmaxsize << std::endl; } if (medianmaxsize < 7) { medianmaxsize = 7; if (m_debugLevel > 1) { std::cerr << "adjusted mediansize = " << medianmaxsize << std::endl; } } int minspacing = lrint(ceil(double(m_sampleRate) / (20 * double(m_increment)))); // 0.05 sec ish std::deque medianwin; std::vector sorted; int softPeakAmnesty = 0; for (size_t i = 0; i < medianmaxsize/2; ++i) { medianwin.push_back(0); } for (size_t i = 0; i < medianmaxsize/2 && i < df.size(); ++i) { medianwin.push_back(df[i]); } size_t lastSoftPeak = 0; for (size_t i = 0; i < df.size(); ++i) { size_t mediansize = medianmaxsize; if (medianwin.size() < mediansize) { mediansize = medianwin.size(); } size_t middle = medianmaxsize / 2; if (middle >= mediansize) middle = mediansize-1; size_t nextDf = i + mediansize - middle; if (mediansize < 2) { if (mediansize > medianmaxsize) { // absurd, but never mind that medianwin.pop_front(); } if (nextDf < df.size()) { medianwin.push_back(df[nextDf]); } else { medianwin.push_back(0); } continue; } if (m_debugLevel > 2) { // std::cerr << "have " << mediansize << " in median buffer" << std::endl; } sorted.clear(); for (size_t j = 0; j < mediansize; ++j) { sorted.push_back(medianwin[j]); } std::sort(sorted.begin(), sorted.end()); size_t n = 90; // percentile above which we pick peaks size_t index = (sorted.size() * n) / 100; if (index >= sorted.size()) index = sorted.size()-1; if (index == sorted.size()-1 && index > 0) --index; float thresh = sorted[index]; // if (m_debugLevel > 2) { // std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl; // if (medianwin[middle] == 0.f) { // std::cerr << "contents: "; // for (size_t j = 0; j < medianwin.size(); ++j) { // std::cerr << medianwin[j] << " "; // } // std::cerr << std::endl; // } // } if (medianwin[middle] > thresh && medianwin[middle] > medianwin[middle-1] && medianwin[middle] > medianwin[middle+1] && softPeakAmnesty == 0) { size_t maxindex = middle; float maxval = medianwin[middle]; for (size_t j = middle+1; j < mediansize; ++j) { if (medianwin[j] > maxval) { maxval = medianwin[j]; maxindex = j; } else if (medianwin[j] < medianwin[middle]) { break; } } size_t peak = i + maxindex - middle; // std::cerr << "i = " < 1) { std::cerr << "soft peak at " << peak << " (" << peak * m_increment << "): " << medianwin[middle] << " > " << thresh << " and " << medianwin[middle] << " > " << medianwin[middle-1] << " and " << medianwin[middle] << " > " << medianwin[middle+1] << std::endl; } if (peak >= df.size()) { if (m_debugLevel > 2) { std::cerr << "peak is beyond end" << std::endl; } } else { softPeakCandidates.insert(peak); lastSoftPeak = peak; } } softPeakAmnesty = minspacing + maxindex - middle; if (m_debugLevel > 2) { std::cerr << "amnesty = " << softPeakAmnesty << std::endl; } } else if (softPeakAmnesty > 0) --softPeakAmnesty; if (mediansize >= medianmaxsize) { medianwin.pop_front(); } if (nextDf < df.size()) { medianwin.push_back(df[nextDf]); } else { medianwin.push_back(0); } } std::vector peaks; while (!hardPeakCandidates.empty() || !softPeakCandidates.empty()) { bool haveHardPeak = !hardPeakCandidates.empty(); bool haveSoftPeak = !softPeakCandidates.empty(); size_t hardPeak = (haveHardPeak ? *hardPeakCandidates.begin() : 0); size_t softPeak = (haveSoftPeak ? *softPeakCandidates.begin() : 0); Peak peak; peak.hard = false; peak.chunk = softPeak; bool ignore = false; if (haveHardPeak && (!haveSoftPeak || hardPeak <= softPeak)) { if (m_debugLevel > 2) { std::cerr << "Hard peak: " << hardPeak << std::endl; } peak.hard = true; peak.chunk = hardPeak; hardPeakCandidates.erase(hardPeakCandidates.begin()); } else { if (m_debugLevel > 2) { std::cerr << "Soft peak: " << softPeak << std::endl; } if (!peaks.empty() && peaks[peaks.size()-1].hard && peaks[peaks.size()-1].chunk + 3 >= softPeak) { if (m_debugLevel > 2) { std::cerr << "(ignoring, as we just had a hard peak)" << std::endl; } ignore = true; } } if (haveSoftPeak && peak.chunk == softPeak) { softPeakCandidates.erase(softPeakCandidates.begin()); } if (!ignore) { peaks.push_back(peak); } } return peaks; } std::vector StretchCalculator::smoothDF(const std::vector &df) { std::vector smoothedDF; for (size_t i = 0; i < df.size(); ++i) { // three-value moving mean window for simple smoothing float total = 0.f, count = 0; if (i > 0) { total += df[i-1]; ++count; } total += df[i]; ++count; if (i+1 < df.size()) { total += df[i+1]; ++count; } float mean = total / count; smoothedDF.push_back(mean); } return smoothedDF; } std::vector StretchCalculator::distributeRegion(const std::vector &dfIn, size_t duration, float ratio, bool phaseReset) { std::vector df(dfIn); std::vector increments; // The peak for the stretch detection function may appear after // the peak that we're using to calculate the start of the region. // We don't want that. If we find a peak in the first half of // the region, we should set all the values up to that point to // the same value as the peak. // (This might not be subtle enough, especially if the region is // long -- we want a bound that corresponds to acoustic perception // of the audible bounce.) for (size_t i = 1; i < df.size()/2; ++i) { if (df[i] < df[i-1]) { if (m_debugLevel > 1) { std::cerr << "stretch peak offset: " << i-1 << " (peak " << df[i-1] << ")" << std::endl; } for (size_t j = 0; j < i-1; ++j) { df[j] = df[i-1]; } break; } } float maxDf = 0; for (size_t i = 0; i < df.size(); ++i) { if (i == 0 || df[i] > maxDf) maxDf = df[i]; } // We want to try to ensure the last 100ms or so (if possible) are // tending back towards the maximum df, so that the stretchiness // reduces at the end of the stretched region. int reducedRegion = lrint((0.1 * m_sampleRate) / m_increment); if (reducedRegion > int(df.size()/5)) reducedRegion = df.size()/5; for (int i = 0; i < reducedRegion; ++i) { size_t index = df.size() - reducedRegion + i; df[index] = df[index] + ((maxDf - df[index]) * i) / reducedRegion; } long toAllot = long(duration) - long(m_increment * df.size()); if (m_debugLevel > 1) { std::cerr << "region of " << df.size() << " chunks, output duration " << duration << ", toAllot " << toAllot << std::endl; } size_t totalIncrement = 0; // We place limits on the amount of displacement per chunk. if // ratio < 0, no increment should be larger than increment*ratio // or smaller than increment*ratio/2; if ratio > 0, none should be // smaller than increment*ratio or larger than increment*ratio*2. // We need to enforce this in the assignment of displacements to // allotments, not by trying to respond if something turns out // wrong. // Note that the ratio is only provided to this function for the // purposes of establishing this bound to the displacement. // so if // maxDisplacement / totalDisplacement > increment * ratio*2 - increment // (for ratio > 1) // or // maxDisplacement / totalDisplacement < increment * ratio/2 // (for ratio < 1) // then we need to adjust and accommodate bool acceptableSquashRange = false; double totalDisplacement = 0; double maxDisplacement = 0; // min displacement will be 0 by definition maxDf = 0; float adj = 0; while (!acceptableSquashRange) { acceptableSquashRange = true; calculateDisplacements(df, maxDf, totalDisplacement, maxDisplacement, adj); if (m_debugLevel > 1) { std::cerr << "totalDisplacement " << totalDisplacement << ", max " << maxDisplacement << " (maxDf " << maxDf << ", df count " << df.size() << ")" << std::endl; } if (totalDisplacement == 0) { // Not usually a problem, in fact // std::cerr << "WARNING: totalDisplacement == 0 (duration " << duration << ", " << df.size() << " values in df)" << std::endl; if (!df.empty() && adj == 0) { acceptableSquashRange = false; adj = 1; } continue; } int extremeIncrement = m_increment + lrint((toAllot * maxDisplacement) / totalDisplacement); if (ratio < 1.0) { if (extremeIncrement > lrint(ceil(m_increment * ratio))) { std::cerr << "ERROR: extreme increment " << extremeIncrement << " > " << m_increment * ratio << " (this should not happen)" << std::endl; } else if (extremeIncrement < (m_increment * ratio) / 2) { if (m_debugLevel > 0) { std::cerr << "WARNING: extreme increment " << extremeIncrement << " < " << (m_increment * ratio) / 2 << std::endl; } acceptableSquashRange = false; } } else { if (extremeIncrement > m_increment * ratio * 2) { if (m_debugLevel > 0) { std::cerr << "WARNING: extreme increment " << extremeIncrement << " > " << m_increment * ratio * 2 << std::endl; } acceptableSquashRange = false; } else if (extremeIncrement < lrint(floor(m_increment * ratio))) { std::cerr << "ERROR: extreme increment " << extremeIncrement << " < " << m_increment * ratio << " (I thought this couldn't happen?)" << std::endl; } } if (!acceptableSquashRange) { // Need to make maxDisplacement smaller as a proportion of // the total displacement, yet ensure that the // displacements still sum to the total. adj += maxDf/10; } } for (size_t i = 0; i < df.size(); ++i) { double displacement = maxDf - df[i]; if (displacement < 0) displacement -= adj; else displacement += adj; if (i == 0 && phaseReset) { if (df.size() == 1) { increments.push_back(duration); totalIncrement += duration; } else { increments.push_back(m_increment); totalIncrement += m_increment; } totalDisplacement -= displacement; continue; } double theoreticalAllotment = 0; if (totalDisplacement != 0) { theoreticalAllotment = (toAllot * displacement) / totalDisplacement; } int allotment = lrint(theoreticalAllotment); if (i + 1 == df.size()) allotment = toAllot; int increment = m_increment + allotment; if (increment <= 0) { // this is a serious problem, the allocation is quite // wrong if it allows increment to diverge so far from the // input increment std::cerr << "*** WARNING: increment " << increment << " <= 0, rounding to zero" << std::endl; increment = 0; allotment = increment - m_increment; } increments.push_back(increment); totalIncrement += increment; toAllot -= allotment; totalDisplacement -= displacement; if (m_debugLevel > 2) { std::cerr << "df " << df[i] << ", smoothed " << df[i] << ", disp " << displacement << ", allot " << theoreticalAllotment << ", incr " << increment << ", remain " << toAllot << std::endl; } } if (m_debugLevel > 2) { std::cerr << "total increment: " << totalIncrement << ", left over: " << toAllot << " to allot, displacement " << totalDisplacement << std::endl; } if (totalIncrement != duration) { std::cerr << "*** WARNING: calculated output duration " << totalIncrement << " != expected " << duration << std::endl; } return increments; } void StretchCalculator::calculateDisplacements(const std::vector &df, float &maxDf, double &totalDisplacement, double &maxDisplacement, float adj) const { totalDisplacement = maxDisplacement = 0; maxDf = 0; for (size_t i = 0; i < df.size(); ++i) { if (i == 0 || df[i] > maxDf) maxDf = df[i]; } for (size_t i = 0; i < df.size(); ++i) { double displacement = maxDf - df[i]; if (displacement < 0) displacement -= adj; else displacement += adj; totalDisplacement += displacement; if (i == 0 || displacement > maxDisplacement) { maxDisplacement = displacement; } } } }