从音频中获取对数刻度的字节频率数据（byteFrequencyData）

Question

我之前问过一个类似的问题，但那次没有解决我的问题，而且我当时也没有把问题讲清楚。这次我做了插图，希望能解释得更明白。

我的音频播放器有一个简单的频谱分析仪。频率存储在一个数组中，该数组在每个 requestAnimationFrame 上更新，该数组如下所示：

// Allocate one byte per frequency bin reported by the analyser, then ask the
// analyser to fill that buffer with the current byte frequency data.
fbc_array = new Uint8Array(analyser.frequencyBinCount);
analyser.getByteFrequencyData(fbc_array);

在此处可以阅读有关 getByteFrequencyData 的更多信息。

所以这很好用，但是我希望频率在整个频谱中均匀分布。现在它显示线性频率：

如您所见，这里占据最大显示范围的是高音（高频端），而被压缩得最厉害的是低音（低频端）。我希望我的分析仪呈现均匀分布的频率范围，如下所示：

在这里您可以看到分析仪上均匀分布的频率。这可能吗？

我用来生成分析器的代码如下所示：

// Display tuning values. They are changed dynamically elsewhere, so treat the
// numbers here as placeholders.
var canbars = 737;      // copied into `bars`, the upper bound of the draw loop
var canmultiplier = 8;  // step between the bin indices that get drawn
var canspace = 1;       // multiplier turning a bin index into an x position

// Shared analyser state; every name starts out undefined and is assigned by
// the analyser functions.
var canvas;
var ctx;
var source;
var context;
var analyser;
var fbc_array;
var bars;
var bar_x;
var bar_width;
var bar_height;

// Draws one frame of the spectrum and schedules itself for the next animation
// frame. Reads the shared globals canvas, ctx, analyser, canbars,
// canmultiplier and canspace; writes fbc_array, bars, bar_x, bar_width and
// bar_height.
function audioAnalyserFrame() {
    'use strict';
    var binIndex = 0;

    // Size the canvas from the container element.
    // NOTE(review): `$` is not defined in this snippet (presumably jQuery);
    // confirm that 'analyser-' is the intended selector.
    canvas.width = $('analyser-').width();
    canvas.height = $('analyser-').height();
    ctx.imageSmoothingEnabled = false;

    // Fresh buffer every frame, filled with the current byte frequency data.
    fbc_array = new Uint8Array(analyser.frequencyBinCount);
    analyser.getByteFrequencyData(fbc_array);

    // Start from an empty canvas; the bars are drawn in white.
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.fillStyle = "white";

    // Walk the bins linearly, drawing only every `canmultiplier`-th one.
    bars = canbars;
    while (binIndex < bars) {
        bar_x = binIndex * canspace;
        bar_width = 2;
        // Negative height: the rectangle extends upward from the bottom edge.
        bar_height = -3 - (fbc_array[binIndex] / 2);
        ctx.fillRect(bar_x, canvas.height, bar_width, bar_height);
        binIndex += canmultiplier;
    }

    window.requestAnimationFrame(audioAnalyserFrame);
}

// Creates (or re-creates) the analyser node and wires
// source -> analyser -> context.destination.
// Does nothing unless an element with id 'analyzer' exists and
// audioViewIsCurrent() returns true. `analyserInitialized`, `audioSource` and
// `audioViewIsCurrent` are defined outside this snippet.
function audioAnalyserInitialize() {
    'use strict';
    var analyserElement = document.getElementById('analyzer');
    var isFirstRun;

    if (analyserElement === null || audioViewIsCurrent() !== true) {
        return;
    }

    isFirstRun = (analyserInitialized === false);

    if (isFirstRun) {
        // The context and the media element source are created only once.
        context = new AudioContext();
        source = context.createMediaElementSource(audioSource);
    } else {
        // On later calls the previous analyser is detached before replacement.
        analyser.disconnect();
    }

    analyser = context.createAnalyser();
    canvas = analyserElement;
    ctx = canvas.getContext('2d');

    source.connect(analyser);
    analyser.connect(context.destination);

    // The draw loop is started on the first initialisation only.
    if (isFirstRun) {
        audioAnalyserFrame();
    }

    analyserInitialized = true;
    analyser.smoothingTimeConstant = 0.7;
}

请注意，我在 for 循环中每次跳过 8 个柱（bar）（请参阅顶部的 canmultiplier）；如果不这样做，分析仪的另一半会因为太宽而被绘制到 canvas 之外。我不知道这是否也是导致频率范围分布不均匀的原因。

Answer 1

您必须手动对这些值求平均（或做类似的处理），才能把它转换成对数间隔的数组；FFT 算法本身就是这样工作的。

Answer 2

另一种可能有效也可能无效的方法：把信号分成若干频段，比如 5 个。使用一个低通滤波器、一个高通滤波器和 3 个带通滤波器，让它们合起来覆盖整个频率范围。把所有滤波器（低通滤波器除外）的输出向下调制（频移）到 0 频率。为这 5 个信号各添加一个分析器。绘制每个分析器的响应，绘制时要考虑到滤波器输出的频率已经被下移。

各个分析器的输出仍然是线性（等间隔）的，但整体结果可能已经足够接近了。

（要把频率下移到 0，可以使用一个或两个增益节点，并让增益节点的增益由振荡器节点输出的正弦波或余弦波来控制。）

Answer 3

如果我对你的理解正确，我认为这对你有用，尽管远非完美。

您在 for 循环中所做的是对数组进行采样，每 8 个元素采样一次。我要做的是以对数方式进行采样。

一个例子：

// Maps `value` from a linear position inside [min, max] onto a logarithmic
// scale over the same range: `min` maps to `min` and `max` maps to `max`.
var toLog = function (value, min, max) {
  // Position of `value` inside the range, as a fraction (0 at min, 1 at max).
  var fraction = (value - min) / (max - min);
  // Ratio spanned by the range; raising it to `fraction` spaces the outputs
  // geometrically instead of linearly.
  var ratio = max / min;
  return min * Math.pow(ratio, fraction);
};

// Demo: resample a linearly indexed array at logarithmically spaced positions,
// using linear interpolation between the two neighbouring elements.

//This would be the frequency array in a linear scale
var arr = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20];

//In this case the range is 1 to 19; with real data you would use the size of
//your array. 'i' is incremented by one each time, but the step could be changed.
for (var i = 1; i < 20; i += 1) {
  //Starting at 1 because 0 and logarithms don't get along
  var logindex = toLog(i,1,19); //the index we want to sample

  //As the logindex will probably be fractional, we need to interpolate (in this case linear interpolation)
  var low = Math.floor(logindex);
  //Clamp the upper neighbour so rounding in logindex can never read past the end of the array.
  var high = Math.min(Math.ceil(logindex), arr.length - 1);
  var lv = arr[low];
  var hv = arr[high];
  //When logindex is a whole number (it is at both ends of the range) low === high,
  //and the unguarded weight would be 0/0 = NaN; sample the element directly instead.
  var w = high === low ? 0 : (logindex-low)/(high-low);
  var v = lv + (hv-lv)*w; //the interpolated value of the original array at index logindex.
  document.write(v + "<br/>");  //In your case you should draw the bar here or save it in an array for later.
}

我希望我解释得很好。这里有一个 working demo，它有一些边界错误，但它可以正常工作。

Answer 4

与此类似的东西应该有效：

// Display tuning values. They are changed dynamically elsewhere, so treat the
// numbers here as placeholders.
var canbars = 737;
var canmultiplier = 8;
var canspace = 1;

// Shared analyser state; every name starts out undefined and is assigned by
// the analyser functions.
var canvas;
var ctx;
var source;
var context;
var analyser;
var fbc_array;
var bars;
var bar_x;
var bar_width;
var bar_height;

// Draws one frame of the spectrum with the bars fanned out from the centre:
// even bin indices go to the left of `center`, odd ones to the right. Then
// schedules itself for the next animation frame.
// Reads the shared globals canvas, ctx, analyser and canbars; writes
// fbc_array, bars, bar_x, bar_width and bar_height.
function audioAnalyserFrame() {
    'use strict';
    var binIndex, midpoint, side, distance;

    // NOTE(review): `$` is not defined in this snippet (presumably jQuery);
    // confirm that 'analyser-' is the intended selector.
    canvas.width = $('analyser-').width();
    canvas.height = $('analyser-').height();
    ctx.imageSmoothingEnabled = false;

    // Fresh buffer every frame, filled with the current byte frequency data.
    fbc_array = new Uint8Array(analyser.frequencyBinCount);
    analyser.getByteFrequencyData(fbc_array);

    // Start from an empty canvas; the bars are drawn in white.
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.fillStyle = "white";

    bars = canbars;
    midpoint = Math.round(bars / 2) - 1;

    for (binIndex = 0; binIndex < fbc_array.length; binIndex += 1) {
        // Alternate sides: even indices step left, odd indices step right.
        if (binIndex % 2 === 0) {
            side = -1;
        } else {
            side = 1;
        }
        distance = Math.round(binIndex / 2);
        bar_x = midpoint + side * distance;
        bar_width = 2;
        // Negative height: the rectangle extends upward from the bottom edge.
        bar_height = -3 - (fbc_array[binIndex] / 2);
        ctx.fillRect(bar_x, canvas.height, bar_width, bar_height);
    }

    window.requestAnimationFrame(audioAnalyserFrame);
}

// Creates (or re-creates) the analyser node and wires
// source -> analyser -> context.destination.
// Does nothing unless an element with id 'analyzer' exists and
// audioViewIsCurrent() returns true. `analyserInitialized`, `audioSource` and
// `audioViewIsCurrent` are defined outside this snippet.
function audioAnalyserInitialize() {
    'use strict';
    var analyserElement = document.getElementById('analyzer');
    var isFirstRun;

    if (analyserElement === null || audioViewIsCurrent() !== true) {
        return;
    }

    isFirstRun = (analyserInitialized === false);

    if (isFirstRun) {
        // The context and the media element source are created only once.
        context = new AudioContext();
        source = context.createMediaElementSource(audioSource);
    } else {
        // On later calls the previous analyser is detached before replacement.
        analyser.disconnect();
    }

    analyser = context.createAnalyser();
    canvas = analyserElement;
    ctx = canvas.getContext('2d');

    source.connect(analyser);
    analyser.connect(context.destination);

    // The draw loop is started on the first initialisation only.
    if (isFirstRun) {
        audioAnalyserFrame();
    }

    analyserInitialized = true;
    analyser.smoothingTimeConstant = 0.7;
}

改进了一步，将 "update" 包装在一个函数中

// Performs the one-time canvas setup, then starts a draw loop (`update`) that
// reschedules itself on every animation frame.
// Reads the shared globals canvas, ctx, analyser and canbars; writes
// fbc_array, bars, bar_x, bar_width and bar_height.
function audioAnalyserFrame() {
  'use strict';

  // NOTE(review): `$` is not defined in this snippet (presumably jQuery);
  // confirm that 'analyser-' is the intended selector.
  canvas.width = $('analyser-').width();
  canvas.height = $('analyser-').height();
  ctx.imageSmoothingEnabled = false;

  // The buffer is allocated once and refilled on every frame by update().
  fbc_array = new Uint8Array(analyser.frequencyBinCount);

  ctx.fillStyle = "white"; // Color of the bars
  bars = canbars;
  //Find the center
  var center = Math.round(bars / 2) - 1;

  // Declared as a local function: under 'use strict' an assignment to an
  // undeclared `update` would throw a ReferenceError.
  function update() {
    var i;
    window.requestAnimationFrame(update);
    analyser.getByteFrequencyData(fbc_array);
    // Clear inside the loop; otherwise bars from earlier frames are never
    // erased and simply pile up on the canvas.
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    for (i = 0; i < fbc_array.length; i++) {
      // Update the spectrum bars, spread evenly around the center:
      // even indices go left, odd indices go right.
      bar_x = center + (i % 2 === 0 ? -1 : 1) * Math.round(i / 2);
      bar_width = 2;
      // Negative height: the rectangle extends upward from the bottom edge.
      bar_height = -3 - (fbc_array[i] / 2);
      ctx.fillRect(bar_x, canvas.height, bar_width, bar_height);
    }
  }

  update();
}

Answer 5

我相信我完全理解你的意思。问题不在于您的代码，而在于 getByteFrequencyData 底层所使用的 FFT。核心问题是：音符是对数间隔的，而 FFT 的频率区间（bin）是线性间隔的。

音符按对数间隔：两个相邻的低音，例如 A2（110 Hz）和 A2#（116.5 Hz），相差 6.5 Hz；而高一个八度的同样两个音 A3（220 Hz）和 A3#（233.1 Hz），相差 13.1 Hz。

FFT 的频率区间（bin）是线性间隔的：假设我们每秒处理 44100 个样本，FFT 取一个包含 1024 个样本的窗口（一段波形），先把它与一个周期恰好为 1024 个样本的波（我们称之为 wave1）相乘；wave1 的周期是 1024/44100 ≈ 0.0232 秒，即约 43.07 Hz，相乘得到的振幅被放入第一个 bin。然后它再与频率为 wave1 * 2（约 86.13 Hz）的波相乘，接着是 wave1 * 3（约 129.20 Hz），依此类推。所以相邻频率之间的差是线性的：它始终约为 43.07 Hz，而不像音符之间的差那样随音高变化。

这就是为什么相近的低频会被归入同一个 bin，而相近的高频却会被分开。这就是 FFT 的频率分辨率问题。可以通过使用大于 1024 个样本的窗口来缓解，但这要以牺牲时间分辨率为代价。

Answer 6

在我看来，您只需把当前柱的 x 位置乘以 10/i 这一项，就可以把各个柱的间距拉开。我不确定这样做是否正确，但看起来是对的：图中各个八度的变化是均匀分布的，这是正确的。

查看我的傅立叶级数可视化器版本，它还呈现生成的音频信号的 fft 分析器： https://editor.p5js.org/mohragk/sketches/BkMiw4KxV

分析器代码在drawAnalyser().

从音频中获取对数刻度的字节频率数据（byteFrequencyData）

Get logarithmic byteFrequencyData from Audio

javascript

web-audio-api