在微信小程序中浅仿ChatGPT的语音动画效果

站长

2024年03月21日 20:25 · 阅读数 5

在微信小程序中实现音频播放频率动画效果

之前看了 ChatGPT APP 内的语音电话效果，感觉非常不错，正好最近也用 Donut 开发了多端应用，涉及到 AI 聊天，于是就开始了粗糙的模仿。文章会涉及到以下内容：

小程序内创建音频上下文、频域数据处理；
本地音频、网络音频数据处理；
canvas的基本用法、高清处理、坐标系转换；

最后也发散了一下，请教一下大家如何实现录音过程的音频动画。

效果图

在微信小程序中浅仿ChatGPT的语音动画效果

创建音频播放对象

选择 WebAudioContext 创建音频上下文，因为它不但可以控制音频播放，更重要的是它还支持创建“音频分析器（createAnalyser）”，对音频进行实时处理。将音频源节点连接到一个AnalyserNode对象（analyser）用于分析音频数据，例如获取频谱数据或实现实时音频可视化。

// Create the audio context
audioContext = wx.createWebAudioContext();

// Create the audio analyser
analyser = audioContext.createAnalyser();
analyser.fftSize = fftSize; // FFT size; the article gives the valid range as [32, 32768]

加载音频资源

音频资源可以是本地音频，如：项目资源、fs 系统缓存的文件，也可以是在线资源，比如存储在 CDN 上的音频文件。只不过它们的加载方式有所不同，下面分别介绍。

本地音频

使用微信文件系统加载本地资源，并将原始文件数据转换成 audioContext 可识别的 AudioBuffer

// 创建文件管理器
fs = wx.getFileSystemManager();

return new Promise((resolve, reject) => {
  fs.readFile({
    filePath: url,
    success: (res) => {
      // 将原始文件数据转换成audioContext可识别的 AudioBuffer
      audioContext.decodeAudioData(
        res.data,
        (buffer) => {
          resolve(buffer);
        },
        (err) => {
          reject(err);
        }
      );
    },
    fail: (err) => {
      reject(err);
    },
  });
});

网络音频

使用小程序框架自带的 wx.request 请求在线音频资源，请求类型为 arraybuffer；请求成功后，使用 audioContext.decodeAudioData 解析音频数据。

return new Promise((resolve, reject) => {
  wx.request({
    url,
    responseType: "arraybuffer",
    success: (res) => {
      // 将原始文件数据转(arraybuffer)换成audioContext可识别的AudioBuffer
      audioContext.decodeAudioData(
        res.data,
        (buffer) => {
          resolve(buffer);
        },
        (err) => {
          console.error("decodeAudioData fail", err);
          reject(err);
        }
      );
    },
    fail: (err) => {
      console.error("request fail", err);
      reject(err);
    },
  });
});

设置音频源

以上获取到了音频资源，并解析成功后，把音频 buffer 数据设置给 audioSource 的 buffer 属性，并把 audioSource 连接到 analyser，最后把 audioSource 连接到 audioContext 的 destination，实现边播放边获取音频频率数据。

// Create the audio source node, which handles and controls playback
let audioSource = audioContext.createBufferSource();

// Assign the already loaded audio buffer to the source node's buffer property
audioSource.buffer = buffer;

// Connect to the analyser so spectrum data can be read
audioSource.connect(analyser);

// !!! Connect the source node to the audio context's destination node; the author notes this is what makes playback audible
audioSource.connect(audioContext.destination);

// Start playback
audioSource.start();

获取频域数据

// frequencyBinCount is the length of the analyser node's frequency-domain data, i.e. the number of frequency bins in the spectrum
bufferLength = analyser.frequencyBinCount;

// Allocate a Uint8Array of bufferLength entries to hold the audio data
dataArray = new Uint8Array(bufferLength);

// Write the currently computed frequency-domain data into dataArray (call repeatedly for a real-time animation)
analyser.getByteFrequencyData(dataArray);

// Normalize the data
normalizedArr = normalizedBuffer(dataArray);

音频数据归一化

音频数据归一化，是为了更好的显示音频数据，避免数据过大以及每帧获取到的最大和最小范围不统一，起到优化显示效果的作用。

/**
 * Rescale a sequence of samples so that the smallest maps to 0 and the
 * largest maps to 1.
 *
 * @param {Iterable<number>} [data=[]] Samples to normalize.
 * @returns {number[]} A new plain array of the same length; every entry is 0
 *   when all samples are equal.
 */
function normalizedBuffer(data = []) {
  const samples = [...data];

  // Lowest and highest sample values
  const low = samples.reduce((acc, value) => Math.min(acc, value), Infinity);
  const high = samples.reduce((acc, value) => Math.max(acc, value), -Infinity);

  // Width of the value range
  const span = high - low;

  // A zero span divides 0 by 0 (NaN); `|| 0` turns that into 0
  return samples.map((value) => {
    const scaled = (value - low) / span;
    return scaled || 0;
  });
}

// Apply the normalization to the frequency data held in dataArray
normalizedArr = normalizedBuffer(dataArray);

现在已经可以拿到了音频频谱数据，只需要在requestAnimationFrame中获取音频数据，然后通过draw函数绘制到 canvas 上，实现音频可视化。

绘制音频可视化

绘制形式可以根据自己喜好随意发挥，我这里使用的是绘制柱状图的形式，并对 canvas 坐标轴进行了重置：以中心为原点，x 轴向右为正方向，y 轴向上为正方向。绘制的时候，需要计算每个音频数据对应的柱状图的绘制位置，整体呈现为居中显示的效果。

获取 canvas 并设置坐标轴

const dpr = wx.getWindowInfo().pixelRatio;
const canvasId = "#mycanvas";

// Look up the canvas node together with its size.
// NOTE(review): assumes canvas, ctx, canvasWidth and canvasHeight are declared
// in the enclosing scope, since the animation code reads them — confirm.
wx.createSelectorQuery()
  .select(canvasId)
  .fields({
    node: true,
    size: true,
  })
  .exec((res) => {
    // Canvas object and its 2D rendering context. These locals are named
    // differently from the outer variables so the assignments at the end
    // actually publish them instead of assigning a local to itself.
    const node = res[0].node;
    const context = node.getContext("2d");

    // CSS drawing width/height of the canvas
    const renderWidth = res[0].width;
    const renderHeight = res[0].height;
    canvasWidth = renderWidth;
    canvasHeight = renderHeight;

    // High-DPI handling: size the backing store by dpr and scale drawing to match
    node.width = renderWidth * dpr;
    node.height = renderHeight * dpr;
    context.scale(dpr, dpr);

    // Coordinate transform: origin at the canvas center, y axis pointing up
    context.translate(renderWidth / 2, renderHeight / 2);
    context.scale(1, -1);

    canvas = node;
    ctx = context;
  });

绘制音频可视化

1.计算每个音频数据对应的柱状图的绘制位置；2.绘制柱状图；3.优化绘制区域

// Buffer that receives the analyser's frequency data on every frame
let bufferLength = analyser.frequencyBinCount;
let dataArray = new Uint8Array(bufferLength);

/**
 * Draw one frame of the spectrum and schedule the next one.
 * Bars are mirrored around the canvas center: the bar at offset i and the
 * bar at offset -i read the same frequency bin.
 */
function animate() {
  const halfWidth = canvasWidth / 2;
  ctx.clearRect(-halfWidth, -canvasHeight / 2, canvasWidth, canvasHeight);

  // Read and normalize the current frequency data
  analyser.getByteFrequencyData(dataArray);
  const levels = normalizedBuffer(dataArray);

  const slot = barWidth + barMargin;
  const barCount = Math.ceil(canvasWidth / slot);
  const half = Math.floor(barCount / 2);
  const stride = Math.floor(bufferLength / barCount);
  const leftLimit = -halfWidth + horizonPadding;
  const rightLimit = halfWidth - horizonPadding;

  for (let i = -half; i <= half; i++) {
    const x = i * slot - slot / 2;

    // Skip bars that fall inside the left/right padding
    if (!(x > leftLimit && x < rightLimit)) {
      continue;
    }

    const barHeight = Math.round(levels[Math.abs(i) * stride] * barHeightScale);
    // Offset by half the height so the bar is vertically centered
    drawItem(x, Math.ceil(-barHeight / 2), barWidth, barHeight);
  }

  // Schedule the next frame
  animationId = canvas.requestAnimationFrame(animate);
}

// Kick off the animation loop
animate();

/**
 * Draw one bar: a rectangle with a half circle at each end.
 * Relies on `this` providing drawCircle, drawRect and getBarColor.
 *
 * @param {number} x Horizontal center of the bar.
 * @param {number} y Bottom edge of the rectangle.
 * @param {number} w Bar width.
 * @param {number} h Bar height.
 * @param {number} [opacity=1] Bar opacity; values below 0.1 are raised to 0.1.
 */
function drawItem(x, y, w, h, opacity = 1) {
  const padY = 1;
  const padW = 1;
  const capRadius = w / 2;
  const alpha = Math.max(0.1, opacity);
  const tint = () => this.getBarColor(alpha);

  // Half circle centered at h / 2
  this.drawCircle(x, h / 2, capRadius, 0, Math.PI, tint());
  // Body, padded by one unit in width and height
  this.drawRect(x, y - padY, w + padW, h + padY, tint());
  // Half circle centered at -h / 2
  this.drawCircle(x, -h / 2, capRadius, Math.PI, 2 * Math.PI, tint());
}

完整代码

以上代码片段中使用到的方法和变量都可以在下面的完整代码中找到，代码比较糙，理解实现原理即可。

/**
 * Plays an audio file through a WeChat WebAudioContext and draws its
 * frequency spectrum on a 2D canvas as mirrored bars with rounded ends.
 */
export class SoundDanceAudio {
  canvasWidth = 0; // canvas width as reported by the selector query
  canvasHeight = 0; // canvas height as reported by the selector query

  audioContext = null; // audio context, created in startAudio()
  analyser = null; // analyser node that supplies frequency data
  audioSource = null; // most recently created buffer source node
  sourceCache = new Set(); // holds sources so they are not garbage-collected, which would interrupt playback

  /**
   * Create the spectrum animation for a canvas.
   *
   * @param {String} canvasId Selector of the canvas, e.g. '#mycanvas'.
   * @param {Object} [options] Optional configuration.
   * @param {Number} [options.barWidth=10] Width of one bar.
   * @param {Number} [options.barHeightScale=100] Multiplier turning a 0..1 level into a bar height.
   * @param {Number} [options.barMargin=8] Horizontal gap between bars; 0 is allowed.
   * @param {Number} [options.horizonPadding=5] Left/right padding in which no bars are drawn; 0 is allowed.
   * @param {Number} [options.fftSize=1024] FFT size, [32, 32768].
   * @param {Function} [options.onStop] Called with the decoded buffer when playback ends.
   * @param {Function} [options.onError] Called with the error whenever something fails.
   */
  constructor(canvasId, options = {}) {
    this.canvasId = canvasId;
    this.canvas = null;
    this.ctx = null;
    this.animationId = null; // id of the pending animation frame, or null

    this.barWidth = options.barWidth || 10;
    this.barHeightScale = options.barHeightScale || 100;
    // 0 is a meaningful margin/padding, so fall back to the default only
    // when no finite number was supplied.
    this.barMargin = Number.isFinite(options.barMargin)
      ? options.barMargin
      : 8;
    this.horizonPadding = Number.isFinite(options.horizonPadding)
      ? options.horizonPadding
      : 5;
    this.fftSize = options.fftSize || 1024;

    this.fs = wx.getFileSystemManager(); // file manager used to read local audio files
    this.onStop = options.onStop || null;
    this.onError = options.onError || null;

    this.createCanvas(this.canvasId);
  }

  /**
   * Look up the canvas node, size it for the device pixel ratio and move
   * the origin to the canvas center with the y axis pointing up.
   * The lookup completes in a callback; `canvas` and `ctx` stay null until
   * it has run. A missing node is reported through handleError.
   */
  createCanvas() {
    const dpr = wx.getWindowInfo().pixelRatio;

    wx.createSelectorQuery()
      .select(this.canvasId)
      .fields({
        node: true,
        size: true,
      })
      .exec((res) => {
        const target = res?.[0];
        if (!target || !target.node) {
          this.handleError(
            new Error(`canvas node not found: ${this.canvasId}`)
          );
          return;
        }

        const canvas = target.node;
        const ctx = canvas.getContext("2d");

        // Drawing width/height of the canvas
        const renderWidth = target.width;
        const renderHeight = target.height;
        this.canvasWidth = renderWidth;
        this.canvasHeight = renderHeight;

        // Size the backing store by dpr and scale drawing to match, for sharper output
        canvas.width = renderWidth * dpr;
        canvas.height = renderHeight * dpr;
        ctx.scale(dpr, dpr);

        // Coordinate transform: origin at the canvas center, y axis up
        ctx.translate(renderWidth / 2, renderHeight / 2);
        ctx.scale(1, -1);

        this.canvas = canvas;
        this.ctx = ctx;
      });
  }

  /**
   * Create the Web Audio context and its analyser node.
   * @param {Number} fftSize FFT size assigned to the analyser.
   */
  createWebAudioCtx(fftSize = 128) {
    this.audioContext = wx.createWebAudioContext();

    this.analyser = this.audioContext.createAnalyser();
    this.analyser.fftSize = fftSize;
  }

  /**
   * Load an audio file, play it and start the spectrum animation.
   * Failures are reported through handleError.
   *
   * @param {String} url Audio location.
   * @param {Boolean} is_remote Whether url is an online address.
   * onlineUrl = 'https://website/audio/1698635338898_92102.mp3';
   * localUrl = '/resources/audio/test_audio.mp3';
   */
  startAudio(url, is_remote = false) {
    // !!! The context is created on use: the author observed that in
    // multi-platform app mode audioSource.start() may otherwise not play.
    this.createWebAudioCtx(this.fftSize);
    const { audioContext, analyser, onStop } = this;

    this.loadAudio(url, is_remote)
      .then((buffer) => {
        const audioSource = audioContext.createBufferSource();
        audioSource.buffer = buffer;
        audioSource.connect(analyser);
        audioSource.connect(audioContext.destination);
        // Keep a reference so the source is not garbage-collected mid-playback
        this.sourceCache.add(audioSource);

        audioSource.onended = () => {
          // This source has finished, so release its reference
          this.sourceCache.delete(audioSource);
          this.stopAnimate();
          onStop && onStop(buffer);
        };
        this.audioSource = audioSource;

        try {
          audioSource.start();
        } catch (err) {
          // The source never played: drop the reference and report
          this.sourceCache.delete(audioSource);
          console.error(err);
          this.handleError(err);
          return;
        }

        try {
          this.startAnimate();
        } catch (err) {
          console.error(err);
          this.handleError(err);
        }
      })
      .catch((err) => {
        console.log("fail", err);
        this.handleError(err);
      });
  }

  /**
   * Stop playback and the animation. Does nothing to the audio when no
   * source has been created yet.
   */
  stopAudio() {
    if (this.audioSource) {
      this.audioSource.stop();
    }

    this.stopAnimate();
  }

  /**
   * Start the animation loop that redraws the spectrum on every frame.
   * Reports through handleError and returns when the canvas or analyser is
   * not ready, or when barWidth + barMargin is not positive.
   */
  startAnimate() {
    const {
      ctx,
      canvas,
      canvasWidth,
      canvasHeight,
      analyser,
      barWidth,
      barHeightScale,
      barMargin,
      horizonPadding,
    } = this;

    if (!ctx || !canvas || !analyser) {
      this.handleError(
        new Error("startAnimate called before canvas and analyser are ready")
      );
      return;
    }

    const slotWidth = barWidth + barMargin;
    if (!(slotWidth > 0)) {
      // A non-positive slot width would make the bar count unbounded
      this.handleError(new Error("barWidth + barMargin must be greater than 0"));
      return;
    }

    // Cancel a loop left running by an earlier call so only one loop draws
    if (this.animationId != null) {
      canvas.cancelAnimationFrame(this.animationId);
      this.animationId = null;
    }

    const bufferLength = analyser.frequencyBinCount;
    const dataArray = new Uint8Array(bufferLength);

    // Layout values do not change between frames, so compute them once
    const barCount = Math.ceil(canvasWidth / slotWidth);
    const halfBarCount = Math.floor(barCount / 2);
    const barStep = Math.floor(bufferLength / barCount);
    const leftLimit = -canvasWidth / 2 + horizonPadding;
    const rightLimit = canvasWidth / 2 - horizonPadding;

    const animate = () => {
      ctx.clearRect(
        -canvasWidth / 2,
        -canvasHeight / 2,
        canvasWidth,
        canvasHeight
      );

      analyser.getByteFrequencyData(dataArray);
      const normalizedArr = this.normalizedBuffer(dataArray);

      // Bars at offsets i and -i read the same bin, mirroring the spectrum
      for (let i = -halfBarCount; i <= halfBarCount; i++) {
        const index = Math.abs(i) * barStep;
        const item = normalizedArr[index];
        const barHeight = Math.round(item * barHeightScale);
        const x = i * slotWidth - slotWidth / 2;
        const y = Math.ceil(-barHeight / 2); // vertically centered

        // Skip bars inside the left/right padding
        if (x > leftLimit && x < rightLimit) {
          this.drawItem(x, y, barWidth, barHeight);
        }
      }

      this.animationId = canvas.requestAnimationFrame(animate);
    };

    animate();
  }

  /**
   * Stop the animation loop and clear the canvas. Does nothing when no
   * animation frame is pending.
   */
  stopAnimate() {
    const { ctx, canvas, canvasWidth, canvasHeight, animationId, sourceCache } =
      this;
    if (animationId == null) {
      return;
    }

    ctx.clearRect(
      -canvasWidth / 2,
      -canvasHeight / 2,
      canvasWidth,
      canvasHeight
    );
    this.drawOpacity();
    canvas.cancelAnimationFrame(animationId);
    // Reset so a second stop (stopAudio followed by onended) is a no-op
    this.animationId = null;
    sourceCache.delete(this.audioSource); // release the source once playback is over
  }

  /**
   * Draw one bar: a rectangle with a half circle at each end.
   *
   * @param {Number} x Horizontal center of the bar.
   * @param {Number} y Bottom edge of the rectangle.
   * @param {Number} w Bar width.
   * @param {Number} h Bar height.
   * @param {Number} [opacity=1] Bar opacity; values below 0.1 are raised to 0.1.
   */
  drawItem(x, y, w, h, opacity = 1) {
    const baseFixedY = 1;
    const baseFixedW = 1;
    const radius = w / 2;
    const color = this.getBarColor(Math.max(0.1, opacity));

    this.drawCircle(x, h / 2, radius, 0, Math.PI, color);
    this.drawRect(x, y - baseFixedY, w + baseFixedW, h + baseFixedY, color);
    this.drawCircle(x, -h / 2, radius, Math.PI, 2 * Math.PI, color);
  }

  /**
   * Stroke and fill an arc.
   *
   * @param {Number} x Center x.
   * @param {Number} y Center y.
   * @param {Number} radius Arc radius.
   * @param {Number} [startAngle=0] Start angle in radians.
   * @param {Number} [endAngle=2π] End angle in radians.
   * @param {String} [color="#ffffff"] Stroke and fill color.
   */
  drawCircle(
    x,
    y,
    radius,
    startAngle = 0,
    endAngle = 2 * Math.PI,
    color = "#ffffff"
  ) {
    this.ctx.beginPath();
    this.ctx.strokeStyle = color;
    this.ctx.fillStyle = color;
    this.ctx.arc(x, y, radius, startAngle, endAngle, false);
    this.ctx.stroke();
    this.ctx.fill();
    this.ctx.closePath();
  }

  /**
   * Fill a rectangle that is horizontally centered on x.
   *
   * @param {Number} x Horizontal center.
   * @param {Number} y Bottom edge.
   * @param {Number} w Width.
   * @param {Number} h Height.
   * @param {String} [color="#ffffff"] Fill color.
   */
  drawRect(x, y, w, h, color = "#ffffff") {
    this.ctx.strokeStyle = color;
    this.ctx.fillStyle = color;
    this.ctx.fillRect(x - w / 2, y, w, h);
  }

  /**
   * Fill the whole canvas area with a fully transparent color.
   */
  drawOpacity() {
    const { ctx, canvasWidth, canvasHeight } = this;
    ctx.fillStyle = 'rgba(255,255,255,0)';
    ctx.fillRect(-canvasWidth / 2, -canvasHeight / 2, canvasWidth, canvasHeight);
  }

  /**
   * Load an audio file and decode it with the current audio context.
   *
   * @param {String} url Audio location.
   * @param {Boolean} is_remote Whether url is an online address.
   * @returns {Promise} Resolves with the decoded buffer; rejects when the
   *   read/request fails, the HTTP status is not 2xx, or decoding fails.
   */
  loadAudio(url, is_remote = false) {
    const { audioContext } = this;
    return new Promise((resolve, reject) => {
      // Shared by both sources: decode the raw bytes into an audio buffer
      const decode = (res) => {
        try {
          audioContext.decodeAudioData(
            res.data,
            (buffer) => {
              resolve(buffer);
            },
            (err) => {
              console.error("decodeAudioData fail", err);
              reject(err);
            }
          );
        } catch (err) {
          // A synchronous throw would otherwise leave the promise pending
          console.error("decodeAudioData fail", err);
          reject(err);
        }
      };

      if (is_remote) {
        // Online file
        wx.request({
          url,
          responseType: "arraybuffer",
          success: (res) => {
            // The success callback also fires for HTTP error responses
            const status = res.statusCode;
            if (typeof status === "number" && (status < 200 || status >= 300)) {
              const err = new Error(`request fail: HTTP ${status}`);
              console.error("request fail", err);
              reject(err);
              return;
            }
            decode(res);
          },
          fail: (err) => {
            console.error("request fail", err);
            reject(err);
          },
        });
      } else {
        // Local file
        this.fs.readFile({
          filePath: url,
          success: decode,
          fail: (err) => {
            console.error("err:", err);
            reject(err);
          },
        });
      }
    });
  }

  /**
   * Build the bar color for a given opacity.
   * @param {Number} [opacity=1] Alpha component.
   * @returns {String} CSS rgba() color string.
   */
  getBarColor(opacity = 1) {
    return `rgba(255, 100 ,230, ${opacity})`;
  }

  /**
   * Rescale samples so the smallest maps to 0 and the largest to 1.
   * Does not read instance state.
   *
   * @param {Iterable<Number>} [data=[]] Samples to normalize.
   * @returns {Number[]} New plain array of the same length; all zeros when
   *   every sample is equal.
   */
  normalizedBuffer(data = []) {
    // Copy into a plain array: mapping a Uint8Array directly would store
    // the fractional results back as integers.
    const samples = [...data];

    let min = Infinity;
    let max = -Infinity;
    for (const sample of samples) {
      if (sample < min) {
        min = sample;
      }
      if (sample > max) {
        max = sample;
      }
    }

    const range = max - min;

    // A zero range divides 0 by 0 (NaN); `|| 0` turns that into 0
    return samples.map((sample) => (sample - min) / range || 0);
  }

  /**
   * Forward an error to the onError callback, if one was configured.
   * @param {*} err The error to report.
   */
  handleError(err) {
    this.onError && this.onError(err);
  }
}

使用 DEMO

import { SoundDanceAudio } from "./sound_dance_audio.js";

// Create the visualizer for the canvas matching the '#mycanvas' selector
let SDA = new SoundDanceAudio('#mycanvas', {
  horizonPadding: 40,
  onStop: (res) => {
    // Handle the next audio segment, if there is one
    onAudioEndHandler(playAudioIndex);
  },
  onError: console.error,
});

// Start playing the audio and drawing the spectrum animation
SDA.startAudio(localeFilePath, false);

在微信小程序中浅仿ChatGPT的语音动画效果

发散一下：如果是录音咋实现

声音播放实现音频动画相对是比较容易的，但是在小程序里想要在录音的时候也实现音频动画就有点棘手了。我这里遇到了以下几个问题：

使用getRecorderManager录音，即便不说话，onFrameRecorded 返回的 frameBuffer 依然有数据
没办法知道用户是否停止说话
...