Chinaunix首页 | 论坛 | 博客
  • 博客访问: 341494
  • 博文数量: 89
  • 博客积分: 5152
  • 博客等级: 大校
  • 技术积分: 1155
  • 用 户 组: 普通用户
  • 注册时间: 2006-02-25 15:12
文章分类

全部博文(89)

文章存档

2012年(1)

2011年(5)

2010年(14)

2009年(69)

我的朋友

分类: 嵌入式

2011-02-18 09:26:35

回音消除一直是语音处理中的一个难点,很多软件回音消除算法的处理效果并不理想。
但是 Android 2.3 中自带了回音消除处理模块,效果还不错,代码的位置在
frameworks/base/voip/jni/rtp/EchoSuppressor.cpp
frameworks/base/voip/jni/rtp/EchoSuppressor.h

代码如下
  1. /*
  2.  * Copyright (C) 2010 The Android Open Source Project
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *      http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */

  16. #include <stdio.h>
  17. #include <string.h>
  18. #include <stdint.h>
  19. #include <math.h>

  20. #define LOG_TAG "Echo"
  21. #include <utils/Log.h>

  22. #include "EchoSuppressor.h"

  23. // It is very difficult to do echo cancellation at this level due to the lack of
  24. // the timing information of the samples being played and recorded. Therefore,
  25. // for the first release only echo suppression is implemented.

  26. // The algorithm is derived from the "previous works" summarized in
  27. // A new class of doubletalk detectors based on cross-correlation,
  28. // J Benesty, DR Morgan, JH Cho, IEEE Trans. on Speech and Audio Processing.
  29. // The method proposed in that paper is not used because of its high complexity.

  30. // It is well known that cross-correlation can be computed using convolution,
  31. // but unfortunately not every mobile processor has a (fast enough) FPU. Thus
  32. // we use integer arithmetic as much as possible and do lots of bookkeeping.
  33. // Again, parameters and thresholds are chosen by experiments.

  34. EchoSuppressor::EchoSuppressor(int sampleCount, int tailLength)
  35. {
  36.     tailLength += sampleCount * 4;

  37.     int shift = 0;
  38.     while ((sampleCount >> shift) > 1 && (tailLength >> shift) > 256) {
  39.         ++shift;
  40.     }

  41.     mShift = shift + 4;
  42.     mScale = 1 << shift;
  43.     mSampleCount = sampleCount;
  44.     mWindowSize = sampleCount >> shift;
  45.     mTailLength = tailLength >> shift;
  46.     mRecordLength = tailLength * 2 / sampleCount;
  47.     mRecordOffset = 0;

  48.     mXs = new uint16_t[mTailLength + mWindowSize];
  49.     memset(mXs, 0, sizeof(*mXs) * (mTailLength + mWindowSize));
  50.     mXSums = new uint32_t[mTailLength];
  51.     memset(mXSums, 0, sizeof(*mXSums) * mTailLength);
  52.     mX2Sums = new uint32_t[mTailLength];
  53.     memset(mX2Sums, 0, sizeof(*mX2Sums) * mTailLength);
  54.     mXRecords = new uint16_t[mRecordLength * mWindowSize];
  55.     memset(mXRecords, 0, sizeof(*mXRecords) * mRecordLength * mWindowSize);

  56.     mYSum = 0;
  57.     mY2Sum = 0;
  58.     mYRecords = new uint32_t[mRecordLength];
  59.     memset(mYRecords, 0, sizeof(*mYRecords) * mRecordLength);
  60.     mY2Records = new uint32_t[mRecordLength];
  61.     memset(mY2Records, 0, sizeof(*mY2Records) * mRecordLength);

  62.     mXYSums = new uint32_t[mTailLength];
  63.     memset(mXYSums, 0, sizeof(*mXYSums) * mTailLength);
  64.     mXYRecords = new uint32_t[mRecordLength * mTailLength];
  65.     memset(mXYRecords, 0, sizeof(*mXYRecords) * mRecordLength * mTailLength);

  66.     mLastX = 0;
  67.     mLastY = 0;
  68.     mWeight = 1.0f / (mRecordLength * mWindowSize);
  69. }

  70. EchoSuppressor::~EchoSuppressor()
  71. {
  72.     delete [] mXs;
  73.     delete [] mXSums;
  74.     delete [] mX2Sums;
  75.     delete [] mXRecords;
  76.     delete [] mYRecords;
  77.     delete [] mY2Records;
  78.     delete [] mXYSums;
  79.     delete [] mXYRecords;
  80. }

  81. void EchoSuppressor::run(int16_t *playbacked, int16_t *recorded)
  82. {
  83.     // Update Xs.
  84.     for (int i = mTailLength - 1; i >= 0; --i) {
  85.         mXs[i + mWindowSize] = mXs[i];
  86.     }
  87.     for (int i = mWindowSize - 1, j = 0; i >= 0; --i, j += mScale) {
  88.         uint32_t sum = 0;
  89.         for (int k = 0; k < mScale; ++k) {
  90.             int32_t x = playbacked[j + k] << 15;
  91.             mLastX += x;
  92.             sum += ((mLastX >= 0) ? mLastX : -mLastX) >> 15;
  93.             mLastX -= (mLastX >> 10) + x;
  94.         }
  95.         mXs[i] = sum >> mShift;
  96.     }

  97.     // Update XSums, X2Sums, and XRecords.
  98.     for (int i = mTailLength - mWindowSize - 1; i >= 0; --i) {
  99.         mXSums[i + mWindowSize] = mXSums[i];
  100.         mX2Sums[i + mWindowSize] = mX2Sums[i];
  101.     }
  102.     uint16_t *xRecords = &mXRecords[mRecordOffset * mWindowSize];
  103.     for (int i = mWindowSize - 1; i >= 0; --i) {
  104.         uint16_t x = mXs[i];
  105.         mXSums[i] = mXSums[i + 1] + x - xRecords[i];
  106.         mX2Sums[i] = mX2Sums[i + 1] + x * x - xRecords[i] * xRecords[i];
  107.         xRecords[i] = x;
  108.     }

  109.     // Compute Ys.
  110.     uint16_t ys[mWindowSize];
  111.     for (int i = mWindowSize - 1, j = 0; i >= 0; --i, j += mScale) {
  112.         uint32_t sum = 0;
  113.         for (int k = 0; k < mScale; ++k) {
  114.             int32_t y = recorded[j + k] << 15;
  115.             mLastY += y;
  116.             sum += ((mLastY >= 0) ? mLastY : -mLastY) >> 15;
  117.             mLastY -= (mLastY >> 10) + y;
  118.         }
  119.         ys[i] = sum >> mShift;
  120.     }

  121.     // Update YSum, Y2Sum, YRecords, and Y2Records.
  122.     uint32_t ySum = 0;
  123.     uint32_t y2Sum = 0;
  124.     for (int i = mWindowSize - 1; i >= 0; --i) {
  125.         ySum += ys[i];
  126.         y2Sum += ys[i] * ys[i];
  127.     }
  128.     mYSum += ySum - mYRecords[mRecordOffset];
  129.     mY2Sum += y2Sum - mY2Records[mRecordOffset];
  130.     mYRecords[mRecordOffset] = ySum;
  131.     mY2Records[mRecordOffset] = y2Sum;

  132.     // Update XYSums and XYRecords.
  133.     uint32_t *xyRecords = &mXYRecords[mRecordOffset * mTailLength];
  134.     for (int i = mTailLength - 1; i >= 0; --i) {
  135.         uint32_t xySum = 0;
  136.         for (int j = mWindowSize - 1; j >= 0; --j) {
  137.             xySum += mXs[i + j] * ys[j];
  138.         }
  139.         mXYSums[i] += xySum - xyRecords[i];
  140.         xyRecords[i] = xySum;
  141.     }

  142.     // Compute correlations.
  143.     float corr2 = 0.0f;
  144.     int latency = 0;
  145.     float varY = mY2Sum - mWeight * mYSum * mYSum;
  146.     for (int i = mTailLength - 1; i >= 0; --i) {
  147.         float varX = mX2Sums[i] - mWeight * mXSums[i] * mXSums[i];
  148.         float cov = mXYSums[i] - mWeight * mXSums[i] * mYSum;
  149.         float c2 = cov * cov / (varX * varY + 1);
  150.         if (c2 > corr2) {
  151.             corr2 = c2;
  152.             latency = i;
  153.         }
  154.     }
  155.     //LOGI("correlation^2 = %.10f, latency = %d", corr2, latency * mScale);

  156.     // Do echo suppression.
  157.     if (corr2 > 0.1f) {
  158.         int factor = (corr2 > 1.0f) ? 0 : (1.0f - sqrtf(corr2)) * 4096;
  159.         for (int i = 0; i < mSampleCount; ++i) {
  160.             recorded[i] = recorded[i] * factor >> 16;
  161.         }
  162.     }

  163.     // Increase RecordOffset.
  164.     ++mRecordOffset;
  165.     if (mRecordOffset == mRecordLength) {
  166.         mRecordOffset = 0;
  167.     }
  168. }

头文件

  1. /*
  2.  * Copyright (C) 2010 The Android Open Source Project
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *      http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */

  16. #ifndef __ECHO_SUPPRESSOR_H__
  17. #define __ECHO_SUPPRESSOR_H__

  18. #include <stdint.h>

  // Echo suppressor working on fixed-size frames of 16-bit PCM. Each call to
  // run() correlates the recorded frame with the recent playback history and
  // attenuates the recorded frame in place when the correlation is high.
  //
  // The object owns several heap buffers and therefore cannot be copied.
  class EchoSuppressor
  {
  public:
      // The sampleCount must be power of 2.
      EchoSuppressor(int sampleCount, int tailLength);
      ~EchoSuppressor();
      void run(int16_t *playbacked, int16_t *recorded);

  private:
      // Declared but never defined: an implicit copy would share the raw
      // buffers below and free them twice in the destructor.
      EchoSuppressor(const EchoSuppressor &);
      EchoSuppressor &operator=(const EchoSuppressor &);

      int mShift;         // Right shift applied to each accumulated magnitude sum.
      int mScale;         // Input samples folded into one magnitude value.
      int mSampleCount;   // Samples per frame.
      int mWindowSize;    // Magnitude values per frame.
      int mTailLength;    // Number of candidate delays that are examined.
      int mRecordLength;  // Number of frames kept in the ring records.
      int mRecordOffset;  // Ring slot used by the current frame.

      uint16_t *mXs;        // Playback magnitudes, newest first.
      uint32_t *mXSums;     // Running sum of playback magnitudes per delay.
      uint32_t *mX2Sums;    // Running sum of squared playback magnitudes per delay.
      uint16_t *mXRecords;  // Ring of per-frame playback magnitudes.

      uint32_t mYSum;        // Running sum of recorded magnitudes.
      uint32_t mY2Sum;       // Running sum of squared recorded magnitudes.
      uint32_t *mYRecords;   // Ring of per-frame recorded sums.
      uint32_t *mY2Records;  // Ring of per-frame recorded squared sums.

      uint32_t *mXYSums;     // Running cross-product sum per delay.
      uint32_t *mXYRecords;  // Ring of per-frame cross products per delay.

      int32_t mLastX;  // Filter state carried between playback frames.
      int32_t mLastY;  // Filter state carried between recorded frames.

      float mWeight;  // 1 / (mRecordLength * mWindowSize).
  };

  48. #endif



阅读(5760) | 评论(1) | 转发(0) |
给主人留下些什么吧!~~

2011-11-15 14:58:52

感觉这个回声消除代码有些问题,这部分关键代码:
    // Do echo suppression.
    if (corr2 > 0.1f) {
        int factor = (corr2 > 1.0f) ? 0 : (1.0f - sqrtf(corr2)) * 4096;
        for (int i = 0; i < mSampleCount; ++i) {
            recorded[i] = recorded[i] * factor >> 16;
        }
    }
可以