2 ** Copyright 2003-2010, VisualOn, Inc.
\r
4 ** Licensed under the Apache License, Version 2.0 (the "License");
\r
5 ** you may not use this file except in compliance with the License.
\r
6 ** You may obtain a copy of the License at
\r
8 ** http://www.apache.org/licenses/LICENSE-2.0
\r
10 ** Unless required by applicable law or agreed to in writing, software
\r
11 ** distributed under the License is distributed on an "AS IS" BASIS,
\r
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
13 ** See the License for the specific language governing permissions and
\r
14 ** limitations under the License.
\r
18 /*-------------------------------------------------------------------*
\r
20 *-------------------------------------------------------------------*
\r
21 * Functions and static memory for Voice Activity Detection. *
\r
22 *-------------------------------------------------------------------*/
\r
24 #ifndef __WB_VAD_H__
\r
25 #define __WB_VAD_H__
\r
27 /******************************************************************************
\r
29 ******************************************************************************/
\r
30 #include "typedef.h"
\r
31 #include "wb_vad_c.h"
\r
34 /******************************************************************************
\r
35 * DEFINITION OF DATA TYPES
\r
36 ******************************************************************************/
\r
40 Word16 bckr_est[COMPLEN]; /* background noise estimate */
\r
41 Word16 ave_level[COMPLEN]; /* averaged input components for stationary estimation */
\r
43 Word16 old_level[COMPLEN]; /* input levels of the previous frame */
\r
44 Word16 sub_level[COMPLEN]; /* input levels calculated at the end of a frame (lookahead) */
\r
45 Word16 a_data5[F_5TH_CNT][2]; /* memory for the filter bank */
\r
46 Word16 a_data3[F_3TH_CNT]; /* memory for the filter bank */
\r
48 Word16 burst_count; /* counts length of a speech burst */
\r
49 Word16 hang_count; /* hangover counter */
\r
50 Word16 stat_count; /* stationary counter */
\r
52 /* Note that each of the following two variables holds 15 flags. Each flag reserves 1 bit of the
\r
53 * variable. The newest flag is in the bit 15 (assuming that LSB is bit 1 and MSB is bit 16). */
\r
54 Word16 vadreg; /* flags for intermediate VAD decisions */
\r
55 Word16 tone_flag; /* tone detection flags */
\r
57 Word16 sp_est_cnt; /* counter for speech level estimation */
\r
58 Word16 sp_max; /* maximum level */
\r
59 Word16 sp_max_cnt; /* counts frames that contain speech */
\r
60 Word16 speech_level; /* estimated speech level */
\r
61 Word32 prev_pow_sum; /* power of previous frame */
\r
65 /********************************************************************************
\r
67 * DECLARATION OF PROTOTYPES
\r
68 ********************************************************************************/
\r
/* Takes the address of a VadVars pointer (st) and a memory operator (pMemOP).
 * NOTE(review): presumably allocates the VAD state through pMemOP, stores it
 * in *st and returns a status code -- confirm against the implementation. */
70 Word16 wb_vad_init(VadVars ** st, VO_MEM_OPERATOR *pMemOP);
\r
/* Operates on an existing VAD state (st) and returns a Word16.
 * NOTE(review): presumably restores the state to its initial values and
 * returns a status code -- confirm against the implementation. */
71 Word16 wb_vad_reset(VadVars * st);
\r
/* Takes the address of a VadVars pointer (st) and a memory operator (pMemOP).
 * NOTE(review): presumably releases the state obtained from wb_vad_init
 * through pMemOP -- confirm against the implementation. */
72 void wb_vad_exit(VadVars ** st, VO_MEM_OPERATOR *pMemOP);
\r
/* Takes the VAD state (st) and a gain value (p_gain); returns nothing.
 * NOTE(review): presumably updates the tone detection flags in st based on
 * p_gain -- confirm against the implementation. */
73 void wb_vad_tone_detection(VadVars * st, Word16 p_gain);
\r
/* Takes the VAD state (st) and an input sample buffer (in_buf); returns a Word16.
 * NOTE(review): presumably the return value is the VAD decision for the
 * frame, and in_buf holds one frame of samples -- confirm against the
 * implementation. */
74 Word16 wb_vad(VadVars * st, Word16 in_buf[]);
\r
76 #endif //__WB_VAD_H__
\r