Contents

The SMAC front end

%The SMAC speech feature extraction front end
%  [SMACS, FB, SM1, SM1C] = SMAC(S, FS, FB, NC, CMS, WL, FP, PR)
%  Input parameters:
%    S    input speech signal
%    FS   sampling frequency (dafault 8000)
%    FB   filter bank parameters: [NumberOfBands, FFTLength, Bandwidth, MelSpaced, BandwidthMod]
%         or an [NumberOfBands x FFTLength/2] matrix, (dafault [12 1024 4 1 1])
%         if BandwidthMod is false, the actual bandwidh is relative to the filter spacing
%    NC   number of augmenting cepstral coefficients (dafault 2)
%    CMS  ceptral mean subtraction (only on CCs) (dafault 1)
%    WL   length of window in sec (dafault 0.025)
%    FP   frame period in sec (dafault 0.010)
%    PR   preemphasis coefficients (dafault 0.97)
%
%  Output values:
%    SMACS  the SMAC feature vector matrix [NumberOfBands x NumberOfFrames]
%    FB     the filterbank matrix (can also be used in subsequent calls)
%    SM1    first spectral moment matrix
%    SM1C   first central spectral moment (the SM part of SMAC)
%
%  Examples:
%    % plot spectral moment (default parametrization)
%    [s fs] = wavread('input.wav');
%    [smacs, FB, sm1] = smac(s,fs); % extract spectral moments
%    figure; specgram(s,1024,fs,100,80); % plot spectrum
%    hold; plot(0:0.01:0.01*(size(sm1,2)-1), sm1'); % plot spectral moment
%
%    % mel spaced pyknogram
%    number_of_bands = 128;
%    filter_bandwidth = 1000;
%    [s fs] = wavread('input.wav');
%    [smacs, FB, sm1] = smac(s,fs,[number_of_bands 1024 filter_bandwidth 1 0]); % extract spectral moments
%    figure; specgram(s,1024,fs,100,80); % plot spectrum
%    hold; plot(0:0.01:0.01*(size(sm1,2)-1), sm1'); % plot spectral moment
%
%    % linerly spaced pyknogram
%    number_of_bands = 128;
%    filter_bandwidth = 1000;
%    [s fs] = wavread('input.wav');
%    [smacs, FB, sm1] = smac(s,fs,[number_of_bands 1024 filter_bandwidth 0 0]); % extract spectral moments
%    figure; specgram(s,1024,fs,100,80); % plot spectrum
%    hold; plot(0:0.01:0.01*(size(sm1,2)-1), sm1'); % plot spectral moment
%
%    % batch feature extraction from a list of input files and target files
%    [wavs feats] = textread('target.scp', '%s%s');
%    FB = 12; NC = 2;  % the length of the final vector is FB+NC = 14
%    for fi=1:length(wavs)
%        [s fs] = wavread(wavs{fi}); disp(wavs{fi}); % read wav file
%        s = resample(s,8000,fs); % resample at 8 kHz
%        [smacs, FB] = smac(s,8000,FB,NC); % keep filter bank in FB
%        htkwrite(feats{fi}, smacs, 100000, 6); % save as HTK MFCC kind
%    end
%
%For more details see:
%P. Tsiakoulis, A. Potamianos, and D. Dimitriadis,
%Spectral moment features augmented by low order cepstral coefficients for robust ASR,
%IEEE Signal Processing Letters, June 2010.
%
%P. Tsiakoulis, Sep. 2009, (modified June 2012)
function [smacs, FB, sm1, sm1c, sm0] = smac(S, FS, FB, NC, CMS, WL, FP, PR)
% default paramters
count = 2;
if nargin<count, FS = 8000; end; count = count + 1;
if nargin<count, FB = [12 1024 4 1 1]; end; count = count + 1;
if nargin<count, NC = 2; end; count = count + 1;
if nargin<count, CMS = 1; end; count = count + 1;
if nargin<count, WL = 0.025; end; count = count + 1;
if nargin<count, FP = 0.010; end; count = count + 1;
if nargin<count, PR = 0.97; end; count = count + 1;

% initilize filterbank
if length(FB)>5
    [NB FL] = size(FB);
    FL = 2*FL;
else
    NB = FB(1);
    if length(FB)>=2, FL = FB(2); else FL = 1024; end
    if length(FB)>=3, BM = FB(3); else BM = 4; end
    if length(FB)>=4, MS = FB(4); else MS = 1; end
    if length(FB)>=5, BMod = FB(5); else BMod = 1; end
    FB = gaborfb(FS, NB, FL, BM, MS, BMod);
end

[m mi] = max(FB,[],2);
cfreq = FS*mi/FL;


% init
W = round(WL*FS);
F = round(FP*FS);

linsp = linspace(0, FS/2, FL/2);

% this needs to be re-examined
S = S/max(S);

% make vector
S = reshape(S, [1 length(S)]);

% Preemphasize here
if PR>0, S(2:end) = S(2:end) - PR*S(1:end-1); end

NFrames = length(1:F:(length(S)-W));

sm0 = zeros(NB, NFrames);
sm1 = zeros(NB, NFrames);
sm1c = zeros(NB, NFrames);

% frame analysis
for fri = 1:NFrames
    fridx = (F*(fri-1)+1):(F*(fri-1)+W);

    zz = abs(fft(S(fridx), FL));
    for g=1:NB
        z2 = zz(1:FL/2).*FB(g,:);
        z2 = z2.^2;
        sz2 = sum(z2);

        sm0(g,fri) = sz2;
        sm1(g,fri) = sum((linsp).*z2)/sz2;
        sm1c(g,fri) = sm1(g,fri) - cfreq(g);
    end
end
%sm1 = 2*pi*sm1/FS;
%sm1c = 2*pi*sm1c/FS;

% cep
sm0 = 10*log10(sm0);
sm0(isnan(sm0) | isinf(sm0)) = 0;
sm0 = mfb2cep(sm0', NC, 1)';
%cms
if CMS,
    m_sm0 = mean(sm0, 2);
    sm0 = sm0 - repmat(m_sm0, 1, size(sm0,2));
end

% smacs
if NC > 0
    smacs = [sm1c; sm0(end:-1:1,:)];
else
    smacs = sm1c;
end
smacs(isnan(smacs) | isinf(smacs)) = 0;

Helper functions

% gabor.m
% GABOR  Gabor Filter
%        Creates a Gabor filter described by F(frequency), A(bandwith) and S
%        The impulse resonse is truncated to N = (4/beta) + 1 points
% (each side) wher beta = A/S [ S is the sampling frequency ].
% The frequency response is sampled at 512 points (range [0,pi]).
%        corresponding to 1024 in the [0,2*pi] range
%
%
%            [h, H] = GABOR(F, A, S, N)
%
%h   impulse response exp(-[(A/S)*t]^2)*cos(2*pi*F/S*t)
%H   frequency response
%    sqrt(pi)/(2*A/S) { exp (-[(f-F)*pi/A]^2) + exp(...) }
%F,A the center frquency (Hz) and the bandwidth parameter (Hz)
%S   the sampling frequency
% A Potamianos, 8 Mar 94, GaTech
% P Tsiakoulis, 3 Jul 09, NTUA -> added nb
function [h, H] = gabor(fr, bw, sfr, nb)

if nargin<4, nb = 512; end

b = bw / sfr;
w_c = 2*pi*fr/sfr;

N = ceil((4/b) + 1) ;
n = -N:1:N;

h = exp(-(b*n).^2).*cos(w_c*n) ;

n = linspace(0,pi,nb);
H = sqrt(pi)/(2*b)*(exp(-((n-w_c)/(2*b)).^2)+exp(-((n+w_c)/(2*b)).^2));
%n = linspace(0,sfr/2,512);
%H = sqrt(pi)/(2*bw/sfr)*(exp(-((n-fr)*pi/bw).^2)+exp(-((n+fr)*pi/bw).^2));

% gaborfb.m
% P Tsiakoulis, 3 Jul 09
% P Tsiakoulis, 26 Nov 13, CUED -> added ms
function [H, F, h] = gaborfb(FS, NB, FL, BM, MS, BMod)

F = zeros(1, NB);

if MS
    f_mel_max = log10(1+(FS/2)/700);   % mel warped sfr/2
    F(1, 1:NB)=(10.^(f_mel_max*(1:NB)/(NB+1))-1)*700;
else
    tmp = (FS/2)/(NB+1);
    F = tmp:tmp:FS/2-tmp;
end

if BMod,
    bwtmp = [];
    bwtmp(1, 2:NB+1) = F;
    bwtmp = diff(bwtmp, 1, 2); % filter spacing
    bw = BM*bwtmp;
else
    bw(1:NB) = BM;
end

H = zeros(NB, FL/2);
h = cell(NB);
for g=1:NB
    [hg, Hg] = gabor(F(g),bw(g),FS, FL/2);
    H(g,:) = Hg;
    h{g} = hg;
end

% mfb2cep.m
% MFB2CEP  Tranform from spectral envelope to cepstrum domain   [potam]
%
%   USAGE:   c = mfb2cep(m,nc)
%
%   where c(t,bin) is the cepstrum coefficients time-frequency representation
%   (bin=[1...], i.e., NO c[0]) and m(t,f) is the (Mel) spectral envelope
%   time-frequency representation (nc is the number of Cep coefficients
%   INCLUDING c[0] (default: 13))
%
%  see also: LOADCEP, LOADMFB, CEP2MFB
% Alexandros Potamianos, AT&T Bell Labs, Dec 11, 1995
function c = mfb2cep(m,nc,c0)

%%%%% I/O %%%%%
if (nargin < 1)
   help mfb2cep;return;
end
if (nargin < 2)
   nc = 13;
end
if (nargin < 3)
   c0 = 0;
end
nf = size(m,2);
N = size(m,1);
if (nc>nf)
  disp(['Number of cepstrum coefficients reduced to ',num2str(nf)]);
  nc = nf-1;
end;
%%%%% TRANSFORM %%%%%           %%%% copy from ricks icostrans()
%%%%% init
w = pi/nf;
tmp = ones(nf,nc);
for i = 1:nc-1
tmp(:,i+1) = (1/nf)*cos(i*[0.5:nf-0.5]*w)';
end
if c0
    c = zeros(N,nc);
else
    c = zeros(N,nc-1);
end
%%%% do the deed
for i = 1:N
tt = (tmp'*m(i,:)');
%%%% equivalent implementation with FFT
%%%% (almost the same as above apart from the half-sample computation)
%tt = ifft([0 m(i,:) m(i,(nf-1):-1:1)]);
if c0
    c(i,:) = real(tt(1:nc)');     %%%% keep c0 and truncate
else
    c(i,:) = real(tt(2:nc)');     %%%% throw away c0 and truncate
end
end