SuperVectors.cpp

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 LIA_RAL project is a development project was initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 chagnolleau I., Eurospeech 2003, Genova).
00033 The conclusion of the paper is reproduced below:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a person's voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_SuperVectors_cpp)
00056 #define ALIZE_SuperVectors_cpp
00057 
00058 #include "SuperVectors.h"
00059 #include<iostream>
00060 #include<fstream>
00061 #include<cstdio>
00062 #include<cassert>
00063 #include<cmath>
00064 
00065 using namespace alize;
00066 using namespace std;
00067 
00068 // comments in .h
00069 
00070 void modelToSv(const MixtureGD &M,RealVector <double> &v){
00071         unsigned long modelSize=M.getDistribCount();
00072         unsigned long vectSize=M.getVectSize(); 
00073         v.setSize(modelSize*vectSize);
00074         for (unsigned long i=0;i<modelSize;i++)
00075                 for (unsigned long j=0;j<vectSize;j++)
00076                         v[i*vectSize+j]=M.getDistrib(i).getMean(j);
00077 }
00078 
00079 void svToModel(RealVector  <double> &v, MixtureGD &M) {
00080         unsigned long modelSize=M.getDistribCount();
00081         unsigned long vectSize=M.getVectSize(); 
00082         for (unsigned long i=0;i<modelSize;i++)
00083                 for (unsigned long j=0;j<vectSize;j++)
00084                         M.getDistrib(i).setMean(v[i*vectSize+j],j);
00085 }
00086 
00087 /*void projectOnSubSpace(RealVector<double> &x, Matrix<double> &U, RealVector <double>&proj) {
00088         // trick is to transform x'=S'Sx in x'=(U(U'x))' so that no need to compute U explicitely
00089         Matrix<double> Utmp=U;
00090         if (U.rows() > U.cols())
00091                 Utmp.transpose();       
00092         Matrix<double> xt=((DoubleMatrix)x).transpose();
00093         Matrix<double> spkFactors=Utmp*xt;              
00094         if (verboseLevel > 1) {
00095                 cout <<  "(SuperVectors) SpkFactors: (";
00096                 for (unsigned long i=0;i<spkFactors.rows();i++)
00097                         cout <<i<<":"<<spkFactors(i,0)<<",";                    
00098         }
00099         if (verboseLevel > 2) cout << "(SuperVectors) SpkFactors: "<<spkFactors.rows() <<" " <<spkFactors.cols() << "...";
00100         Matrix<double> Ut=Utmp.transpose();
00101         Matrix<double>  offset=Ut*spkFactors;
00102         if (verboseLevel > 2) cout << "(SuperVectors) offset: "<<offset.rows() <<" " <<offset.cols() <<"...";
00103         for (unsigned long i=0;i<offset.rows();i++) {
00104                 proj[i]=offset(i,0);
00105         }
00106 }*/
00107 
00108 void projectOnSubSpace(RealVector<double> &x, Matrix<double> &U, RealVector <double>&proj) { //fast version
00109         // trick is to transform x'=S'Sx in x'=(U(U'x))' so that no need to compute U explicitely
00110         double *_u=U.getArray();
00111         RealVector <double> tmp;
00112         tmp.setSize(U.rows());
00113         tmp.setAllValues(0.0);
00114         proj.setAllValues(0.0);
00115         if (verboseLevel > 1) cout<<"x: ("<<x.size()<<") - U; ("<<U.rows()<<","<<U.cols()<<")"<<endl;
00116         for (unsigned long i=0;i<U.rows();i++) 
00117                 for (unsigned long j=0;j<U.cols();j++) 
00118                         tmp[i]+=_u[i*U.cols()+j]*x[j];
00119         U.transpose();
00120         _u=U.getArray();        
00121         for (unsigned long i=0;i<U.rows();i++) 
00122                 for (unsigned long j=0;j<U.cols();j++) 
00123                         proj[i]+=_u[i*U.cols()+j]*tmp[j];               
00124         U.transpose();
00125 }
00126 
00127 
00128 void computeNap(MixtureGD &M,Matrix <double> &U) {
00129         unsigned long svSize=M.getDistribCount()*M.getVectSize();       
00130         RealVector <double> v(svSize,svSize);
00131         v.setAllValues(0.0);
00132         RealVector <double> proj(svSize,svSize);                
00133         proj.setAllValues(0.0); 
00134         modelToSv(M,v);
00135         projectOnSubSpace(v,U,proj);
00136         v-=proj;
00137         svToModel(v,M);
00138 }
00139 
00140 
00141 // Estimate NAP channel effect with a fixed client supervector and put it into model
00142 // 1. Project client vector on test vector
00143 // 2. Project result on channel subspace
00144 // 3. Compute NAP on client model
00145 // 4. Add projection to client NAPed model
00146 // Three same function are coded but none of them work right now!
00147 void computeNAPChannelEffect(MixtureGD &T,MixtureGD &M,Matrix<double>&U) {
00148         unsigned long svSize=max(U.rows(),U.cols());
00149         RealVector <double> t(svSize,svSize);
00150         RealVector <double> m(svSize,svSize);
00151         RealVector <double> proj(svSize,svSize);
00152         RealVector <double> tmp(svSize,svSize); 
00153         RealVector <double> channel(svSize,svSize);     
00154         modelToSv(T,t);
00155         modelToSv(M,m); 
00156         double normT=0.0;
00157         double angle=0.0;
00158         for (unsigned long i=0;i<t.size();i++) // norm du test
00159                 normT+=t[i]*t[i];
00160         for (unsigned long i=0;i<t.size();i++) // produit scalaire
00161                 angle+=t[i]*m[i];
00162         if (verboseLevel > 1) cout << "# <t,m>/||t||2=" << angle/normT << endl;
00163         for (unsigned long i=0;i<t.size();i++) // norm du test
00164                 proj[i]=(angle/normT)*t[i];     
00165         projectOnSubSpace(m,U,tmp); // Channel effect for test
00166         RealVector <double> mNap=m;
00167         mNap-=tmp;      
00168         projectOnSubSpace(proj,U,channel);
00169         channel+=mNap;
00170         if (debug) {
00171                 double n=0.0;
00172                 double a=0.0;   
00173                 for (unsigned long i=0;i<t.size();i++) // norm du test
00174                         n+=channel[i]*channel[i];
00175                 for (unsigned long i=0;i<t.size();i++) // produit scalaire
00176                         a+=t[i]*channel[i];
00177                 cout << "# <model,test>/||model||2=" << a/n << endl;    
00178         }
00179         svToModel(channel,M);
00180 }
00181 
00182 // Get an estimate of channel effect for a model. 
00183 //The image of projection of the SV client on the test vector is computed and the channel effect 
00184 // estimate is done on this result
00185 // This is computed with the Thales theorem
00186 /*void computeNAPChannelEffect(MixtureGD &T,MixtureGD &M,Matrix<double>&U) {
00187         unsigned long svSize=max(U.rows(),U.cols());
00188         RealVector <double> t(svSize,svSize);
00189         RealVector <double> m(svSize,svSize);
00190         RealVector <double> proj(svSize,svSize);
00191         RealVector <double> tmp(svSize,svSize);
00192         RealVector <double> channel(svSize,svSize);     
00193 
00194         modelToSv(T,t);
00195         modelToSv(M,m); 
00196         projectOnSubSpace(t,U,proj); // Channel effect for test
00197         RealVector <double> tNap=t;
00198         tNap-=proj;
00199         projectOnSubSpace(m,U,tmp); // Channel effect for test
00200         RealVector <double> mNap=m;
00201         mNap-=tmp;
00202 
00203         double normT=0.0;
00204         double normM=0.0;
00205         for (unsigned long i=0;i<t.size();i++) // norm du test
00206                 normT+=tNap[i]*tNap[i];
00207         for (unsigned long i=0;i<m.size();i++) // produit scalaire
00208                 normM+=mNap[i]*mNap[i];
00209         cout << "# Expanding factor [" << sqrt(normM/normT) <<"]"<< endl;
00210         for (unsigned long i=0;i<channel.size();i++)
00211                 channel[i]=sqrt(normM/normT)*proj[i];   
00212 
00213         channel+=mNap;
00214         double n=0.0;
00215         double a=0.0;   
00216         for (unsigned long i=0;i<t.size();i++) // norm du test
00217                 n+=channel[i]*channel[i];
00218         for (unsigned long i=0;i<t.size();i++) // produit scalaire
00219                 a+=t[i]*channel[i];
00220         cout << "# <model,test>/||model||2=" << a/n << endl;            
00221         svToModel(channel,M);
00222 }*/
00223 
00224 // Add UUt Mt to the napped SV for each model (same channel for every model)
00225 /*void computeNAPChannelEffect(MixtureGD &T,MixtureGD &M,Matrix<double>&U) {
00226         unsigned long svSize=max(U.rows(),U.cols());
00227         RealVector <double> t(svSize,svSize);
00228         RealVector <double> m(svSize,svSize);
00229         RealVector <double> channel(svSize,svSize);
00230         modelToSv(T,t);
00231         projectOnSubSpace(t,U,channel); 
00232         computeNap(M,U);        
00233         modelToSv(M,m);         
00234         channel+=m;
00235         svToModel(channel,M);
00236 }*/
00237 
00238 //------------------------------------------------------------------------
00239 // Weight part of fisher kernel -- Nicolas SCHEFFER (deprecated)
00240 void getFisherWeightVector(const MixtureGD& world, const MixtureGD& clientMixture, RealVector<double> & v,Config& config) {
00241   double w,c;
00242   for (unsigned long i=0;i<world.getDistribCount();i++) {
00243     w=world.weight(i);
00244     c=clientMixture.weight(i);
00245     //v[i]=c/sqrt(w);
00246         v[i]=c/w;
00247   }
00248 }
00249 
00250 
00251 //------------------------------------------------------------------------
00252 // Weight part of fisher kernel -- Nicolas SCHEFFER (deprecated)
00253 void getKLVector(MixtureGD& model, RealVector<double> & v,Config& config) {
00254         unsigned long mSize=model.getDistribCount();
00255         unsigned long vSize=model.getVectSize();
00256         for (unsigned long i=0;i<mSize;i++) {
00257                 DistribGD & d=model.getDistrib(i);
00258                 double w=model.weight(i);
00259                 for (unsigned long j=0;j<vSize;j++) {
00260                         //v[i*vSize+j]=d.getMean(j)*(sqrt(w*d.getCovInv(j)));
00261                         v[i*vSize+j]=d.getMean(j)*(sqrt(w*d.getCovInv(j)));
00262                 }
00263         }
00264 }
00265 
00266 void getSuperVector(RealVector<double> &v,MixtureGD &aprioriModel,MixtureGD &clientMixture,Config &config) {
00267         if (config.getParam("superVector")=="SVMUBM"){ //weight supervector
00268                 v.setSize(clientMixture.getDistribCount(),true);
00269                 getFisherWeightVector(aprioriModel,clientMixture,v,config); // For an SVM/UBM system (have to force client Mixture to be ML estimate MAPConst, alpha=1);
00270             }
00271         else if (config.getParam("superVector")=="KL") {//kl supervector
00272                 v.setSize(clientMixture.getDistribCount()*clientMixture.getVectSize(),true);            
00273                 getKLVector(clientMixture,v,config);
00274         }
00275         else throw Exception("Cannot find supervector mode [kl|svmubm]",__FILE__,__LINE__);
00276 }
00277 
00278 #endif