00001 /* 00002 This file is part of LIA_RAL which is a set of software based on ALIZE 00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL. 00004 00005 LIA_RAL project is a development project was initiated by the computer 00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon - 00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it 00008 was supported by two national projects of the French Research Ministry: 00009 - TECHNOLANGUE program [http://www.technolangue.net] 00010 - MISTRAL program [http://mistral.univ-avignon.fr] 00011 00012 LIA_RAL is free software: you can redistribute it and/or modify 00013 it under the terms of the GNU Lesser General Public License as 00014 published by the Free Software Foundation, either version 3 of 00015 the License, or any later version. 00016 00017 LIA_RAL is distributed in the hope that it will be useful, 00018 but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00020 GNU Lesser General Public License for more details. 00021 00022 You should have received a copy of the GNU Lesser General Public 00023 License along with LIA_RAL. 00024 If not, see [http://www.gnu.org/licenses/]. 00025 00026 The LIA team as well as the LIA_RAL project team wants to highlight the 00027 limits of voice authentication in a forensic context. 00028 The "Person Authentification by Voice: A Need of Caution" paper 00029 proposes a good overview of this point (cf. "Person 00030 Authentification by Voice: A Need of Caution", Bonastre J.F., 00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin- 00032 chagnolleau I., Eurospeech 2003, Genova]. 
The conclusion of the paper is proposed below:
[Currently, it is not possible to completely determine whether the
similarity between two recordings is due to the speaker or to other
factors, especially when: (a) the speaker does not cooperate, (b) there
is no control over recording equipment, (c) recording conditions are not
known, (d) one does not know whether the voice was disguised and, to a
lesser extent, (e) the linguistic content of the message is not
controlled. Caution and judgment must be exercised when applying speaker
recognition techniques, whether human or automatic, to account for these
uncontrolled factors. Under more constrained or calibrated situations,
or as an aid for investigative purposes, judicious application of these
techniques may be suitable, provided they are not considered as infallible.
At the present time, there is no scientific process that enables one to
uniquely characterize a person's voice or to identify with absolute
certainty an individual from his or her voice.]
00048 00049 Copyright (C) 2004-2010 00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr] 00051 LIA_RAL admin [alize@univ-avignon.fr] 00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr] 00053 */ 00054 00055 #if !defined(ALIZE_SuperVectors_cpp) 00056 #define ALIZE_SuperVectors_cpp 00057 00058 #include "SuperVectors.h" 00059 #include<iostream> 00060 #include<fstream> 00061 #include<cstdio> 00062 #include<cassert> 00063 #include<cmath> 00064 00065 using namespace alize; 00066 using namespace std; 00067 00068 // comments in .h 00069 00070 void modelToSv(const MixtureGD &M,RealVector <double> &v){ 00071 unsigned long modelSize=M.getDistribCount(); 00072 unsigned long vectSize=M.getVectSize(); 00073 v.setSize(modelSize*vectSize); 00074 for (unsigned long i=0;i<modelSize;i++) 00075 for (unsigned long j=0;j<vectSize;j++) 00076 v[i*vectSize+j]=M.getDistrib(i).getMean(j); 00077 } 00078 00079 void svToModel(RealVector <double> &v, MixtureGD &M) { 00080 unsigned long modelSize=M.getDistribCount(); 00081 unsigned long vectSize=M.getVectSize(); 00082 for (unsigned long i=0;i<modelSize;i++) 00083 for (unsigned long j=0;j<vectSize;j++) 00084 M.getDistrib(i).setMean(v[i*vectSize+j],j); 00085 } 00086 00087 /*void projectOnSubSpace(RealVector<double> &x, Matrix<double> &U, RealVector <double>&proj) { 00088 // trick is to transform x'=S'Sx in x'=(U(U'x))' so that no need to compute U explicitely 00089 Matrix<double> Utmp=U; 00090 if (U.rows() > U.cols()) 00091 Utmp.transpose(); 00092 Matrix<double> xt=((DoubleMatrix)x).transpose(); 00093 Matrix<double> spkFactors=Utmp*xt; 00094 if (verboseLevel > 1) { 00095 cout << "(SuperVectors) SpkFactors: ("; 00096 for (unsigned long i=0;i<spkFactors.rows();i++) 00097 cout <<i<<":"<<spkFactors(i,0)<<","; 00098 } 00099 if (verboseLevel > 2) cout << "(SuperVectors) SpkFactors: "<<spkFactors.rows() <<" " <<spkFactors.cols() << "..."; 00100 Matrix<double> Ut=Utmp.transpose(); 00101 Matrix<double> 
offset=Ut*spkFactors; 00102 if (verboseLevel > 2) cout << "(SuperVectors) offset: "<<offset.rows() <<" " <<offset.cols() <<"..."; 00103 for (unsigned long i=0;i<offset.rows();i++) { 00104 proj[i]=offset(i,0); 00105 } 00106 }*/ 00107 00108 void projectOnSubSpace(RealVector<double> &x, Matrix<double> &U, RealVector <double>&proj) { //fast version 00109 // trick is to transform x'=S'Sx in x'=(U(U'x))' so that no need to compute U explicitely 00110 double *_u=U.getArray(); 00111 RealVector <double> tmp; 00112 tmp.setSize(U.rows()); 00113 tmp.setAllValues(0.0); 00114 proj.setAllValues(0.0); 00115 if (verboseLevel > 1) cout<<"x: ("<<x.size()<<") - U; ("<<U.rows()<<","<<U.cols()<<")"<<endl; 00116 for (unsigned long i=0;i<U.rows();i++) 00117 for (unsigned long j=0;j<U.cols();j++) 00118 tmp[i]+=_u[i*U.cols()+j]*x[j]; 00119 U.transpose(); 00120 _u=U.getArray(); 00121 for (unsigned long i=0;i<U.rows();i++) 00122 for (unsigned long j=0;j<U.cols();j++) 00123 proj[i]+=_u[i*U.cols()+j]*tmp[j]; 00124 U.transpose(); 00125 } 00126 00127 00128 void computeNap(MixtureGD &M,Matrix <double> &U) { 00129 unsigned long svSize=M.getDistribCount()*M.getVectSize(); 00130 RealVector <double> v(svSize,svSize); 00131 v.setAllValues(0.0); 00132 RealVector <double> proj(svSize,svSize); 00133 proj.setAllValues(0.0); 00134 modelToSv(M,v); 00135 projectOnSubSpace(v,U,proj); 00136 v-=proj; 00137 svToModel(v,M); 00138 } 00139 00140 00141 // Estimate NAP channel effect with a fixed client supervector and put it into model 00142 // 1. Project client vector on test vector 00143 // 2. Project result on channel subspace 00144 // 3. Compute NAP on client model 00145 // 4. Add projection to client NAPed model 00146 // Three same function are coded but none of them work right now! 
00147 void computeNAPChannelEffect(MixtureGD &T,MixtureGD &M,Matrix<double>&U) { 00148 unsigned long svSize=max(U.rows(),U.cols()); 00149 RealVector <double> t(svSize,svSize); 00150 RealVector <double> m(svSize,svSize); 00151 RealVector <double> proj(svSize,svSize); 00152 RealVector <double> tmp(svSize,svSize); 00153 RealVector <double> channel(svSize,svSize); 00154 modelToSv(T,t); 00155 modelToSv(M,m); 00156 double normT=0.0; 00157 double angle=0.0; 00158 for (unsigned long i=0;i<t.size();i++) // norm du test 00159 normT+=t[i]*t[i]; 00160 for (unsigned long i=0;i<t.size();i++) // produit scalaire 00161 angle+=t[i]*m[i]; 00162 if (verboseLevel > 1) cout << "# <t,m>/||t||2=" << angle/normT << endl; 00163 for (unsigned long i=0;i<t.size();i++) // norm du test 00164 proj[i]=(angle/normT)*t[i]; 00165 projectOnSubSpace(m,U,tmp); // Channel effect for test 00166 RealVector <double> mNap=m; 00167 mNap-=tmp; 00168 projectOnSubSpace(proj,U,channel); 00169 channel+=mNap; 00170 if (debug) { 00171 double n=0.0; 00172 double a=0.0; 00173 for (unsigned long i=0;i<t.size();i++) // norm du test 00174 n+=channel[i]*channel[i]; 00175 for (unsigned long i=0;i<t.size();i++) // produit scalaire 00176 a+=t[i]*channel[i]; 00177 cout << "# <model,test>/||model||2=" << a/n << endl; 00178 } 00179 svToModel(channel,M); 00180 } 00181 00182 // Get an estimate of channel effect for a model. 
00183 //The image of projection of the SV client on the test vector is computed and the channel effect 00184 // estimate is done on this result 00185 // This is computed with the Thales theorem 00186 /*void computeNAPChannelEffect(MixtureGD &T,MixtureGD &M,Matrix<double>&U) { 00187 unsigned long svSize=max(U.rows(),U.cols()); 00188 RealVector <double> t(svSize,svSize); 00189 RealVector <double> m(svSize,svSize); 00190 RealVector <double> proj(svSize,svSize); 00191 RealVector <double> tmp(svSize,svSize); 00192 RealVector <double> channel(svSize,svSize); 00193 00194 modelToSv(T,t); 00195 modelToSv(M,m); 00196 projectOnSubSpace(t,U,proj); // Channel effect for test 00197 RealVector <double> tNap=t; 00198 tNap-=proj; 00199 projectOnSubSpace(m,U,tmp); // Channel effect for test 00200 RealVector <double> mNap=m; 00201 mNap-=tmp; 00202 00203 double normT=0.0; 00204 double normM=0.0; 00205 for (unsigned long i=0;i<t.size();i++) // norm du test 00206 normT+=tNap[i]*tNap[i]; 00207 for (unsigned long i=0;i<m.size();i++) // produit scalaire 00208 normM+=mNap[i]*mNap[i]; 00209 cout << "# Expanding factor [" << sqrt(normM/normT) <<"]"<< endl; 00210 for (unsigned long i=0;i<channel.size();i++) 00211 channel[i]=sqrt(normM/normT)*proj[i]; 00212 00213 channel+=mNap; 00214 double n=0.0; 00215 double a=0.0; 00216 for (unsigned long i=0;i<t.size();i++) // norm du test 00217 n+=channel[i]*channel[i]; 00218 for (unsigned long i=0;i<t.size();i++) // produit scalaire 00219 a+=t[i]*channel[i]; 00220 cout << "# <model,test>/||model||2=" << a/n << endl; 00221 svToModel(channel,M); 00222 }*/ 00223 00224 // Add UUt Mt to the napped SV for each model (same channel for every model) 00225 /*void computeNAPChannelEffect(MixtureGD &T,MixtureGD &M,Matrix<double>&U) { 00226 unsigned long svSize=max(U.rows(),U.cols()); 00227 RealVector <double> t(svSize,svSize); 00228 RealVector <double> m(svSize,svSize); 00229 RealVector <double> channel(svSize,svSize); 00230 modelToSv(T,t); 00231 
projectOnSubSpace(t,U,channel); 00232 computeNap(M,U); 00233 modelToSv(M,m); 00234 channel+=m; 00235 svToModel(channel,M); 00236 }*/ 00237 00238 //------------------------------------------------------------------------ 00239 // Weight part of fisher kernel -- Nicolas SCHEFFER (deprecated) 00240 void getFisherWeightVector(const MixtureGD& world, const MixtureGD& clientMixture, RealVector<double> & v,Config& config) { 00241 double w,c; 00242 for (unsigned long i=0;i<world.getDistribCount();i++) { 00243 w=world.weight(i); 00244 c=clientMixture.weight(i); 00245 //v[i]=c/sqrt(w); 00246 v[i]=c/w; 00247 } 00248 } 00249 00250 00251 //------------------------------------------------------------------------ 00252 // Weight part of fisher kernel -- Nicolas SCHEFFER (deprecated) 00253 void getKLVector(MixtureGD& model, RealVector<double> & v,Config& config) { 00254 unsigned long mSize=model.getDistribCount(); 00255 unsigned long vSize=model.getVectSize(); 00256 for (unsigned long i=0;i<mSize;i++) { 00257 DistribGD & d=model.getDistrib(i); 00258 double w=model.weight(i); 00259 for (unsigned long j=0;j<vSize;j++) { 00260 //v[i*vSize+j]=d.getMean(j)*(sqrt(w*d.getCovInv(j))); 00261 v[i*vSize+j]=d.getMean(j)*(sqrt(w*d.getCovInv(j))); 00262 } 00263 } 00264 } 00265 00266 void getSuperVector(RealVector<double> &v,MixtureGD &aprioriModel,MixtureGD &clientMixture,Config &config) { 00267 if (config.getParam("superVector")=="SVMUBM"){ //weight supervector 00268 v.setSize(clientMixture.getDistribCount(),true); 00269 getFisherWeightVector(aprioriModel,clientMixture,v,config); // For an SVM/UBM system (have to force client Mixture to be ML estimate MAPConst, alpha=1); 00270 } 00271 else if (config.getParam("superVector")=="KL") {//kl supervector 00272 v.setSize(clientMixture.getDistribCount()*clientMixture.getVectSize(),true); 00273 getKLVector(clientMixture,v,config); 00274 } 00275 else throw Exception("Cannot find supervector mode [kl|svmubm]",__FILE__,__LINE__); 00276 } 00277 00278 #endif
1.7.2