ScoreWarp.cpp

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 The LIA_RAL project is a development project that was initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 chagnolleau I., Eurospeech 2003, Genova).
00033 The conclusion of the paper is proposed below:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a person's voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_ScoreWarp_cpp)
00056 #define ALIZE_ScoreWarp_cpp
00057  
00058 #include <iostream>
00059 #include <fstream>
00060 #include <cstdio>
00061 #include <cassert>
00062 #include <cmath>
00063 #include "TrainTools.h"
00064 #include "ScoreWarp.h"
// Two times pi: the angular factor applied to the uniform draw inside the
// cosine term of the Box-Muller transform.
const double PI2 = 2*3.14159265358979323846;
// State shared by boxMullerGeneratorInit() and boxMullerGenerator():
// x1 is the most recent uniform draw, x2 the draw that preceded it.
static double x1 = 0.0;
static double x2 = 0.0;
00068 void boxMullerGeneratorInit(){
00069  x1= (rand()/ (float)RAND_MAX);
00070 }
00071 double boxMullerGenerator(double mean, double cov){
00072   x2=x1;
00073   x1= (rand()/ (float)RAND_MAX);
00074   if (debug) cout <<"boxMullerGenerator x1["<<x1<<"] x2["<<x2<<"]"<<endl;
00075   double y= sqrt(-2.0*log(x1))*cos(PI2*x2);   // Box-Muller gaussian generator from 2 sample of [0,1] indep. random numbers
00076   double ret= (y*cov)+mean;                        // fit to the N(mean,cov) distribution
00077   if (debug) cout <<"boxMullerGenerator["<<ret<<"]"<<endl;
00078   return ret;
00079 }
00080 
00081 
00082 // Build the Gaussian target distribution (mean,cov), by generating nbSample data, specifying the number of bins
00083 Histo makeGausHisto(unsigned long nbSample,double mean, double cov,unsigned long nbBins){
00084   if (verbose) cout << "makeGaussHisto, nbSample["<<nbSample<<"] mean["<<mean<<"] cov["<<cov<<"] nbBin["<<nbBins<<"]"<<endl;
00085   Histo histo(nbBins);
00086   boxMullerGeneratorInit();       // Init The box-muller gaussian number generator
00087   for (unsigned long i=0;i<nbSample;i++)
00088     histo.accumulateValue(boxMullerGenerator(mean,cov));
00089   histo.computeHisto(); 
00090   if (verbose){
00091     double tot=0;
00092     for (unsigned long i=0;i<histo.size();i++) 
00093       tot+=histo.count(i)*(histo.higherBound(i)-histo.lowerBound(i));
00094     cout <<"makeGaussHisto tot of the final pdf["<<tot<<"]"<<endl;
00095   }
00096   return histo;
00097 }
00098 
00099 
00100 double centralSpace(const Histo &warpH,double a){
00101   if (a==0) return 0;
00102   unsigned long inf,sup;
00103   double t=(1.0-a)/2.0;
00104   inf=0;
00105   for (double tot=0.0;(inf<warpH.size()) && (tot<t);inf++)
00106     tot+=areaHisto(warpH,inf);
00107   sup=warpH.size()-1;
00108   for (double tot=1.0;(sup>=0) && (tot>1-t);sup--)
00109     tot-=areaHisto(warpH,sup);
00110   return (warpH.lowerBound(sup)-warpH.higherBound(inf));
00111 }
00112 
00113 // Compute the warped score, using the raw score distribution warH and the destination distribution destH
00114 double scoreWarping(double score, const Histo& warpH, const Histo& destH, double nonObserved,double refArea)
00115 {
00116   if (debug) cout << "scoreWarping score ["<<score<<"] nonObserved["<<nonObserved<<"] refArea["<<refArea<<"] ";
00117   // Value before the min, after the max
00118   if (score<warpH.lowerBound(0)){                // the value is less than the minimum of the raw score distribution 
00119     double infBound=warpH.lowerBound(0)-centralSpace(warpH,refArea);
00120     return destH.lowerBound(0)-(linearInterpolation(score,infBound,warpH.lowerBound(0))*centralSpace(destH,refArea));
00121   } 
00122   if (score>warpH.higherBound(warpH.size()-1)){  // the value is more than the maximum of the raw distribution
00123     double supBound=warpH.higherBound(warpH.size()-1)+centralSpace(warpH,refArea);
00124     return destH.higherBound(destH.size()-1)+(linearInterpolation(score,warpH.higherBound(warpH.size()-1),supBound)*centralSpace(destH,refArea)); 
00125   }
00126   
00127   double totalWarp=0.0;
00128   // Compute the area between -infinite and the score (raw distrib) - totalWarp
00129   unsigned long idxW=0;                          // Will be the bin number where is the score (in the raw distrib)
00130   for(;(idxW<warpH.size())&&(warpH.higherBound(idxW)<score); idxW++);
00131   if (debug) cout << " idxW["<<idxW<<"]";
00132   totalWarp=nonObserved/2;                       // nonObserved is the estimated amount of data non seen in the set
00133   for (unsigned long idx=0;idx<idxW;idx++)       // Accumulate the area for the raw distrib (numerical integration)
00134     totalWarp+=areaHisto(warpH,idx);             // Without the last bin
00135   double percentW=linearInterpolation(score,warpH.lowerBound(idxW),warpH.higherBound(idxW));
00136   if (debug) cout << " percentW["<<percentW<<"]";
00137   totalWarp+=areaHisto(warpH,idxW)*percentW;    // Add a percentage of the last bin (linear interpolation)
00138   if (debug) cout << " totalW["<<totalWarp<<"]";
00139 
00140   // Find idxD, the index of the corresponding bin (area bin[-infinite] until bin[idxD]=totalWarp
00141   double totalDest;
00142   unsigned long idxD; 
00143   for (idxD=0,totalDest=0;(idxD<destH.size())&&(totalDest<totalWarp);idxD++)
00144     totalDest+=areaHisto(destH,idxD);
00145   if (idxD==destH.size()){
00146     idxD--;
00147     totalDest-=areaHisto(destH,idxD);
00148   }
00149   else
00150     if (idxD>0){
00151       totalDest-=areaHisto(destH,idxD);
00152       idxD--;
00153     }    
00154   // Compute the final score
00155   double ret=destH.lowerBound(idxD);                                                       // Set the ret to the lowerbound of the good bin
00156   double percentH=linearInterpolation(totalWarp,totalDest,areaHisto(destH,idxD)+totalDest);// Compute the linear interpolation % for the last bin
00157   if (debug) cout << " idxD["<<idxD<<"] TotalDet["<<totalDest<<"] PercentD["<<percentH<<"]";
00158   ret+=(destH.higherBound(idxD)-destH.lowerBound(idxD))*percentH;                          // apply the interpolation
00159   if (debug) cout <<" final["<<ret<<"]"<<endl;
00160   return ret;
00161 }
00162  
00163 
00164 // Compute the warped value using score, data histo warpH,
00165 // target histo destH
00166 // it assumes that all the values are in warpH bounds
00167 
00168 double warping(double score, const Histo& warpH, const Histo& destH)
00169 {
00170   if (debug) cout << "warping input ["<<score<<"] ";
00171   
00172   double totalWarp=0.0;
00173   // Compute the area between -infinite and the score (raw distrib) - totalWarp
00174   unsigned long idxW=0;                          // Will be the bin number where is the score (in the raw distrib)
00175   for(;(idxW<warpH.size())&&(warpH.higherBound(idxW)<score); idxW++);
00176   if (debug) cout << " idxW["<<idxW<<"] ";
00177 
00178   for (unsigned long idx=0;idx<idxW;idx++)       // Accumulate the area for the raw distrib (numerical integration)
00179     totalWarp+=areaHisto(warpH,idx);             // Without the last bin
00180   double percentW=linearInterpolation(score,warpH.lowerBound(idxW),warpH.higherBound(idxW));
00181   if (debug) cout << " percentW["<<percentW<<"] ";
00182   totalWarp+=areaHisto(warpH,idxW)*percentW;    // Add a percentage of the last bin (linear interpolation)
00183   if (debug) cout << " totalW["<<totalWarp<<"] ";
00184 
00185   // Find idxD, the index of the corresponding bin (area bin[-infinite] until bin[idxD]=totalWarp
00186   double totalDest;
00187   unsigned long idxD; 
00188   for (idxD=0,totalDest=0;(idxD<destH.size())&&(totalDest<totalWarp);idxD++)
00189     totalDest+=areaHisto(destH,idxD);
00190   if (idxD==destH.size()){
00191     idxD--;
00192     totalDest-=areaHisto(destH,idxD);
00193   }
00194   else
00195     if (idxD>0){
00196       totalDest-=areaHisto(destH,idxD);
00197       idxD--;
00198     }    
00199   // Compute the final value
00200   double ret=destH.lowerBound(idxD);                                                       // Set the ret to the lowerbound of the good bin
00201   double percentH=linearInterpolation(totalWarp,totalDest,areaHisto(destH,idxD)+totalDest);// Compute the linear interpolation % for the last bin
00202   if (debug) cout << " idxD["<<idxD<<"] TotalDet["<<totalDest<<"] Percent last bin["<<percentH<<"] ";
00203   ret+=(destH.higherBound(idxD)-destH.lowerBound(idxD))*percentH;                          // apply the interpolation
00204   if (debug) cout <<" warping output["<<ret<<"] "<<endl;
00205   return ret;
00206 }
00207 
00208 #endif // !defined(ALIZE_ScoreWarp_cpp)