EnergyDetector.cpp

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 LIA_RAL project is a development project was initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 chagnolleau I., Eurospeech 2003, Genova].
00033 The conclusion of the paper of the paper is proposed bellow:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a persones voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_EnergyDetector_cpp)
00056 #define ALIZE_EnergyDetector_cpp
00057 
00058 #include <iostream>
00059 #include <fstream>  
00060 #include <cassert> 
00061 #include <cmath>
00062 #include <liatools.h>
00063 #include "SegTools.h"
00064 #include "EnergyDetector.h"
00065 
00066 #define DirectInterpolation 1
00067 
00068 using namespace alize;
00069 using namespace std;
00070 
00071 // Plot the enrgy model
00072 void plotEnergyDistrib(MixtureGD &mixt)
00073 {
00074   unsigned long distribCount = mixt.getDistribCount();
00075   cout << "EnergyModel"<<endl;
00076   for (unsigned long c=0; c<distribCount; c++)
00077     cout << "Component["<<c<<"] Mean["<<mixt.getDistrib(c).getMean(0)<<
00078                  "] Cov["<<mixt.getDistrib(c).getCov(0)<<"] Weight["<<mixt.weight(c)<<"]"<<endl;
00079 }       
00080 //---------------------------------------------------------------------------------------------------------------
00081 // find the highest energy component...
00082 unsigned long findMaxEnergyDistrib(MixtureGD &mixt)
00083 {
00084   unsigned long distribCount = mixt.getDistribCount();
00085   unsigned long cmpMax=0;
00086   for (unsigned long c=1; c<distribCount; c++)
00087     if (mixt.getDistrib(c).getMean(0)>mixt.getDistrib(cmpMax).getMean(0))
00088       cmpMax=c;
00089   if (verbose) cout << "Highest component["<<cmpMax<<"] Mean["<<mixt.getDistrib(cmpMax).getMean(0)<<
00090                  "] Cov["<<mixt.getDistrib(cmpMax).getCov(0)<<"] Weight["<<mixt.weight(cmpMax)<<"]"<<endl;
00091   return cmpMax;
00092 }       
00093 // find the lowest energy component...
00094 unsigned long findMinEnergyDistrib(MixtureGD &mixt)
00095 {
00096   unsigned long distribCount = mixt.getDistribCount();
00097   unsigned long cmpMin=0;
00098   for (unsigned long c=1; c<distribCount; c++)
00099     if (mixt.getDistrib(c).getMean(0)<mixt.getDistrib(cmpMin).getMean(0))
00100       cmpMin=c;
00101   if (verbose) cout << "Lowest component["<<cmpMin<<"] Mean["<<mixt.getDistrib(cmpMin).getMean(0)<<
00102                  "] Cov["<<mixt.getDistrib(cmpMin).getCov(0)<<"] Weight["<<mixt.weight(cmpMin)<<"]"<<endl;
00103   return cmpMin; 
00104 }       
00105 
00106 double computeEnergyThreshold(FeatureServer & fs,double pSelect,unsigned long nbBins=100)
00107 {
00108   Histo histo(nbBins);                                             // Create an histo accumulator with 100 bins
00109   Feature f;                                                     // reset the reader at the begin of the input stream
00110   fs.reset();                                                     // feature server reset 
00111 for (unsigned long ind=0;fs.readFeature(f); ind++) // feature loop
00112     histo.accumulateValue(f[0]);                               // Accumulate the energy in the histo Accumulator
00113   histo.computeHisto();                                           // Compute the histo
00114   long i=nbBins-1;  
00115   real_t count=0;
00116 while((i>=0) && (count<=pSelect)){                               // Find the bin corresponding to the percentage of data wanted
00117     count+=histo.count(i)*(histo.higherBound(i)-histo.lowerBound(i));
00118     i--;
00119  }
00120   double threshold;
00121   if (i>=0) threshold=histo.higherBound(i);                        // Set the threshold to the higherBound of the next bin
00122   else threshold=histo.lowerBound(0);                              // if 100% of data should be selected
00123   if (verbose)  cout << "Percentage wanted["<<(int) (pSelect*100.0) <<"]Energy threshold["<<threshold<<"]"<<endl;
00124   return threshold;     
00125 }
00126 
00127 // Build the segments with the energized frames
00128 unsigned long selectFrames(FeatureServer &fs,SegServer & segServer,double threshold,SegCluster &selectedSeg,SegCluster &outputSeg,String labelOutput,String fileName)
00129 {
00130   unsigned long countFrames=0;
00131   fs.reset();                                                       // feature server reset
00132   unsigned long ind=0;
00133   unsigned long begin=0;
00134   bool in=false;
00135   Seg *seg;                                                         // current selectd segment
00136   selectedSeg.rewind();                                             // reset the reader at the begin of the input stream
00137   while((seg=selectedSeg.getSeg())!=NULL){                          // For each input segments
00138     for (unsigned long idx=seg->begin();idx<seg->begin()+seg->length();idx++){ // for each frame
00139       Feature f;
00140       fs.seekFeature(idx);
00141       fs.readFeature(f);
00142       if (f[0]>threshold){                                         // the frame is selected
00143         countFrames++;
00144         if (in==false){                                             // Begin of a new segment         
00145           in=true;                                                  
00146           begin=ind;
00147         }
00148       }
00149       else if (in){                                                // End of a segment
00150         in=false;
00151         Seg & segFake=segServer.createSeg(begin,ind-begin,0,       // Create a segment - Take care : length=end-begin+1 but ind =end+1 !!
00152                                           labelOutput,fileName);
00153         outputSeg.add(segFake);                                  // Add a segment       
00154       }
00155       ind++;                                                       // Increment the frame index
00156     }                                                              // end of one input segment
00157     if (in){                                                       // deal with the last energized segmeent inside the current input segment
00158       in=false;
00159       Seg & segFake=segServer.createSeg(begin,ind-begin+1,0,       // Create a segment 
00160                                         labelOutput,fileName);
00161       outputSeg.add(segFake);                                    // Add a segment  - Take care : length=end-begin+1 and ind=end in this case !!
00162     }                 
00163   }                                                              // end feature loop                   
00164   
00165   return countFrames;
00166 }
00167 
00168 
00169 
00170 //-------------------------------------------------------------------------
00171 // Iniitialise the energy distrib to be as we want, we have an a priori on this distrib
00172 //------------------------------------------------------------------------
00173 MixtureGD &energyMixtureInit(MixtureServer &ms, StatServer &ss, FeatureServer &fs,MixtureGD 
00174 &world,SegCluster &selectedSegments,const DoubleVector &globalCov, Config& config)
00175 {
00176    unsigned long vectSize = world.getVectSize();        
00177   unsigned long distribCount = world.getDistribCount();
00178  
00179   double mean=-2.0;
00180   double meanIncrement;
00181   if (distribCount>1) meanIncrement=4.0/(double)(distribCount-1);
00182   else meanIncrement=1;
00183   for (unsigned long indg=0;indg<distribCount;indg++)  {        // For each component
00184     DistribGD& d = world.getDistrib(indg);                      // Get it
00185     for (unsigned long c=0;c<vectSize;c++,mean+=meanIncrement){                     // copy it
00186       d.setCov(1.0, c);
00187       d.setMean(mean, c);
00188     }
00189     d.computeAll();
00190   }  
00191   world.equalizeWeights();                                      // set weight = 1/distribCount for each distrib 
00192   return world;
00193 }
00194 
00195 
00196 //---------------------------------------------------------------------------------------------------------------
00197 // Energy detector
00198 // TAKE CARE !!!! INPUT DATA IS A PARAMETER FILE WITH ONLY THE ENERGY AS THE COEFF 0
00199 // Use the featureMask ALIZE option for selecting the ernegy
00200 SegCluster&  energyDetector(Config& config,SegServer &segServer,String &featureFileName)
00201 {
00202   int nbTrainIt = config.getParam("nbTrainIt").toLong();                 // number of train it
00203   String thresholdMode;                                                  // Select the mode for the threshold (default=weight)
00204   if (config.existsParam("thresholdMode"))                               // weight -> select wh(+alpha*wm) % of the frames 
00205     thresholdMode= config.getParam("thresholdMode");                     //     (wh is the weight of the highest comp, wm, the weight of the midle component
00206   else thresholdMode="meanStd";                                          // meanStd -> the threshold is mean of the highest comp - alpha*Std of the same comp
00207   double alpha = config.getParam("alpha").toDouble();                    // alpha is the percentage of central gaussian framess taken into account
00208   double flooring = config.getParam("varianceFlooring").toDouble();      // Variance flooring and ceiling 
00209   double ceiling  = config.getParam("varianceCeiling").toDouble();  
00210   String labelSelectedFrames = config.getParam("labelSelectedFrames");   // Label of the selected frames/segments in the input file
00211   String labelOutput = config.getParam("labelOutputFrames");             // Label of the selected frames in the output file
00212   
00213   if (verbose) cout << "Proceeding Energy based silence detection for ["<<featureFileName<<"]"<<endl;
00214   FeatureServer fs(config,featureFileName);                            // Reading the feature file and create a feature server to deal with the data
00215   LabelServer labelServer;                                                              // Create the lable server, for indexing the segments/clusters
00216   initializeClusters(featureFileName,segServer,labelServer,config);                     // Reading the segmentation files for each feature input file
00217   unsigned long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames);            // Get the index of the cluster with in interest audio segments    
00218   SegCluster& selectedSegments=segServer.getCluster(codeSelectedFrame);   // Gives the cluster of the selected/used segments   
00219   //FrameAcc energyAccu(fs.getVectSize());
00220   //computeZeroOneOnACluster(energyAccu, fs, selectedSegments, config);
00221   unsigned long codeOutput=segServer.getClusterCount();
00222   segServer.createCluster(codeOutput);                                    // Create a new cluster for the output segments
00223   SegCluster& outputSeg=segServer.getCluster(codeOutput);                 // Get the cluster  
00224   MixtureServer ms(config);                                          // Create a mixture server in order to build an energy model
00225   StatServer ss(config,ms);                                          // Create a statistic server for model learning
00226   
00227   FrameAccGD globalFrameAcc;                                         // Create a frame accumulator for mean/cov computation
00228   globalMeanCov (fs,selectedSegments,globalFrameAcc,config);         // Compute the global mean and cov on the input segments
00229   
00230   DoubleVector globalMean=globalFrameAcc.getMeanVect();              // Get the Mean
00231   DoubleVector globalCov=globalFrameAcc.getCovVect();                // Get the Cov
00232   if (verboseLevel>1){
00233         cout <<"global mean and cov"<<endl;
00234         for (unsigned i=0; i < fs.getVectSize(); i++)cout << "mean[" << i << "=" << globalMean[i] << "]\tcov[" << globalCov[i] << "]" << endl;
00235   }
00236   MixtureGD & energyModel=ms.createMixtureGD();                // Creating the energy model
00237   //  energyMixtureInit(ms,ss,fs,energyModel,selectedSegments,globalCov,config); // Init the energy model with randomly picked data 
00238   energyMixtureInit(ms, ss, fs,energyModel,selectedSegments,globalCov,config);  // Fixed init
00239   if (verboseLevel>1)plotEnergyDistrib(energyModel);
00240   MixtureStat &emAcc=ss.createAndStoreMixtureStat(energyModel);
00241   for (int trainIt=0; trainIt<nbTrainIt; trainIt++){                 // Training It loop
00242     emAcc.resetEM();                                                 // EM stuff
00243     double llkPreviousIt=accumulateStatEM(ss,fs,emAcc,selectedSegments,config); // Accumulate EM statistics 
00244     energyModel = emAcc.getEM();                                     // Get the EM estimate
00245     varianceControl(energyModel,flooring,ceiling,globalCov);         // Apply the variance normalisation                          
00246     if (verbose) cout << "Partial Train it["<<trainIt-1 <<"] (-1 means initial partial it) LLK="<<llkPreviousIt<<" Nb Frames="
00247                       <<emAcc.getEMFeatureCount()<<endl;
00248     if (verboseLevel>1)plotEnergyDistrib(energyModel);
00249   } 
00250   unsigned long nbInitialFrames= (unsigned long) emAcc.getEMFeatureCount(); // TODO, REPLACE THIS BY A COUNT OF selectedSegments duration - 
00251   double threshold=0;                                                  // The Energy threshold 
00252   unsigned long higher=findMaxEnergyDistrib(energyModel);           
00253   if (thresholdMode=="weight"){                                      // The selection is based on the weight of the highest energy component
00254     double selectedWeight=energyModel.weight(higher);
00255     if  (energyModel.getDistribCount()== 3){
00256       unsigned long lower=findMinEnergyDistrib(energyModel);
00257       unsigned long middle=3-(higher+lower);
00258       double lossH=likelihoodLoss(energyModel.getDistrib(middle),energyModel.weight(middle),
00259                                   energyModel.getDistrib(higher),energyModel.weight(higher));
00260       double lossL=likelihoodLoss(energyModel.getDistrib(middle),energyModel.weight(middle),
00261                                   energyModel.getDistrib(lower),energyModel.weight(lower));
00262       if (verbose) cout  << "lossL["<<lossL<<"] lossH["<<lossH<<"]"<<endl;
00263       if (lossH<lossL)
00264         selectedWeight+=alpha*energyModel.weight(middle);                 
00265     }
00266     threshold=computeEnergyThreshold(fs,selectedWeight);                
00267   }
00268   else if (thresholdMode=="meanStd"){                                     // the Energy threshold is the mean of the highest energy component minus alpha*std 
00269     threshold=energyModel.getDistrib(higher).getMean(0) - (alpha*sqrt(energyModel.getDistrib(higher).getCov(0)));
00270   }
00271   else cout << thresholdMode<< "unknown"<<endl;
00272   unsigned long nbCurrentFrames=selectFrames(fs,segServer,threshold,selectedSegments,outputSeg,labelOutput,featureFileName); // Build the segments with the energized frames
00273   if (verbose){
00274     double selected=(float) nbCurrentFrames/ (float)nbInitialFrames;
00275     cout <<"File ["<<featureFileName<<"]        Number of initial frames ["<<nbInitialFrames<<"]        Number of selected frames ["
00276          <<nbCurrentFrames << "]        percentage selected [" << (int) (selected*100) << "]" << endl;
00277   }
00278   return outputSeg;
00279 }
00280 
00281 
00282 #endif //!defined(ALIZE_EnergyDetector_cpp)