LIA_RAL: LIA_Utils/GmmTokenizer/src/GmmTokenizer.cpp Source File

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 LIA_RAL project is a development project that was initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 chagnolleau I., Eurospeech 2003, Genova).
00033 The conclusion of the paper is proposed below:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a person's voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_GMMTokenizer_cpp)
00056 #define ALIZE_GMMTokenizer_cpp
00057 
00058 #include <iostream>
00059 #include <fstream>  // pour outFile
00060 #include <cstdio>   // pour printf()
00061 #include <cassert> // pour le debug pratique
00062 #include <cmath>
00063 #include "GmmTokenizer.h"
00064 #include "liatools.h"
00065 
00066 using namespace alize;
00067 using namespace std;
00068 
00069 void computeConfusionMatrix(Feature & f,StatServer &ss, MixtureGDStat &acc, unsigned long & nBest, Matrix <unsigned long> & mce_matrix) {
00070         
00071         acc.computeAndAccumulateLLK(f,1.0,DETERMINE_TOP_DISTRIBS);     // Determine the winning components and compute world LLK
00072         const LKVector& v = ss.getTopDistribIndexVector();
00073         const unsigned long bestDistrib=v[0].idx;
00074         for (unsigned long i=0; i<nBest;i++)
00075                 mce_matrix(bestDistrib,v[i].idx)++;
00076         acc.resetLLK();                    // Reset the world LLK accumulator
00077 }
00078 
00079 void computeConfusionMatrix(Seg * seg, FeatureServer & fs, StatServer & ss,  MixtureGDStat & acc, unsigned long & nBest,Matrix <unsigned long> &mce_matrix) {
00080         unsigned long idxBeginFrame=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); 
00081         fs.seekFeature(idxBeginFrame); 
00082         Feature f;
00083         for (unsigned long idxFrame=0;idxFrame<seg->length();idxFrame++){    // For each frame of the segment
00084                 fs.readFeature(f);
00085                 computeConfusionMatrix(f,ss,acc,nBest,mce_matrix);
00086         } // frame loop
00087 }
00088 
00089 void computeConfusionMatrix(SegCluster & selectedSegments,  FeatureServer & fs,  StatServer & ss,  MixtureGD & world, unsigned long & nBest, Matrix <unsigned long> & mce_matrix) {
00090         MixtureGDStat &acc=ss.createAndStoreMixtureGDStat(world);
00091         Seg* seg;                                                                         // reset the reader at the begin of the input stream
00092         selectedSegments.rewind();                              
00093         while((seg=selectedSegments.getSeg())!=NULL){                                     // For each of the selected segments
00094                 computeConfusionMatrix(seg, fs, ss,acc,nBest, mce_matrix) ;
00095         } // segments loop
00096 }
00097 
00098 
00099 void computeSymbols(Feature & f, MixtureGD & world, StatServer & ss, ULongVector & stream, Config & config) {
00100         ss.computeAndAccumulateLLK(world, f,DETERMINE_TOP_DISTRIBS);     // Determine the winning components and compute world LLK
00101         const LKVector& v = ss.getTopDistribIndexVector();
00102         const unsigned long bestDistrib=v[0].idx;
00103         stream.addValue(bestDistrib);
00104 }
00105 
00106 // on a segment
00107 void computeSymbols(Seg* seg, FeatureServer & fs, MixtureGD & world, StatServer & ss, ULongVector & stream,Config & config) {
00108         unsigned long idxBeginFrame=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); 
00109         fs.seekFeature(idxBeginFrame); 
00110         Feature f;
00111         for (unsigned long idxFrame=0;idxFrame<seg->length();idxFrame++){
00112                 fs.readFeature(f);      
00113                 computeSymbols(f,world,ss,stream,config);
00114         }
00115 }
00116 // on a cluster
00117 void computeSymbols(SegCluster & selectedSegments, FeatureServer & fs, MixtureGD & world, StatServer & ss, ULongVector & stream, Config & config) {     
00118         Seg* seg;                       // reset the reader at the begin of the input stream
00119         selectedSegments.rewind();
00120         while((seg=selectedSegments.getSeg())!=NULL){ 
00121                 computeSymbols(seg, fs, world, ss, stream, config);     
00122         }
00123 }
00124 
00125 int GaussianConfusionMatrix(Config & config)
00126 {
00127          String inputNDXFileName = config.getParam("inputFeatureFilename");                        // NDX inputfile filename - described the experience 
00128         String inputWorldFilename = config.getParam("inputWorldModelName");                   // World model file used for the LLR computation
00129         String labelSelectedFrames =config.getParam("labelSelectedFrames");              // label for selected frames - Only the frames from segment with this label  will be used
00130         //double frameLength = config.getParam("frameLength").toDouble();                  // length in s of a frame
00131         String matrixName=config.getParam("matrixOutputName");
00132         unsigned long nBest =config.getParam("topDistribsCount").toLong();  
00133         
00134         try{
00135                 XList ndx(inputNDXFileName,config);                                    // Read the test definition file (ndx)
00136                 XLine *linep;                                                          // Pointor on the current test line
00137                 ndx.getLine(0);
00138                 MixtureServer ms(config);
00139                 StatServer ss(config, ms);
00140                 MixtureGD& world = ms.loadMixtureGD(inputWorldFilename);               // Load the world model
00141                 if (verbose) cout << "Confusion Matrix Memory Allocation" << endl;
00142                 unsigned long model_size=world.getDistribCount(); 
00143                 Matrix <unsigned long> mce_matrix;
00144                 mce_matrix.setDimensions(model_size,model_size);
00145                 
00146                 while ((linep=ndx.getLine()) != NULL){                                 // Loop on each line of the ndx input file
00147                         String &featureFileName=linep->getElement(0);                        // Get the testfile basename
00148                         FeatureServer fs(config,featureFileName);                            // Reading the feature file
00149                         SegServer segmentsServer;                                                             // Create the segment server for managing the segments/clusters
00150                         LabelServer labelServer;                                                              // Create the lable server, for indexing the segments/clusters
00151                         initializeClusters(featureFileName,segmentsServer,labelServer,config);                // Reading the segmentation files for each feature input file
00152                         verifyClusterFile(segmentsServer,fs,config);                                          // Verify if the segments ending before the end of the feature files...
00153                         unsigned long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames);            // Get the index of the cluster with in interest audio segments
00154                         SegCluster& selectedSegments=segmentsServer.getCluster(codeSelectedFrame); // Gives the cluster of the selected/used segments   
00155                         if (verbose) cout << "test seg["<<featureFileName<<"]"<< endl;
00156                         computeConfusionMatrix(selectedSegments, fs, ss, world, nBest, mce_matrix);
00157                 } // ndx loop
00158         mce_matrix.save(matrixName,config);
00159       
00160   } // fin try
00161  
00162   catch (Exception& e){ 
00163     cout << e.toString().c_str() << endl;
00164   }
00165   return 0;          
00166         
00167 }
00168 
00169 
00170 int GMMTokenizer(Config & config)
00171 {
00172         String inputNDXFileName = config.getParam("inputFeatureFilename");                        // NDX inputfile filename - described the experience 
00173         String inputWorldFilename = config.getParam("inputWorldModelName");                   // World model file used for the LLR computation
00174         String labelSelectedFrames =config.getParam("labelSelectedFrames");              // label for selected frames - Only the frames from segment with this label  will be used     
00175         String matrixName, symbolsFilesPath;
00176         symbolsFilesPath=config.getParam("symbolsFilesPath");
00177         
00178         try{
00179                 XList ndx(inputNDXFileName,config);                                    // Read the test definition file (ndx)
00180                 XLine *linep;                                                          // Pointor on the current test line
00181                 ndx.getLine(0);
00182                 MixtureServer ms(config);
00183                 StatServer ss(config, ms);
00184                 MixtureGD& world = ms.loadMixtureGD(inputWorldFilename);               // Load the world model
00185                 
00186                 while ((linep=ndx.getLine()) != NULL){                                 // Loop on each line of the ndx input file
00187                         String &featureFileName=linep->getElement(0);                        // Get the testfile basename
00188                         FeatureServer fs(config,featureFileName);                            // Reading the feature file
00189                         SegServer segmentsServer;                                                             // Create the segment server for managing the segments/clusters
00190                         LabelServer labelServer;                                                              // Create the lable server, for indexing the segments/clusters
00191                         initializeClusters(featureFileName,segmentsServer,labelServer,config);                // Reading the segmentation files for each feature input file
00192                         verifyClusterFile(segmentsServer,fs,config);                                          // Verify if the segments ending before the end of the feature files...
00193                         unsigned long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames);            // Get the index of the cluster with in interest audio segments
00194                         SegCluster& selectedSegments=segmentsServer.getCluster(codeSelectedFrame); // Gives the cluster of the selected/used segments   
00195                         if (verbose) cout << "test seg["<<featureFileName<<"]"<< endl;
00196                         ULongVector stream;
00197                                                 computeSymbols(selectedSegments, fs, world, ss, stream,config);
00198                         String output="./"+symbolsFilesPath+"/"+featureFileName+".sym";
00199                         stream.save(output);
00200                 } // ndx loop
00201  } // fin try
00202  
00203   catch (Exception& e){ 
00204     cout << e.toString().c_str() << endl;
00205   }
00206   return 0;     
00207 
00208 }
00209 
00210 #endif //!defined(ALIZE_GMMTokenizer_cpp)