TrainWorld.cpp

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 The LIA_RAL project is a development project initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 chagnolleau I., Eurospeech 2003, Genova).
00033 The conclusion of the paper is reproduced below:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a person's voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_TrainWorld_cpp)
00056 #define ALIZE_TrainWorld_cpp
00057 
00058 #include <iostream>
00059 #include <cmath>
00060 #include "liatools.h"
00061 #include "TrainWorld.h"
00062 
00063 using namespace alize;
00064 using namespace std;
00065 
00066 void featureStream(Config &config,String filename,FeatureServer *&fs,SegServer *&segServ,SegCluster *&segCluster,String labelSelectedFrames){
00067   fs=new FeatureServer(config,filename);
00068   try{   
00069         // TODO Test if the stream is not empty
00070         
00071         segServ=new SegServer;                                                                      // Create the segment server for managing the segments/clusters
00072         LabelServer labelServer;                                                                    // Create the label server, for indexing the segments/clusters
00073         initializeClusters(filename,*segServ,labelServer,config);                                   // Reading the segmentation files for each feature input file
00074         verifyClusterFile(*segServ,*fs,config);                                                     // Verify if the segments ending before the end of the feature files...
00075         unsigned long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames);     // Get the index of the cluster with in interest audio segments
00076         segCluster=&(segServ->getCluster(codeSelectedFrame));
00077   }
00078   catch (Exception& e){
00079     cout << e.toString() << endl;
00080   }
00081 }
00082 void reserveMem(FeatureServer** &fsTab,SegServer** &segServTab,SegCluster** &segTab,double *&weightTab,unsigned long nbStream){
00083   fsTab=new FeatureServer*[nbStream];
00084   segServTab=new SegServer*[nbStream];
00085   segTab=new SegCluster*[nbStream];
00086   weightTab=new double [nbStream];
00087   for (unsigned long i=0;i<nbStream;i++)weightTab[i]=1/(double) nbStream;
00088 }
00089 void freeMem(FeatureServer** &fsTab,SegServer** &segServTab,SegCluster** &segTab,double *&weightTab,unsigned long nbStream){
00090   for (unsigned long i=0;i<nbStream;i++){
00091     delete fsTab[i]; 
00092     delete segServTab[i];
00093   }
00094   delete [] fsTab;
00095   delete [] segServTab;
00096   delete [] segTab;
00097   delete [] weightTab;
00098 }
00099 
00100 //-------------------------------------------------------------------------
00101 int trainWorld(Config& config){
00102   if (verbose) cout << "Begin world model training"<<endl;   
00103   try{    
00104     // Reading the data, one or multiple separate streams
00105     unsigned long nbStream=0;                                                             // Number of Streams
00106     FeatureServer **fsTab=NULL;                                                           // Array of FeatureServer (address) - one by input stream
00107     SegServer     **segServTab=NULL;                                                      // Array of segment server (address)- one by input stream
00108     SegCluster    **segTab=NULL;                                                          // Array of selected segments cluster(address) - one by stream
00109     double         *weightTab=NULL;                                                       // Array of weight of each stream. i.e influence of a stream on the final model
00110     String outputWorldFilename = config.getParam("outputWorldFilename");                  // output worldmodel file filename                            
00111     bool fileInit=config.existsParam("inputWorldFilename");                               // if a inputWorlFilename is given, init by file, else from scratch
00112     bool saveInitModel=true;
00113     if (config.existsParam("saveInitModel")) saveInitModel=config.getParam("saveInitModel").toBool();
00114     String inputWorldFilename="";
00115     if (fileInit) inputWorldFilename=config.getParam("inputWorldFilename");                // if file init, the initial model filename
00116     String labelSelectedFrames =config.getParam("labelSelectedFrames");                    // label for selected frames
00117     TrainCfg trainCfg(config);                                                             // Get the training algo params
00118 
00119     // Reading the data
00120     if(config.existsParam("inputStreamList")){// We want to work on separated list 
00121       XList tmp(config.getParam("inputStreamList"),config);                                        // Each data set influence will be balanced during training
00122       XLine & listInputFilename=tmp.getAllElements();                                              // Read the list of (list) filenames in tmp -> listInputFilename
00123       nbStream=listInputFilename.getElementCount();
00124       if (nbStream==0) throw Exception("TrainWorld error:no input stream" , __FILE__, __LINE__);
00125       reserveMem(fsTab,segServTab,segTab,weightTab,nbStream);
00126       for (unsigned i=0;i<nbStream;i++)
00127                 featureStream(config,listInputFilename.getElement(i),fsTab[i],segServTab[i],segTab[i],labelSelectedFrames);
00128       if (config.existsParam("weightStreamList")){ // Read the weight of each stream, text file
00129                 XList tmpW(config.getParam("weightStreamList"),config);
00130                 XLine & listW=tmpW.getAllElements();                                              // Read the list of (list) filenames in tmp -> listInputFilename
00131                 if (listW.getElementCount()!=nbStream) throw Exception("TrainWorld error: number of weigths differs than number of input streams" , __FILE__, __LINE__);
00132                 for (unsigned i=0;i<nbStream;i++) weightTab[i]=listW.getElement(i).toDouble();
00133       }
00134     }
00135     else{ // Only one input stream, no stream list
00136       nbStream=1;
00137       reserveMem(fsTab,segServTab,segTab,weightTab,nbStream);
00138       featureStream(config,config.getParam("inputFeatureFilename"),fsTab[0],segServTab[0],segTab[0],labelSelectedFrames);
00139     }
00140     unsigned long vectSize=fsTab[0]->getVectSize();                                          // size of the input vectors
00141     // Create stat server and mixture server
00142     MixtureServer ms(config);
00143     StatServer ss(config, ms);
00144     if (debug || verbose) cout << "Stream mode, nb Stream="<<nbStream<<endl;
00145     if (debug|| (verboseLevel>2)){
00146       for (unsigned long i=0;i<nbStream;i++){
00147                 cout <<"Stream["<<i<<"]"<<endl;
00148                 segTab[i]->rewind(); 
00149                 Seg *seg;                                                                            // Reset to the first segment
00150                 while((seg=segTab[i]->getSeg())!=NULL)                                         // For each of the selected segments
00151                         cout << "File["<<seg->sourceName()<<"] Segment begin["<<
00152                         seg->begin()<<"] length["<<seg->length()<<"] index in the feature server["<<fsTab[i]->getFirstFeatureIndexOfASource(seg->sourceName())<<"]"<<endl;
00153       }
00154     }  
00155     // Global mean and variance matrices initialisation (computed from dataa or set to 0,1)
00156     bool use01=false;
00157     if (config.existsParam("use01")) use01=config.getParam("use01").toBool();
00158     if (verbose){ if (use01) cout<<"Use 0 mean, 1 cov "<<endl; else cout << "Compute global mean and cov"<<endl;}
00159     DoubleVector globalMean;
00160     DoubleVector globalCov;
00161     if (!use01){
00162       FrameAccGD globalFrameAcc;
00163       unsigned long nbFrame=computeMeanCov(config,fsTab,segTab,nbStream,globalMean,globalCov);                             // Compute the global mean and covariance
00164       if (verboseLevel>1){
00165         cout <<"global mean and cov of training data, number of frame= ["<<nbFrame<<"]"<<endl;
00166         for (unsigned i=0; i < vectSize; i++)cout << "mean[" << i << "=" << globalMean[i] << "]\tcov[" << globalCov[i] << "]" << endl;
00167       }
00168     }
00169     else initialize01(vectSize,globalMean,globalCov);
00170     MixtureGD &world=ms.createMixtureGD();
00171     if (fileInit){                                                                                        // Load or initialize the initial model
00172       if (verbose) cout << "Load initial world model ["<<inputWorldFilename<<"]" << endl;                 
00173       world=ms.loadMixtureGD(inputWorldFilename);                                                         // Load
00174     } 
00175     else{ 
00176       if (verbose) cout <<"World model init from scratch"<<endl;
00177       mixtureInit(ms,fsTab,segTab,weightTab,nbStream,world,globalCov,config,trainCfg);                             // Initialize    
00178       if (saveInitModel) world.save(outputWorldFilename+"init", config);
00179     }
00180     MixtureGD *newWorld=&world; // TODO Verify and suppress...
00181     trainModelStream(config,ms,ss,fsTab,segTab,weightTab,nbStream,globalMean,globalCov,newWorld,trainCfg);
00182     if (verbose) cout << "Save world model ["<<outputWorldFilename<<"]" << endl;
00183     newWorld->save(outputWorldFilename, config);                                          
00184     // Cleaning the memory
00185     freeMem(fsTab,segServTab,segTab,weightTab,nbStream);
00186   }
00187   catch (Exception& e){
00188     cout << e.toString() << endl;
00189   }
00190   return 0;
00191 }
00192 
00193 
00194 #endif // !defined(ALIZE_TrainWorld_cpp)