PolyExpand.cpp

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 The LIA_RAL project is a development project that was initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 chagnolleau I., Eurospeech 2003, Genova).
00033 The conclusion of the paper is reproduced below:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a person's voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #include <iostream>
00056 #include "alize.h"
00057 #include "liatools.h"
00058 #include <cmath>
00059 using namespace std;
00060 using namespace alize;
00061 
00062 //**************************************************************************************************
00063 // ---- Polynomial expansion
00064 // For a single frame 
00065 void computeExpansion(Feature & f,Feature & expF) {
00066         expF[0]=1;// first value in expansion is one
00067         unsigned long size=f.getVectSize();
00068         for (unsigned long i=0;i<size;i++) // first [1 VectSize+1] values in the expension are the feature
00069                 expF[i+1]=f[i];
00070         
00071         unsigned long idx=0;    // index of the expansion 
00072         //calculate expansion
00073         for (unsigned long i=0;i<size+1;i++){
00074                 for (unsigned long j=i;j<size+1;j++){
00075                         for (unsigned long k=j;k<size+1;k++){
00076                                 expF[idx] = expF[i]*expF[j]*expF[k] ;// all combinations with repetion
00077                                 idx++;
00078                         }       
00079                 }
00080         }
00081         
00082 }
00083 
00084 // Frame Level
00085 void computeAndAccumulateExpansion(FeatureServer & fs,FrameAccGD & avgExp,unsigned long idxBeginFrame,unsigned long nbFrames,Config & config) {
00086         
00087         fs.seekFeature(idxBeginFrame);                                           // go to the frame in the buffer (and load it if needed)
00088         unsigned long vectSize=fs.getVectSize();        
00089         unsigned long expSize = (vectSize + 3) * (vectSize + 2) * (vectSize + 1) / 6 ;
00090         if (debug) cout << "Expension size: "<<expSize<<endl;
00091         Feature expF(expSize);
00092         for (unsigned long n=0;n<nbFrames;n++){
00093                 Feature f;
00094                 fs.readFeature(f);
00095                 // Defines size of expansion
00096                 computeExpansion(f,expF);
00097                 avgExp.accumulate(expF);
00098         }
00099 }
00100 
00101 // Segment level
00102 void computeAndAccumulateExpansion(FeatureServer & fs, FrameAccGD & avgExp, Seg * seg,Config & config) {
00103         unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); // Find the index of the first frame of the file in the buffer
00104         computeAndAccumulateExpansion(fs,avgExp,begin,seg->length(),config);
00105 }
00106         
00107         
00108 // Cluster level
00109 void computeAndAccumulateExpansion(FeatureServer & fs, FrameAccGD & avgExp, SegCluster & selectedSegments,Config & config) {
00110         Seg* seg;                                                     // reset the reader at the begin of the input stream
00111         selectedSegments.rewind();      
00112         while((seg=selectedSegments.getSeg())!=NULL)                  // For each of the selected segments
00113                 computeAndAccumulateExpansion(fs,avgExp,seg,config);
00114   }
00115 // ---------------------- End of poly exp
00116 //**************************************************************************************************
00117 
00118 void multiplyByR(DoubleVector & avgExp, XList & rMat) {
00119         for (unsigned long i=0;i<avgExp.size();i++) {
00120                 avgExp[i]*=rMat.getLine(i).getElement(0).toDouble();
00121         }
00122 }
00123 
00124 void zNorm(DoubleVector & avgExp, XList & rMat) {
00125         for (unsigned long i=0;i<avgExp.size();i++) {
00126                 avgExp[i]-=rMat.getLine(i).getElement(1).toDouble(); // minus mean
00127                 avgExp[i]/=rMat.getLine(i).getElement(0).toDouble(); // divides std
00128         }
00129 }
00130 
00131 void computeRSqrt(DoubleVector & R,unsigned long count) {
00132         for (unsigned long i=0;i<R.size();i++) {
00133                 R[i]/=count;
00134                 R[i]=1/sqrt(R[i]);
00135         }
00136 }
00137 
00138 void outputR(const DoubleVector & v1,const DoubleVector & v2,Config &config) {
00139         String outRFile=config.getParam("computeR");
00140         ofstream out(outRFile.c_str());
00141         for (unsigned long i=0;i<v1.size();i++) {
00142                 out << v1[i] << " " <<  v2[i] << endl;
00143         }
00144         out << endl;
00145         out.close();
00146 }
00147 void outputInstanceSVMLight(const DoubleVector & v, String & filename,Config & config) {
00148         String exType=config.getParam("exType");
00149         ofstream out(filename.c_str());
00150         out << exType << " ";
00151         for (unsigned long i=0;i<v.size();i++) {
00152                 out << i+1 << ":" << v[i] << " ";
00153         }
00154         out << endl;
00155         out.close();
00156 }
00157 void outputInstance(const DoubleVector & v, String & filename,Config & config) {
00158         String format=config.getParam("format");
00159         if (format=="SVMLight") outputInstanceSVMLight(v,filename,config);
00160         else {cerr << "E: Format unknown" << endl;}
00161 }
00162 
00163 // Compute speaker session var with full matrix
00164 int PolyExpand(Config & config){
00165   try {
00166         XList inputList(config.getParam("inputFeatureFilename"),config);
00167         XLine * pLine;
00168         bool computeR=config.existsParam("computeR");
00169         bool normalize=config.existsParam("normalize");
00170         String labelSelectedFrames=config.getParam("labelSelectedFrames");
00171         bool verbose=false;
00172         if (config.existsParam("verbose")) verbose=config.getParam("verbose").toBool();
00173         XList xMat;
00174         if (normalize) {
00175                 xMat.load(config.getParam("normalize"),config);
00176         }
00177 // Accumulate avg expansion over multiple files
00178         FrameAccGD avgExp;
00179         while((pLine=inputList.getLine())!=NULL) {
00180                 String filename=pLine->getElement(0);
00181                 if (verbose) cout << "Processing file: ["<<filename<<"] ... ";
00182                 FeatureServer fs(config,filename);      
00183                 SegServer segmentsServer;                                              // Create the segment server for managing the segments/clusters
00184                 LabelServer labelServer;                                               // Create the lable server, for indexing the segments/clusters
00185                 initializeClusters(filename,segmentsServer,labelServer,config); // Reading the segmentation files for each feature input file
00186                 verifyClusterFile(segmentsServer,fs,config);                    // Verify if the segments ending before the end of the feature files...
00187                 unsigned long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames);// Get the index of the cluster with in interest audio segments
00188                 SegCluster& selectedSegments=segmentsServer.getCluster(codeSelectedFrame); // Gives the cluster of the selected/used segments
00189                 // Accumulates average poly expansion
00190                 computeAndAccumulateExpansion(fs,avgExp,selectedSegments,config);
00191                 if (verbose) cout << "Done" <<endl;
00192                 // get mean exp
00193                 if (!computeR) {
00194                         DoubleVector avgExpVect=avgExp.getMeanVect();
00195                         if (normalize) multiplyByR(avgExpVect,xMat);
00196                         String outFile=config.getParam("vectorFilesPath")+filename+config.getParam("vectorFilesExtension");
00197                         outputInstance(avgExpVect,outFile,config);
00198                         avgExp.reset();
00199                 }
00200         }
00201         if (computeR) {
00202                 const DoubleVector & meanR=avgExp.getMeanVect();
00203                 DoubleVector R=avgExp.getxAccVect();
00204                 computeRSqrt(R,avgExp.getCount());
00205                 // output R matrix;
00206                 outputR(R,meanR,config);
00207         }
00208 }
00209   catch (Exception& e) {cout << e.toString() << endl;}
00210 return 0;
00211 }