Scoring.cpp

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 The LIA_RAL project is a development project that was initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 chagnolleau I., Eurospeech 2003, Genova).
00033 The conclusion of the paper is proposed below:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a person's voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_Scoring_cpp)
00056 #define ALIZE_Scoring_cpp
00057 
00058 #include <iostream>
00059 #include <fstream>  // pour outFile
00060 #include <cstdio>   // pour printf()
00061 #include <cassert> // pour le debug pratique
00062 #include <cmath>
00063 #include "Scoring.h"
00064 #include <liatools.h>
00065 
00066 
00067 using namespace alize;
00068 using namespace std;
00069 
00070 //-------------------------------------------------------------------------
00071 // Take a decision, print t if above threshold f otherwise
00072 String getDecision(double score, double threshold)
00073 {
00074         String decision;
00075         if (score > threshold)
00076         {decision="t";} 
00077         else 
00078         {decision="f";}
00079         return decision;
00080 }
00081 
00082 //-------------------------------------------------------------------------
00083 // Take a decision, print true if above threshold false otherwise
00084 String getLongDecision(double score, double threshold)
00085 {
00086         String decision;
00087         if (score > threshold)
00088         {decision="true";}      
00089         else 
00090         {decision="false";}
00091         return decision;
00092 }
00093 
00094 //-------------------------------------------------------------------------
00095 // Retrieve info in a nist file providing the good fields
00096 void retrieveNISTSegmentInfo(XList & inputList, String & gender, String & clientName, String & seg, unsigned long genderField, unsigned long nameField, unsigned long segField, unsigned long & i) {
00097         if (inputList.getLine(i).getElement(genderField)=="F") {gender="f";} else {gender="m";}
00098         clientName=inputList.getLine(i).getElement(nameField);
00099         seg=inputList.getLine(i).getElement(segField);
00100 }
00101 
00102 //-------------------------------------------------------------------------
00103 // 
00104 unsigned long getIndexOfMaxScore(XList & inputList,unsigned long scoreField, unsigned long segField, unsigned long & i, unsigned long nbListLines)
00105 {
00106         String seg=inputList.getLine(i).getElement(segField);   //this is a new segment
00107         long max_score=-200;
00108         long score;
00109         unsigned long maxIndex=0; 
00110 
00111         while (inputList.getLine(i).getElement(segField)==seg)  // while same segment test
00112         {
00113                 score= inputList.getLine(i).getElement(scoreField).toLong();
00114                 if (score >= max_score) {max_score=score; maxIndex=i;}  // store max score and its index
00115                 i++;
00116                 if (i >= nbListLines) break; // break the loop if end of file
00117         }
00118 return maxIndex;
00119 }
00120 
00121 //-------------------------------------------------------------------------
00122 // Produce a tab with mean and cov by segment (mdtm and etf only)
00123 void getSegmentalMeanCov(XList & inputList, unsigned long nbLoc, double* meanTab, double * covTab)
00124 {
00125         XLine *linep;
00126         unsigned long cpt=0;
00127         unsigned long cpttab=0;
00128         double meanAcc=0.0;
00129         double covAcc=0.0;
00130      
00131         while((linep=inputList.getLine())!=NULL)
00132         {
00133                 double score=linep->getElement(6).toDouble();
00134                 meanAcc+=score;                 // Accumulate mean and cov
00135                 covAcc+=(score*score);
00136                 if (cpt%nbLoc==(nbLoc-1))       // when changing segment
00137                 {
00138                         meanTab[cpttab]=meanAcc;
00139                         covTab[cpttab]=covAcc;
00140                         cpttab++;       
00141                         meanAcc=0.0;
00142                         covAcc=0.0;
00143                 }
00144                 cpt++;
00145         }
00146 }
00147 
00148 //-------------------------------------------------------------------------
00149 // Get a tab with indexes of speakers with maximum likelihood (mdtm and etf only)
00150 void getTarClientIdx(Config & config, XList & inputList, unsigned long nbLoc, unsigned long * tarTab)
00151 {
00152         XLine *linep;
00153         unsigned long cpt=0;
00154         unsigned long cpttab=0;
00155         double minLLK=config.getParam("minLLK").toDouble();
00156         double maxScore=minLLK;
00157         unsigned long idxTar=0;
00158     bool verbose=config.existsParam("verbose");
00159     
00160         while((linep=inputList.getLine())!=NULL)
00161         {
00162                 double score=linep->getElement(6).toDouble();
00163                 if (score>=maxScore)
00164                 {
00165                         maxScore=score;         // this is the maximum score
00166                         idxTar=cpt;                     // index is just the line
00167                         if (verbose) {cout << "giving highest score to " << linep->getElement(1) << " "<<maxScore << endl;} 
00168                 }                         
00169                 if (cpt%nbLoc==(nbLoc-1))       // when changing segment
00170                 {
00171                         tarTab[cpttab]=idxTar;   // idx of the target goes in the tab
00172                         if (verbose) {cout << linep->getElement(1) << " max score: "<<maxScore <<"idx: "<<idxTar<<"cpt: "<<cpt<< endl;}
00173                         cpttab++;       
00174                         maxScore=minLLK;        //reset maxScore
00175                 }
00176                 cpt++;
00177         }
00178 }
00179 
00180 //-------------------------------------------------------------------------
00181 // Produce a tab with mean and cov by segment without the maximum score(mdtm and etf only)
00182 void getSegmentalMeanCovWithoutMax(Config & config, XList & inputList, unsigned long nbLoc, unsigned long * tarTab, double* meanTab, double * covTab)
00183 {
00184         XLine *linep;
00185         unsigned long cpt=0;
00186         unsigned long cpttab=0;
00187         double minLLK=config.getParam("minLLK").toDouble();
00188         double maxScore=minLLK;
00189         double meanAcc=0.0;
00190         double covAcc=0.0;
00191         unsigned long idxTar=0;
00192         bool verbose=config.existsParam("verbose");
00193 
00194         while((linep=inputList.getLine())!=NULL)
00195         {
00196                 double score=linep->getElement(6).toDouble();
00197                 if (score>=maxScore)
00198                 {
00199                         maxScore=score;         // this is the maximum score
00200                         idxTar=cpt;                     // index is just the line
00201                         if (verbose) {cout << "giving highest score to " << linep->getElement(1) << " "<<maxScore << endl;} 
00202                 }
00203                 meanAcc+=score;                 // Accumulate mean and cov
00204                 covAcc+=(score*score);
00205           
00206                 if (cpt%nbLoc==(nbLoc-1))       // when changing segment
00207                 {       
00208                         tarTab[cpttab]=idxTar;
00209                         meanAcc-=maxScore;      //remove max from Stats
00210                         covAcc-=(maxScore*maxScore);
00211                         meanTab[cpttab]=meanAcc;
00212                         covTab[cpttab]=covAcc;
00213                         if (verbose) {cout << linep->getElement(1) << " max score: "<<maxScore <<"idx: "<<idxTar<<"cpt: "<<cpt<< " meanA: "<<meanAcc<<" covA: "<<covAcc<<endl;}
00214                         cpttab++;       
00215                         maxScore=minLLK;
00216                         meanAcc=0.0;
00217                         covAcc=0.0;
00218                 }
00219                 cpt++;
00220         }
00221 }  
00222 
00223 int Scoring(Config& config)
00224 {
00225 
00226         using namespace alize;
00227         using namespace std;
00228 
00229         try{
00230 
00231         if (config.existsParam("debug"))debug=true; else debug=false;  
00232         if (config.existsParam("verbose"))verbose=true; else verbose=false;
00233         String Mode = config.getParam("mode");
00234         String inputFileName = config.getParam("inputFile");
00235         String outputFileName = config.getParam("outputFile");
00236         bool hard=config.existsParam("hardDecision"); //Force a true decision to be taken among the test (identification)
00237         double threshold=0.0;
00238         if (!hard) threshold = config.getParam("threshold").toDouble(); // Ask for a threshold in verification mode
00239         else if (verbose) cout << "W: Force a decision to be taken" << endl;    // else display a warning
00240         XList inputList(inputFileName,config);
00241         ofstream outFile(outputFileName.c_str(),ios::out | ios::trunc);
00242                 
00243                 if ( Mode == "NIST")
00244                 {
00245                         String segTypeTest= config.getParam("segTypeTest");
00246                         String trainTypeTest= config.getParam("trainTypeTest");
00247                         String adaptationMode= config.getParam("adaptationMode");
00248                         unsigned long genderField, decisionField, segField, scoreField, nameField;
00249                         String gender, clientName, seg, decision;
00250                         setLIAInfoFields(genderField, nameField, decisionField, segField, scoreField);  // Set fields position for a LIA file format F name - seg score
00251                         unsigned long i=0;
00252                         unsigned long maxIndex;
00253                         double LLR=0.0;
00254                         
00255                         while (i < inputList.getLineCount())
00256                         {
00257                                 if (!hard) // an ACCPET decision is not compulsory
00258                                         {
00259                                         decision=getDecision(inputList.getLine(i).getElement(scoreField).toDouble(),threshold);
00260                                         retrieveNISTSegmentInfo(inputList, gender, clientName, seg, genderField, nameField, segField, i);
00261                                         LLR=inputList.getLine(i).getElement(scoreField).toDouble();
00262                                         outputResultNIST04Line(trainTypeTest,adaptationMode,segTypeTest,gender,clientName,seg,decision,LLR,outFile);
00263                                         i++;
00264                                 }
00265                                 else {
00266                                         maxIndex=getIndexOfMaxScore(inputList,scoreField,segField,i,inputList.getLineCount());
00267                                         decision="true";                                                // decision is true
00268                                         retrieveNISTSegmentInfo(inputList, gender, clientName, seg, genderField, nameField, segField, maxIndex);
00269                                         LLR=inputList.getLine(maxIndex).getElement(scoreField).toDouble();
00270                                         outputResultNIST04Line(trainTypeTest,adaptationMode,segTypeTest,gender,clientName,seg,decision,LLR,outFile); // just output the interesting lines
00271                                         }
00272                                 
00273                         }
00274                 }
00275                 else if ( Mode == "leaveMaxOutTnorm")
00276                 {
00277                         unsigned long nbLoc=config.getParam("nbLoc").toLong(); 
00278                         unsigned long dimTabs=(unsigned long)(inputList.getLineCount()/nbLoc)+1;        //number of segments is number of lines divided by nb of segments
00279                         unsigned long * tarTab=new unsigned long [dimTabs];             //tab for target speaker idx
00280                         std::fill_n( tarTab, dimTabs, static_cast<unsigned long>( 0 ) );
00281                         double* meanTab=new double [dimTabs];                                    //tab for t-norm mean
00282                         std::fill_n( meanTab, dimTabs, static_cast<double>( 0 ) );
00283                         double* covTab=new double [dimTabs];                                            //tab for t-norm cov
00284                         std::fill_n( covTab, dimTabs, static_cast<double>( 0 ) );    
00285                         double LLR;
00286                         unsigned long tabcpt=0;
00287                         unsigned long cpt=0;
00288                         unsigned long scoreField=6;
00289 
00290                         getSegmentalMeanCovWithoutMax(config, inputList, nbLoc, tarTab, meanTab, covTab); //return target idx in tarTab          
00291                         inputList.rewind();
00292 
00293                         for (unsigned long i=0; i < inputList.getLineCount(); i++)      // Loop in the XList
00294                         {
00295                                 String subType=inputList.getLine(i).getElement(0); 
00296                                 String event=inputList.getLine(i).getElement(1);
00297                                 String channel="1";
00298                                 String source=inputList.getLine(i).getElement(3);
00299                                 String start=inputList.getLine(i).getElement(4);
00300                                 String duration=inputList.getLine(i).getElement(5);     
00301 
00302                                 if (cpt%nbLoc==0 && cpt!=0) 
00303                                         {tabcpt++;}                             // Increments when changing segment
00304                                 double tmpScore=inputList.getLine(i).getElement(scoreField).toDouble();
00305                                 double mean=0.0;
00306                                 double cov=0.0;
00307                                 if (cpt==tarTab[tabcpt]) // if this score is the highest
00308                                         {
00309                                         mean=meanTab[tabcpt]/(nbLoc-1); // don't remove score from mean & cov accumulators
00310                                         cov=covTab[tabcpt]/(nbLoc-1)-(mean*mean); 
00311                                         }
00312                                 else
00313                                         {
00314                                         mean=(meanTab[tabcpt]-tmpScore)/(nbLoc-2); // remove score from mean & cov accumulators
00315                                         cov=(covTab[tabcpt]-(tmpScore*tmpScore))/(nbLoc-2)-(mean*mean); 
00316                                         }       
00317                                 LLR=(tmpScore-mean)/(sqrt(cov));
00318                                 cpt++;
00319                                 outputResultLIARALLine(subType,event,channel,source,start,duration, LLR,outFile);
00320                         }
00321                         delete []meanTab;
00322                         delete []covTab;
00323                         delete []tarTab;
00324                 }
00325 
00326                 else if ( Mode == "ETF")
00327                 {
00328 
00329                         unsigned long nbLoc=config.getParam("nbLoc").toLong(); 
00330                         unsigned long dimTabs=(unsigned long)(inputList.getLineCount()/nbLoc);  //number of segments is number of lines divided by nb of segmentsi
00331                         unsigned long * tarTab=new unsigned long [dimTabs];   //tab for target speaker idx
00332                         std::fill_n( tarTab, dimTabs, static_cast<unsigned long>( 0 ) );
00333                         double LLR;
00334                         String decision="unknown";
00335                         unsigned long scoreField=6;
00336                         unsigned long tabcpt=0;
00337                         unsigned long cpt=0;
00338 
00339                         ofstream TARFile("MAX",ios::out | ios::trunc);
00340                         ofstream NONFile("MIN",ios::out | ios::trunc);
00341 
00342                         if      (config.existsParam("hard"))                            //For Hard Decision, assume tests are not cross-gendered
00343                         {
00344                                 cerr << "W: Applying a hard decision, 1 decision by segment" << endl;
00345                                 getTarClientIdx(config, inputList, nbLoc, tarTab); //return target idx in tarTab                 
00346                                 inputList.rewind();
00347                         }
00348 
00349                         for (unsigned long i=0; i < inputList.getLineCount(); i++)                      // Loop in the XList
00350                         {
00351                                 String type="spk";
00352                                 String subType;
00353                                 if (inputList.getLine(i).getElement(0)=="F") 
00354                                         {subType="female";}
00355                                 else if (inputList.getLine(i).getElement(0)=="M")
00356                                         {subType="male";}
00357                                 else {subType="unknown";}
00358                                 String event=inputList.getLine(i).getElement(1);
00359                                 String channel="1";
00360                                 String source=inputList.getLine(i).getElement(3);
00361                                 String start=inputList.getLine(i).getElement(4);
00362                                 double duration=inputList.getLine(i).getElement(5).toDouble()-inputList.getLine(i).getElement(4).toDouble()-0.01;       
00363 
00364                                 LLR=inputList.getLine(i).getElement(scoreField).toDouble(); // get Score according to the Scorefield
00365                                 if (config.existsParam("hard"))
00366                                 {
00367                                         if (cpt==tarTab[tabcpt])
00368                                         {
00369                                                 decision=getLongDecision(inputList.getLine(i).getElement(scoreField).toDouble(),threshold);
00370                                                 outputResultETFLine(source, channel, start, duration, type, subType, event, LLR, decision, TARFile);
00371                                                 tabcpt++;
00372                                         }
00373                                         else 
00374                                         {       
00375                                                 decision="false";
00376                                                 outputResultETFLine(source, channel, start, duration, type, subType, event, LLR, decision, NONFile);
00377                                         }
00378                                         cpt++;
00379                                 }
00380                                 else
00381                                 {
00382                                         decision=getDecision(inputList.getLine(i).getElement(scoreField).toDouble(),threshold);
00383                                 }
00384                                 outputResultETFLine(source, channel, start, duration, type, subType, event, LLR, decision, outFile);
00385                         }
00386                         delete []tarTab;
00387                 }       
00388                 else if ( Mode == "MDTM")
00389                 {
00390                         unsigned long scoreField=6;
00391                         for (unsigned long i=0; i < inputList.getLineCount(); i++)
00392                         {
00393                                 String type="speaker";
00394                                 String subType;
00395                                 if (inputList.getLine(i).getElement(0)=="F") 
00396                                 {subType="adult_female";} 
00397                                 else if (inputList.getLine(i).getElement(0)=="M")
00398                                 {subType="adult_male";}
00399                                 else if (inputList.getLine(i).getElement(0)=="C")
00400                                 {subType="child";}
00401                                 else {subType="unknown";}
00402                                 String channel=inputList.getLine(i).getElement(1);
00403                                 String source=inputList.getLine(i).getElement(3);
00404                                 String start=inputList.getLine(i).getElement(4);
00405                                 double duration=(inputList.getLine(i).getElement(5).toDouble()-inputList.getLine(i).getElement(4).toDouble());  
00406                                 double LLR=inputList.getLine(i).getElement(scoreField).toDouble();
00407                                 outputResultMDTMLine(source, channel, start, duration, type, LLR, subType, outFile);
00408                         }
00409                 }
00410                 else { cerr << "ERROR: Unknown Mode" << endl; exit(1);}
00411                 outFile.close();
00412 
00413         } // fin try
00414 
00415 catch (Exception& e)
00416         { 
00417                 cout << e.toString().c_str() << endl;
00418         }
00419 return 0;
00420 }
00421 
00422 int WarpScores(Config & config) {
00423         using namespace alize;
00424         using namespace std;
00425         
00426 if (debug) cout <<endl<< "Warp scores"<<endl;
00427 try {
00428         // Defines NIST04 fields  //Nico / remove ASAP, patch in preparation with retrieve info
00429         //unsigned long scoreField=7;
00430         
00431 
00432         if (config.existsParam("debug"))debug=true; else debug=false;  
00433         if (config.existsParam("verbose"))verbose=true; else verbose=false;
00434         String inputFileName = config.getParam("inputNISTFile");
00435         String outputFileName = config.getParam("outputNISTFile");
00436         String inputHistoFilename = config.getParam("inputHisto");
00437         String destHistoFilename = config.getParam("destHisto");
00438         double nonObserved=0.0;
00439         if (config.existsParam("nonObserved")) nonObserved=config.getParam("nonObserved").toDouble();
00440         double area=0.0;
00441         if (config.existsParam("refArea")) area=config.getParam("refArea").toDouble();   
00442         String  trainTypeTest, adaptationMode, segTypeTest, gender, clientName, decision, seg;
00443         double newLLR, LLR=0.0;
00444         unsigned long i=0;
00445         XList inputList(inputFileName,config);
00446 if (debug) cout <<"file["<<inputFileName<<"] loaded"<<endl;
00447         Histo destH;
00448         destH.load(destHistoFilename);
00449         Histo inputH;
00450         inputH.load(inputHistoFilename);
00451         if (debug) cout <<"histo loaded"<<endl;
00452         ofstream outFile(outputFileName.c_str(),ios::out | ios::trunc);
00453         /*      
00454         if (verbose) cerr<< "Compute Input Histo" << endl;
00455         while (i < inputList.getLineCount()) { //building histo of input file
00456                         LLR=inputList.getLine(i).getElement(scoreField).toDouble();
00457                         input.accumulateValue(LLR);
00458                         i++;
00459                         }
00460         input.computeHisto();
00461         */
00462         /*
00463         i=0;
00464         while (i < inputRef.getLineCount()) { //building histo of reference file
00465           LLR=inputRef.getLine(i).getElement(scoreField).toDouble();
00466           ref.accumulateValue(LLR);
00467           i++;
00468         }
00469         ref.computeHisto();
00470         }
00471         */
00472         if (verbose) cerr << "Warping in process...." << endl;
00473         i=0;
00474         while (i < inputList.getLineCount()) {
00475           String trainTypeTest=inputList.getLine(i).getElement(0);
00476           String adaptationMode=inputList.getLine(i).getElement(1);
00477           String segTypeTest=inputList.getLine(i).getElement(2);
00478           unsigned long genderField=3; // remove all this ASAP patch in preparation
00479           unsigned long nameField=4;
00480           unsigned long segField=5;
00481           String decision="-";
00482           retrieveNISTSegmentInfo(inputList, gender, clientName, seg, genderField,nameField, segField, i);
00483           LLR=inputList.getLine(i).getElement(7).toDouble();
00484           newLLR=scoreWarping(LLR, inputH,destH,nonObserved,area);
00485           outputResultNIST04Line(trainTypeTest,adaptationMode,segTypeTest,gender,clientName,seg,decision,newLLR,outFile);
00486           i++; 
00487         }
00488 } // fin try
00489 
00490 catch (Exception& e)
00491         {cout << e.toString().c_str() << endl;}
00492 return 0;       
00493 }
00494 
00495 #endif //!defined(ALIZE_Scoring_cpp)