GeneralTools.cpp

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 The LIA_RAL project is a development project that was initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 chagnolleau I., Eurospeech 2003, Genova].
00033 The conclusion of the paper is given below:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a person's voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_GeneralTools_cpp)
00056 #define ALIZE_GeneralTools_cpp
00057 
00058 #include <iostream>
00059 #include <fstream>  // pour outFile
00060 #include <cstdio>   // pour printf()
00061 #include <cassert> // pour le debug pratique
00062 #include <cmath>
00063 
00064 #include "liatools.h"
00065 
00066 //----------------------- ComputeTest stuff --------------------------------------
00067 
00068 // --------------------------------------------------------------------------------
00069 unsigned long TabClientLine::nbClientLine(){
00070   return nbModelsLine; 
00071 }
00072 // --------------------------------------------------------------------------------
00073 String& TabClientLine::getClientName(unsigned long nClient){
00074   //First element of line is the segment
00075   return pline->getElement(nClient+1);
00076 }
00077 // --------------------------------------------------------------------------------
00078 MixtureGD& TabClientLine::getClientModel(unsigned long nClient){
00079   return *tabModel[nClient];   
00080 } 
00081 
00082 // --------------------------------------------------------------------------------
00083 // Should be added - one step initialisation with a complete mixture server - TODO
00084 /*unsigned long initialize(MixtureServer * m,Config &config, unsigned long maxModel=CST_MAX_MODEL_LINE)
00085   { 
00086   return 0; // Number of client models pre-loaded
00087   }
00088 */
00089 // --------------------------------------------------------------------------------
00090 unsigned long TabClientLine::loadLine(XLine* linep,String label,bool useDefaultClientModel,bool byUserRep){
00091   pline=linep; //Take care; the XLine is not stored but only a pointor on it !
00092   String * pModelName;  
00093   nbModelsLine=0;  
00094   bool useLabel=(label!=""); 
00095   pline->getElement(1);
00096   while (((pModelName=pline->getElement()) != NULL) && (nbModelsLine<nbMaxModelLine)){ 
00097     String modelName;
00098     if (useLabel)    
00099       modelName=*pModelName+"_"+label;                            // Use ID+label mode for multipl e models by speaker
00100     else  
00101       modelName=*pModelName;                                      // Normal mode
00102     int indexModel=ms->getMixtureIndex(modelName);
00103     if (indexModel==-1){                                          // the model is not already loaded in the mixture server
00104       String file;
00105       if (byUserRep) file=*pModelName+"/"+modelName;
00106       else file=modelName;
00107       try{  
00108         MixtureGD & m=ms->loadMixtureGD(file);
00109         tabModel[nbModelsLine]=&m; 
00110         nbModelsLine++;                                          // A model is added to the line
00111       }
00112       catch (Exception& e){ 
00113         cout << "WARNING, model ["<<modelName <<"] not found "<< endl;
00114  
00115         if ((label!="")&&(useDefaultClientModel)){
00116           cout << "Trying to use speaker general model["<<*pModelName<<"]"<<endl;
00117           MixtureGD & m=ms->loadMixtureGD(*pModelName);
00118           ms->setMixtureId(m,modelName); 
00119           tabModel[nbModelsLine]=&m;
00120           if (verbose) cout << "model loaded"<<endl;
00121           nbModelsLine++;                                          // A model is added to the line
00122         }
00123       }
00124     }
00125     else {
00126       tabModel[nbModelsLine]=&(ms->getMixtureGD(indexModel));     // the model is already in the memory and is added to the line
00127       nbModelsLine++;
00128     }
00129   }
00130   if (nbModelsLine>=nbMaxModelLine) cerr << "TabClientLine::loadLine() - nb model line > nbMaxModelLine)" << endl;      
00131   return nbModelsLine;
00132 }
00133 
00134 // --------------------------------------------------------------------------------
00135 TabClientLine::TabClientLine(MixtureServer & m, Config &config, unsigned long maxModel)
00136 {
00137   nbMaxModelLine=maxModel;
00138   ms=&m;
00139   conf=&config;
00140   tabModel=new MixtureGD*[nbMaxModelLine];
00141   pline=NULL;
00142 }
00143 //---------------------------------------------------------------------------------     
00144 TabClientLine::~TabClientLine() {delete []tabModel;}
00145 //---------------------------------------------------------------------------------
00146 //----------------------- end of TabClientLine definition -------------------------
00147 
00148 
00149 //---------------------------------------------------------------------------------
00150 //-----------------------  TabHisto definition ----------------------------------
00151 
00152   Histo & TabHisto::getHistoFromVect(unsigned long n) {
00153         Object & tmp=_tabHisto.getObject(n);
00154         return (Histo&)tmp;
00155   }
00156 
00157 void TabHisto::accumulateValueInTab(const String &id,double score){
00158         long n=_id.getIndex(id); // was unsigned long N.S. 23/09/05
00159         if (n==-1) {
00160                 if (debug) cerr << id << " unknown :";
00161                 _id.addElement(id);
00162                 _nb++;
00163                 n=_id.getIndex(id);
00164                 if (debug) cerr << id << " now index "<< n << endl;
00165         }
00166         getHistoFromVect(n).accumulateValue(score);
00167 }
00168 
00169 void TabHisto::computeHistoInTab(const String &id){
00170   return getHisto(id).computeHisto();
00171 }
00172 unsigned long TabHisto::getIndex(const String &id){
00173   long n=_id.getIndex(id);
00174   if (n==-1)throw Exception("out of array", __FILE__, __LINE__);
00175   else return n;
00176 }
00177 String& TabHisto::getId(unsigned long n){
00178   if (n>_nb)  throw Exception("out of array" , __FILE__, __LINE__);
00179   else return _id.getElement(n);
00180 }
00181 Histo & TabHisto::getHisto(const String & id)
00182 {
00183         unsigned long n=getIndex(id);
00184         if (n>_nb)  throw Exception("out of array" , __FILE__, __LINE__);
00185         else {unsigned long n=getIndex(id);
00186         return getHistoFromVect(n);}
00187 }
00188 //---------------------------------------------------------------------------------
00189 //----------------------- end of TabHisto definition ----------------------------
00190 
00191 
00192 //-----------------------------------------------------------------------------------
00193 // Compute the entropy from an Histo
00194 double computeEntropy(Histo & hist)
00195 {
00196         // integral is done by summing with a step (weighted by log(x) function)
00197         double prob=0.0;
00198         double entropy=0.0;
00199         double bound=hist.lowerBound(0);
00200         double step=0.001;
00201         while (bound < hist.higherBound(hist.size()-1)) {
00202                 prob=hist(bound); // get density
00203                 if (prob<1e-20) {entropy+=0.0;} // take care of lim x->0 x*log(x) (=0)
00204                 else {entropy+=-prob*step*log(prob);} //get the area of the bin to get P(x), indeed P(x)=sum_x(p(x))
00205                 bound+=step;
00206                 //cerr << "prob: "<<prob<<" entropy: "<<entropy<<endl;
00207         }
00208 return entropy;
00209 }
00210 
00211 //-----------------------------------------------------------------------------------
00212 // Compute mean from an Histo
00213 double computeMean(Histo & hist)
00214 {
00215         // integral is done by summing with a step (weighted by log(x) function)
00216         double prob=0.0;
00217         double mean=0.0;
00218         double bound=hist.lowerBound(0);
00219         double step=0.001;
00220         while (bound < hist.higherBound(hist.size()-1)) {
00221                 prob=step*hist(bound);
00222                 mean+=bound*prob; //get the area of the bin to get P(x), indeed P(x)=sum_x(p(x))
00223                 bound+=step;
00224                 //cerr << "prob: "<<prob<<" mean: "<<mean<<endl;
00225         }
00226 return mean;
00227 }
00228 
00229 
00230 // --------------------------------------------------------
// Decision function: returns 1 when LLRClient reaches or exceeds
// decisionThreshold, 0 otherwise.
long setDecision(double LLRClient, double decisionThreshold)
{
  const bool accepted=(LLRClient>=decisionThreshold);
  return accepted ? 1 : 0;
}
00236 
00237 //---------------------------------------------------------------------------------
00238 //-----------------------  ScoreAccum definition ----------------------------------
00239 
00240 void ScoreAccum::addAndAccumulate(const String &id,double score, unsigned long nbFrames){
00241   double value=score*(double)nbFrames;
00242   long n=_id.getIndex(id);
00243   if (n==-1){
00244     _score.addValue(value);
00245     _nbFrame.addValue(nbFrames);
00246     _id.addElement(id);
00247     _nb++;
00248   }
00249   else{
00250     _score[n]+=value;
00251     _nbFrame[n]+=nbFrames;
00252   }
00253 }
00254 double ScoreAccum::getScore(const String &id){
00255   return getScore(getIndex(id));
00256 }
00257 unsigned long ScoreAccum::getIndex(const String &id){
00258   long n=_id.getIndex(id);
00259   if (n==-1)throw Exception("out of array", __FILE__, __LINE__);
00260   else return n;
00261 }
00262 double  ScoreAccum::getScore(unsigned long n){
00263   if (_nbFrame[n]==0)throw Exception("No score accumulated", __FILE__, __LINE__); // Could also make and exception index out of bound
00264   double ret=_score[n]; // Could return an exception index out of bound
00265   ret/=_nbFrame[n];
00266   return ret;
00267 }
00268 String& ScoreAccum::getId(unsigned long n){
00269   if (n>_nb)  throw Exception("out of array" , __FILE__, __LINE__);
00270   else return _id.getElement(n);
00271 }
00272 //---------------------------------------------------------------------------------
00273 //----------------------- end of ScoreAccum definition ----------------------------
00274 
00275 //
00276 // Component selection functions
00277 int _compF(const void * op1, const void *op2){
00278   if (((TabWeightElem*)op1)->weight>((TabWeightElem*)op2)->weight)
00279     return -1; else return 1;
00280 }
00281 void TabWeight::init(const MixtureGD &model,unsigned long topDistribs){
00282   _size=model.getDistribCount();
00283   _tab=new TabWeightElem[_size];
00284   _sortByWeight(model);
00285   _nbTop=topDistribs;
00286 }
00287 void TabWeight::init(const MixtureGD &model,double threshold){
00288   _size=model.getDistribCount();
00289   _tab=new TabWeightElem[_size];
00290   _sortByWeight(model);
00291   _nbTopDyn(threshold);
00292 }
00293 TabWeight::TabWeight(const MixtureGD &model){
00294   init(model,model.getDistribCount());
00295 }
00296 TabWeight::TabWeight(const MixtureGD &model,unsigned long topDistribs){
00297   init(model,topDistribs);
00298 }
00299 TabWeight::TabWeight(const MixtureGD &model,double threshold){
00300   init(model,threshold);
00301 }
00302 
00303 // Random picking of frames (bagging) functions, based on segment/cluster processing
00304 // It is independent of the segment length
00305 // Try to decrease the number of segments for fasting the world model training
00306 // Used mainly in TrainTools.cpp 
00307 // Author: JFB
// Randomly decides whether a frame is selected.
// Returns true with probability baggedFrameProbability, using rand().
// The draw is scaled into [0,1) by dividing by RAND_MAX+1: a probability of 1
// (or more) therefore always selects and a probability of 0 (or less) never does.
// The original divided by RAND_MAX, so the draw could equal 1.0 and reject a
// frame even with a probability of 1.
bool baggedFrame(double baggedFrameProbability){
  // return (drand48()< baggedFrameProbability);   //TODO: go back to drand48
  const double res=(double)rand()/((double)RAND_MAX+1.0);
  return (res < baggedFrameProbability);
}
00315 
// Brings length into the range given by the two bounds: it is first raised to
// minimumLength when smaller, then lowered to maximumLength when larger (so the
// maximum wins when the bounds are inconsistent). The bounds are only read.
unsigned long correctedLength(unsigned long length,unsigned long &minimumLength,unsigned long&maximumLength){
  const unsigned long raised=(length<minimumLength) ? minimumLength : length;
  return (raised>maximumLength) ? maximumLength : raised;
}
00321 
00322 void baggedSegmentsConstraint(SegCluster &selectedSegments,SegCluster &baggedFrameSegment,double baggedProbability,
00323                     unsigned long minimumLength,unsigned long maximumLength){
00324         do{
00325                 baggedSegments(selectedSegments,baggedFrameSegment,baggedProbability,minimumLength,maximumLength);
00326         }while(totalFrame(baggedFrameSegment) == 0);
00327 }
00328 // Works on a set of bagged clusters - only one reading of the
00329 // segments and multiple selections, one by bagged cluster
00330 void baggedSegments(SegCluster &selectedSegments,SegCluster &baggedSeg,unsigned long nbBagged,double & baggedProbability,
00331                     unsigned long minimumLength,unsigned long maximumLength){  
00332  if (debug) cout << "begin of baggedSegments !!!"<<endl;
00333   Seg* seg;                                                     // reset the reader at the begin of the input stream
00334   selectedSegments.rewind();      
00335   seg=selectedSegments.getSeg();
00336   bool end=(seg==NULL);
00337   unsigned long beginSeg=0,lengthSeg=0;
00338   if (!end){
00339     beginSeg=seg->begin();
00340     lengthSeg=seg->length();
00341   }
00342   while(!end){
00343     if (debug) cout << "bagged, current input seg ["<<beginSeg<<","<<lengthSeg<<"]"<<endl;
00344     unsigned long  verifyLength=correctedLength(lengthSeg,minimumLength,maximumLength);
00345     bool moveSeg=true;
00346     unsigned long length=0;
00347     if (lengthSeg<=verifyLength){
00348       moveSeg=true;
00349       length=lengthSeg;
00350       if (debug) cout <<"change seg"<<endl;
00351     }
00352     else{
00353       moveSeg=false;
00354       length=verifyLength;
00355     }
00356     // for all cluster in baggedA
00357     SegServer &segServerOutput=baggedSeg.getServer();
00358     if (length>0){
00359                 for (unsigned long idx=0;idx<nbBagged;idx++) // For each component
00360                 if(baggedFrame(baggedProbability)){
00361                         Seg &newSeg=segServerOutput.createSeg(beginSeg,length,idx,seg->string(),seg->sourceName());       
00362                         baggedSeg.add(newSeg);
00363                         if (debug) cout << "bagged - Adding in bagged["<<idx<<"] the seg ["<<seg->sourceName()<<"]"<<newSeg.begin()<<" "<<newSeg.length()<<endl;   
00364                 }
00365     }
00366     if (moveSeg){
00367         seg=selectedSegments.getSeg();
00368       end=(seg==NULL);
00369       if (!end){
00370         beginSeg=seg->begin();
00371         lengthSeg=seg->length();
00372       }
00373     }
00374     else{
00375       lengthSeg-=length;
00376       beginSeg+=length;
00377     }
00378   } 
00379   
00380   if ((debug) || (verboseLevel>3)){
00381     cout <<"Bagged segments"<<endl;
00382         showCluster(baggedSeg);
00383     }
00384   if (verbose){
00385     unsigned long total=totalFrame(selectedSegments);
00386         unsigned long selected=totalFrame(baggedSeg);
00387         double percent=(double)selected*100/(double) total;
00388         cout <<"Bagged segments, Initial frames["<<total<<"] Selected frames["<<selected<<"] % selected["<<percent<<"]"<<endl;
00389   }
00390 }
00391 /*
00392 void baggedSegments(SegCluster &selectedSegments,RefVector<SegCluster> &baggedA,double & baggedProbability,
00393                     unsigned long minimumLength,unsigned long maximumLength){  
00394   Seg* seg;                                                     // reset the reader at the begin of the input stream
00395   selectedSegments.rewind();      
00396   seg=selectedSegments.getSeg();
00397   bool end=(seg==NULL);
00398   unsigned long beginSeg=0,lengthSeg=0;
00399   if (!end){
00400     beginSeg=seg->begin();
00401     lengthSeg=seg->length();
00402   }
00403   while(!end){
00404     if (debug) cout << "bagged, current input seg ["<<beginSeg<<","<<lengthSeg<<"]"<<endl;
00405     unsigned long  verifyLength=correctedLength(lengthSeg,minimumLength,maximumLength);
00406     bool moveSeg=true;
00407     unsigned long length=0;
00408     if (lengthSeg<=verifyLength){
00409       moveSeg=true;
00410       length=lengthSeg;
00411       if (debug) cout <<"change seg"<<endl;
00412     }
00413     else{
00414       moveSeg=false;
00415       length=verifyLength;
00416     }
00417     // for all cluster in baggedA
00418     if (length>0)
00419         for (unsigned long idx=0;idx<baggedA.size();idx++) // For each component
00420             if(baggedFrame(baggedProbability)){
00421                 SegServer &segServerOutput=baggedA[idx].getServer();
00422                 Seg &newSeg=segServerOutput.createSeg(beginSeg,length,0,seg->string(),seg->sourceName());       
00423                 baggedA[idx].add(newSeg);
00424                 if (debug) cout << "bagged - Adding in bagged["<<idx<<"] the seg ["<<seg->sourceName()<<"]"<<newSeg.begin()<<" "<<newSeg.length()<<endl;   
00425             }
00426     if (moveSeg){
00427         seg=selectedSegments.getSeg();
00428       end=(seg==NULL);
00429       if (!end){
00430         beginSeg=seg->begin();
00431         lengthSeg=seg->length();
00432       }
00433     }
00434     else{
00435       lengthSeg-=length;
00436       beginSeg+=length;
00437     }
00438   } 
00439   if ((debug) || (verboseLevel>3)){
00440     cout <<"Bagged segments"<<endl;
00441     for (unsigned long idx=0;idx<baggedA.size();idx++){
00442         cout << "Bagged cluster["<<idx<<"]"<<endl;
00443         showCluster(baggedA[idx]);
00444     }
00445   }
00446   if (verbose){
00447     unsigned long total=totalFrame(selectedSegments);
00448     for (unsigned long idx=0;idx<baggedA.size();idx++){
00449         unsigned long selected=totalFrame(baggedA[idx]);
00450         double percent=(double)selected*100/(double) total;
00451         cout <<"Bagged segments["<<idx<<"] Initial frames["<<total<<"] Selected frames["<<selected<<"] % selected["<<percent<<"]"<<endl;
00452     }
00453   }
00454 }*/
00455 void baggedSegments(SegCluster &selectedSegments,SegCluster &baggedFrameSegment,double baggedProbability,
00456                     unsigned long minimumLength,unsigned long maximumLength){  
00457   SegServer &segServerOutput=baggedFrameSegment.getServer();
00458   Seg* seg;                                                     // reset the reader at the begin of the input stream
00459   selectedSegments.rewind();      
00460   seg=selectedSegments.getSeg();
00461   bool end=(seg==NULL);
00462   unsigned long beginSeg=0,lengthSeg=0;
00463   if (!end){
00464     beginSeg=seg->begin();
00465     lengthSeg=seg->length();
00466   }
00467   while(!end){
00468     if (debug) cout << "bagged, current input seg ["<<beginSeg<<","<<lengthSeg<<"]"<<endl;
00469     unsigned long  verifyLength=correctedLength(lengthSeg,minimumLength,maximumLength);
00470     double segBaggedProbability=baggedProbability;
00471     bool moveSeg=true;
00472     unsigned long length=0;
00473     if (lengthSeg<=verifyLength){
00474       moveSeg=true;
00475       length=lengthSeg;
00476       if (debug) cout <<"change seg"<<endl;
00477     }
00478     else{
00479       moveSeg=false;
00480       length=verifyLength;
00481     }
00482     if ((length>0) &&(baggedFrame(segBaggedProbability))){
00483       Seg &newSeg=segServerOutput.createSeg(beginSeg,length,0,seg->string(),seg->sourceName());       
00484       baggedFrameSegment.add(newSeg);
00485       if (debug) cout << "bagged - Adding the seg ["<<seg->sourceName()<<"]"<<newSeg.begin()<<" "<<newSeg.length()<<endl;   
00486     }
00487     if (moveSeg){
00488       seg=selectedSegments.getSeg();
00489       end=(seg==NULL);
00490       if (!end){
00491         beginSeg=seg->begin();
00492         lengthSeg=seg->length();
00493       }
00494     }
00495     else{
00496       lengthSeg-=length;
00497       beginSeg+=length;
00498     }
00499   } 
00500   if ((debug) || (verboseLevel>3)){
00501     cout <<"Bagged segments"<<endl;
00502     showCluster(baggedFrameSegment);
00503   }
00504   if (verbose){
00505     unsigned long total=totalFrame(selectedSegments);
00506     unsigned long selected=totalFrame(baggedFrameSegment);
00507     double percent=(double)selected*100/(double) total;
00508     cout <<"Bagged segments, Initial frames["<<total<<"] Selected frames["<<selected<<"] % selected["<<percent<<"]"<<endl;
00509   }
00510 }
00511 // Same but returns both selected and unselected clusters
00512 // Take care, both clusters should be created in the same server - NOT TESTED
00513 void baggedSegments(SegCluster &selectedSegments,SegCluster &baggedSelected,SegCluster &baggedUnselected,double baggedProbability,
00514                     unsigned long minimumLength,unsigned long maximumLength){  
00515   SegServer &segServerOutput=baggedSelected.getServer();
00516   Seg* seg;                                                     // reset the reader at the begin of the input stream
00517   selectedSegments.rewind();      
00518   seg=selectedSegments.getSeg();
00519   bool end=(seg==NULL);
00520   unsigned long beginSeg=0,lengthSeg=0;
00521   if (!end){
00522     beginSeg=seg->begin();
00523     lengthSeg=seg->length();
00524   }
00525   while(!end){
00526     if (debug) cout << "bagged, current input seg ["<<beginSeg<<","<<lengthSeg<<"]"<<endl;
00527     unsigned long  verifyLength=correctedLength(lengthSeg,minimumLength,maximumLength);
00528     double segBaggedProbability=baggedProbability;
00529     bool moveSeg=true;
00530     unsigned long length=0;
00531     if (lengthSeg<=verifyLength){
00532       moveSeg=true;
00533       length=lengthSeg;
00534       if (debug) cout <<"change seg"<<endl;
00535     }
00536     else{
00537       moveSeg=false;
00538       length=verifyLength;
00539     }
00540     if (length>0){
00541         Seg &newSeg=segServerOutput.createSeg(beginSeg,length,0,seg->string(),seg->sourceName());       
00542         if(baggedFrame(segBaggedProbability)){
00543          baggedSelected .add(newSeg);
00544          if (debug) cout << "baggedSelected - Adding the seg ["<<seg->sourceName()<<"]"<<newSeg.begin()<<" "<<newSeg.length()<<endl;   
00545         }
00546         else{
00547           baggedUnselected.add(newSeg);
00548           if (debug) cout << "baggedUnselected - Adding the seg ["<<seg->sourceName()<<"]"<<newSeg.begin()<<" "<<newSeg.length()<<endl;   
00549          }
00550     }
00551     if (moveSeg){
00552       seg=selectedSegments.getSeg();
00553       end=(seg==NULL);
00554       if (!end){
00555         beginSeg=seg->begin();
00556         lengthSeg=seg->length();
00557       }
00558     }
00559     else{
00560       lengthSeg-=length;
00561       beginSeg+=length;
00562     }
00563   } 
00564   if ((debug) || (verboseLevel>3)){
00565     cout <<"BaggedSelected"<<endl;
00566     showCluster(baggedSelected);
00567     cout <<"BaggedUnselected"<<endl;
00568     showCluster(baggedUnselected);
00569   }
00570   if (verboseLevel>1){
00571     unsigned long total=totalFrame(selectedSegments);
00572     unsigned long selected=totalFrame(baggedSelected);
00573     double percent=(double)selected*100/(double) total;
00574     cout <<"Bagged segments, Initial frames["<<total<<"] Selected frames["<<selected<<"] % selected["<<percent<<"]"<<endl;
00575   }
00576 }
00577 
00578 //-------------------------------------------------------------------------
00579 //-- Compute the mean log likelihood for the Selected frames and a given model
00580 double meanLikelihood(StatServer &ss,FeatureServer &fs,MixtureGD &model,unsigned long idxBeginFrame,unsigned long nbFrames,Config &config)  {
00581   MixtureStat &llkAcc=ss.createAndStoreMixtureStat(model);
00582   llkAcc.resetLLK();
00583   accumulateStatLLK(ss,fs,llkAcc,idxBeginFrame,nbFrames,config); 
00584   double llk=llkAcc.getMeanLLK();
00585   ss.deleteMixtureStat(llkAcc);
00586   return llk;
00587 }
00588 // one a cluster
00589 double meanLikelihood(StatServer &ss,FeatureServer &fs,MixtureGD &model,SegCluster &selectedSegments,
00590                       Config &config){
00591   MixtureStat &llkAcc=ss.createAndStoreMixtureStat(model);
00592   llkAcc.resetLLK();
00593   accumulateStatLLK(ss,fs,llkAcc,selectedSegments,config);
00594   double llk=llkAcc.getMeanLLK();
00595   ss.deleteMixtureStat(llkAcc);
00596   return llk;
00597 }
00598 // on a set of input streams
00599 double meanLikelihood(StatServer &ss,FeatureServer **fsTab,SegCluster **segTab,unsigned long nbStream,MixtureGD &model,Config &config){
00600   MixtureStat &llkAcc=ss.createAndStoreMixtureStat(model);
00601   llkAcc.resetLLK();
00602   for (unsigned long stream=0;stream<nbStream;stream++)
00603     accumulateStatLLK(ss,*fsTab[stream],llkAcc,*segTab[stream],config);
00604   double llk=llkAcc.getMeanLLK();
00605   ss.deleteMixtureStat(llkAcc);
00606   return llk;
00607 }
00608 
00609 //A.P.
00610 double meanLikelihood(StatServer & ss, ObjectRefVector & FeatServ,
00611   ObjectRefVector & ClusterSeg, MixtureGD & model, DoubleVector & decision,
00612   Config & config)
00613 {
00614   MixtureStat & llkAcc = ss.createAndStoreMixtureStat(model);
00615   llkAcc.resetLLK();
00616   for (unsigned long nbFs = 0; nbFs < FeatServ.size(); nbFs++)
00617     accumulateStatLLK(ss,
00618       (static_cast < FeatureServer & >(FeatServ.getObject(nbFs))), llkAcc,
00619       (static_cast < SegCluster & >(ClusterSeg.getObject(nbFs))),
00620       decision[nbFs], config);
00621   double llk = llkAcc.getMeanLLK();
00622   ss.deleteMixtureStat(llkAcc);
00623   return llk;
00624 }
00625 
00626 
00627 //-------------------------------------------------------------------------
00628 //-- Compute the mean and cov of selected the data using segmental mode
00629 void globalMeanCov (FeatureServer &fs,SegCluster &selectedSegments,FrameAcc & globalFrameAcc,Config &config)  {
00630   globalFrameAcc.reset();  
00631   accumulateStatFrame(globalFrameAcc,fs,selectedSegments,config);  
00632 }
00633 // On a complete feature stream
00634 void globalMeanCov (FeatureServer &fs,FrameAcc & globalFrameAcc,Config &config)  {
00635   globalFrameAcc.reset();  
00636   accumulateStatFrame(globalFrameAcc,fs,0,fs.getFeatureCount(),config);  
00637 }
00638 
00639 // ----------------------------------------------------------------------------------------------------------
00640 // Feature Warping giving a source(tab of histo, one by coeff) and a target distribution 
00641 // for a segment and cluster (segment is the minimum time unit to perform this)
00642 void computeWarp(Histo *histoT,Histo &destH,FeatureServer & fs,unsigned long begin, unsigned long length,Config &config) {  
00643   unsigned long vectsize=fs.getVectSize();                                       // Get the vect size (number of coeff)
00644   Feature f;
00645   fs.seekFeature(begin);
00646   for (unsigned long idxFrame=0;idxFrame<length;idxFrame++){                     // for all the features of the segment
00647     fs.readFeature(f,0);  
00648     // Get the feature;
00649     for (unsigned int i = 0; i < vectsize; i++){    // For each coeff
00650       f[i]=warping(f[i],histoT[i],destH);           // Apply the warping function
00651     }
00652     fs.writeFeature(f);
00653   }
00654 }
00655 // on a segment
00656 void computeWarp(Histo *histoT,Histo &destH, FeatureServer & fs, Seg* seg,Config & config){
00657   unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); // Idx of the first frame of the current file in the feature server
00658   computeWarp(histoT,destH,fs,begin,seg->length(),config);      
00659 }
00660 // on a cluster
00661 void computeWarp(Histo *histoT,Histo &destH, FeatureServer & fs, SegCluster & selectedSegments, Config & config){
00662   Seg *seg;                                                             // current selectd segment
00663   selectedSegments.rewind();                                            // reset the reader at the begin of the input stream
00664   while((seg=selectedSegments.getSeg())!=NULL)                          // For each of the selected segments
00665     computeWarp(histoT,destH,fs,seg,config);                            // Normalize the features
00666 }
00667 
00668 // ----------------------------------------------------------------------------------------------------------
00669 // Feature Mean subtraction and Cov reduction for a segment and cluster (segment is considerred to be the minimum time unit to perform this).
00670 void computeZeroOne(const DoubleVector &featureMean,const DoubleVector &featureStd,FeatureServer & fs,unsigned long begin, unsigned long length,Config &config) {
00671   unsigned long vectsize=fs.getVectSize();                                       // Get the vect size (number of coeff)
00672   Feature f;
00673   fs.seekFeature(begin);
00674   for (unsigned long idxFrame=0;idxFrame<length;idxFrame++){                     // for all the features of the segment
00675     fs.readFeature(f,0);  
00676     // Get the feature;
00677     for (unsigned int i = 0; i < vectsize; i++)       {                  // For each coeff
00678       f[i]=(f[i]-featureMean[i])/featureStd[i];    // Apply the 0 mean 1 cov normalisation
00679     }
00680     fs.writeFeature(f);
00681   }
00682 }
00683 void computeZeroOne(FrameAccGD &frameAccu,FeatureServer & fs,unsigned long begin, unsigned long length,Config &config) {  
00684   const DoubleVector & featureMean = frameAccu.getMeanVect();                     // Get the mean vector
00685   const DoubleVector & featureStd = frameAccu.getStdVect();                       // Get the std vector (sqrt(cov))
00686   computeZeroOne(featureMean,featureStd,fs,begin,length,config);
00687 }
00688 // on a segment
00689 void computeZeroOne(FrameAccGD & frameAccu, FeatureServer & fs, Seg* seg, Config & config){
00690   const DoubleVector & featureMean = frameAccu.getMeanVect();                     // Get the mean vector
00691   const DoubleVector & featureStd = frameAccu.getStdVect();                       // Get the std vector (sqrt(cov))
00692   unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName());          // Idx of the first frame of the current file in the feature server
00693   computeZeroOne(featureMean,featureStd,fs,begin,seg->length(),config);                      // Normalize the feature to fit 0 mean, 1 cov
00694 }
00695 void computeZeroOne(const DoubleVector &featureMean,const DoubleVector &featureStd, FeatureServer & fs, Seg* seg, Config & config){
00696   unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName());          // Idx of the first frame of the current file in the feature server
00697   computeZeroOne(featureMean,featureStd,fs,begin,seg->length(),config);                      // Normalize the feature to fit 0 mean, 1 cov
00698 }
00699 // on a cluster
00700 void computeZeroOne(FrameAccGD & frameAccu, FeatureServer & fs, SegCluster & selectedSegments, Config & config){
00701   const DoubleVector & featureMean = frameAccu.getMeanVect();                     // Get the mean vector
00702   const DoubleVector & featureStd = frameAccu.getStdVect();                       // Get the std vector (sqrt(cov))
00703   computeZeroOne(featureMean,featureStd,fs,selectedSegments,config); 
00704 }
00705 void computeZeroOne(const DoubleVector &featureMean,const DoubleVector &featureStd, FeatureServer & fs, SegCluster & selectedSegments, Config & config){
00706   if (verbose) cout << "(General Tools) Compute CMS on Feature Server" << endl;
00707   Seg *seg;                                                                         // current selectd segment
00708   selectedSegments.rewind();                                                      // reset the reader at the begin of the input stream
00709   while((seg=selectedSegments.getSeg())!=NULL) {                 // For each of the selected segments
00710     computeZeroOne(featureMean,featureStd,fs,seg,config);}                      // Normalize the feature to fit 0 mean, 1 cov
00711 }
00712 
00713 void cms(String & featureFileName,FeatureServer &fs,Config &config) {
00714         unsigned long begin=fs.getFirstFeatureIndexOfASource(featureFileName);
00715         fs.seekFeature(begin);
00716         SegServer segmentsServer;
00717         LabelServer labelServer;
00718         initializeClusters(featureFileName,segmentsServer,labelServer,config);
00719         verifyClusterFile(segmentsServer,fs,config);
00720         unsigned long codeSelectedFrame=labelServer.getLabelIndexByString(config.getParam("labelSelectedFrames"));
00721         SegCluster& selectedSegments=segmentsServer.getCluster(codeSelectedFrame);  
00722         selectedSegments.rewind();  
00723         RealVector <double> mean,cov;
00724         FrameAccGD frameAccu;
00725         frameAccu.reset();   
00726         accumulateStatFrame(frameAccu,fs, selectedSegments, config);
00727         mean = frameAccu.getMeanVect();     // Get the mean vector
00728         cov  = frameAccu.getStdVect();      // Get the std vector
00729         computeZeroOne(mean,cov,fs, selectedSegments, config);  
00730 }
00731 
// Feature writing to an output stream w - can be used to write multiple segments from multiple files into one file
00733 void outputFeatureFile(Config &config, FeatureServer &fs, Feature & f, FeatureFileWriter &w) { 
00734   fs.readFeature(f);
00735   w.writeFeature(f); 
00736 }
00737 // on a part of a file
00738 void outputFeatureFile(Config &config, FeatureServer &fs, unsigned long begin,unsigned long length, FeatureFileWriter &w) {
00739   Feature f;
00740   fs.seekFeature(begin);
00741   for (unsigned long idxFrame=0;idxFrame<length;idxFrame++){
00742     outputFeatureFile(config, fs, f,w);
00743   }  
00744 }
00745 // on a segment
00746 void outputFeatureFile(Config &config, FeatureServer &fs, Seg * seg, FeatureFileWriter &w) {
00747   unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName());          // Idx of the first frame of the current file in the feature server
00748   if (verbose) cout <<"(GeneralTools) Writing ["<<seg->sourceName()<<"]"<<" begin:"<<begin<<" length:"<<seg->length()<<endl;
00749   outputFeatureFile(config,fs,begin,seg->length(),w);
00750 }
00751 
00752 // on a cluster
00753 void outputFeatureFile(Config &config, FeatureServer &fs, SegCluster & selectedSegments, FeatureFileWriter &w) {
00754   Seg *seg;                                                                         // current selectd segment
00755   selectedSegments.rewind();                                                        // reset the reader at the begin of the input stream
00756   while((seg=selectedSegments.getSeg())!=NULL){
00757     outputFeatureFile(config, fs, seg, w); 
00758   }
00759 }
00760 
00761 // Feature Mapping related functions
00762 unsigned long getBestGaussian(Mixture & M, Feature & f) {
00763   double vrais=0.0;
00764   double _v=0.0;
00765   unsigned long idx=0;
00766   unsigned long model_size=M.getDistribCount();
00767   for (unsigned long g=0;g<model_size;g++){
00768     _v=M.getDistrib(g).computeLK(f)*M.weight(g);
00769     if(_v>vrais) {
00770       idx=g;
00771       vrais=_v;
00772     }
00773   }
00774   return idx;
00775 }
00776 
// Maps `data` in place from a (meanData,covData) distribution to a
// (meanMap,covMap) distribution:
//   data = sqrt(covMap/covData) * (data - meanData) + meanMap
void mapDataToDistrib(double & data, const double meanData, const double covData, const double meanMap, const double covMap) {
  const double scale=sqrt(covMap/covData);   // ratio of the standard deviations
  const double centred=data-meanData;        // offset from the source mean
  data=scale*centred+meanMap;
}
00781 
00782 void featureMapping(MixtureServer & ms, Feature & f,Config &config) {
00783   unsigned long vectsize=f.getVectSize();                                       // Get the vect size (number of coeff)
00784   unsigned long idCD=getBestGaussian(ms.getMixture(1),f);                                               // Finding the best gaussian in the sub-model   
00785   DistribGD & DCD=ms.getMixtureGD(1).getDistrib(idCD); // get the gaussian in both models
00786   DistribGD & DCI=ms.getMixtureGD(0).getDistrib(idCD);
00787   for (unsigned int i = 0; i < vectsize; i++) {                                 // map
00788     if (debug && (i==0 || i==1)) cout<<"C:"<<idCD<<"["<<i<<"] CDm,v:"<<DCD.getMean(i)<<","<<DCD.getCov(i)<<" CIm,v: "<<DCI.getMean(i)<<","<<DCI.getCov(i)<< endl;       
00789     mapDataToDistrib(f[i],DCD.getMean(i),DCD.getCov(i),DCI.getMean(i),DCI.getCov(i));}
00790 }
00791 
00792 // on a segment
00793 void featureMapping(MixtureServer & ms, FeatureServer & fs,Seg * seg,Config &config) {
00794   unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); // Idx of the first frame of the current file in the feature server
00795   fs.seekFeature(begin);
00796   Feature f;
00797   for (unsigned long idxFrame=0;idxFrame<seg->length();idxFrame++){                          // for all the features of the segment
00798     fs.readFeature(f,0);                
00799     featureMapping(ms,f,config);
00800     fs.writeFeature(f);
00801   }
00802 }
00803 
00804 // on a cluster
00805 void featureMapping(MixtureServer & ms, FeatureServer & fs,SegCluster & selectedSegments,Config &config) {
00806   Seg *seg;                                                                         // current selectd segment
00807   selectedSegments.rewind();                                                      // reset the reader at the begin of the input stream
00808   while((seg=selectedSegments.getSeg())!=NULL){                                   // For each of the selected segments
00809     featureMapping(ms,fs,seg,config);
00810   }     
00811 }
00812 
00813 // Model based distance
00814 // Authors: JF Bonastre  - Driss Matrouf
00815 //
00816 double likelihoodGD(const DistribGD& d,const DistribGD &m){
00817   double partial=0;
00818   double *dMean=d.getMeanVect().getArray();
00819   double *mMean=m.getMeanVect().getArray();
00820   double *dCov=d.getCovVect().getArray();
00821   double *mCov=m.getCovVect().getArray();
00822   for (unsigned long idxC=0;idxC<d.getVectSize();idxC++){
00823     double meanDiff=dMean[idxC]-mMean[idxC];
00824     partial+=(dCov[idxC]+(meanDiff*meanDiff))/mCov[idxC];
00825   }
00826   return m.getCst()*exp(-0.5*partial);
00827 }
00828 double likelihoodGD(const MixtureGD& data,const MixtureGD &model,TabWeight &tabWD,TabWeight &tabWM){
00829   // could work with a subset of components in the data model
00830   double result=0;
00831   TabWeightElem *dataArray=tabWD.getArray();
00832   TabWeightElem *modelArray=tabWM.getArray();
00833 
00834   for (unsigned long idxDataG=0;idxDataG<tabWD.getNbTop();idxDataG++){
00835     if (debug) cout <<"Distrib Data["<<tabWD.getDistrib(idxDataG)<<"] weight["<<tabWD.getWeight(idxDataG)<<" "<<dataArray[idxDataG].weight<<"]"<<endl;
00836     DistribGD &d=*(dataArray[idxDataG].distribP);
00837     double lkCompData=0.0;
00838     for  (unsigned long idxModelG=0;idxModelG<tabWM.getNbTop();idxModelG++){
00839       if (debug) cout <<"Distrib Model["<<tabWM.getDistrib(idxModelG)<<"] weight["<<tabWM.getWeight(idxModelG)<<" "<<modelArray[idxModelG].weight<<"]"<<endl;
00840       DistribGD &m=*(modelArray[idxModelG].distribP);
00841       double tmp=modelArray[idxModelG].weight*likelihoodGD(d,m);
00842       lkCompData+=tmp; 
00843     }
00844     result+=dataArray[idxDataG].weight*log(lkCompData);
00845   }
00846   return result;
00847 }
00848 double likelihoodGD(const MixtureGD& data,const MixtureGD &model,TabWeight &tabWD){
00849   TabWeight tabWM(model);
00850   return likelihoodGD(data,model,tabWD,tabWM);
00851 }
00852 double likelihoodGD(const MixtureGD& data,const MixtureGD &model){
00853   TabWeight tabWM(model);
00854   TabWeight tabWD(data);
00855   return likelihoodGD(data,model,tabWD,tabWM);
00856 }
00857 #endif //!defined(ALIZE_GeneralTools_cpp)