LIA_RAL: LIA_SpkTools/src/UnsupervisedTools.cpp Source File

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 LIA_RAL project is a development project initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 chagnolleau I., Eurospeech 2003, Genova].
00033 The conclusion of the paper is proposed below:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a person's voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_UnsupervisedTools_cpp)
00056 #define ALIZE_UnsupervisedTools_cpp
00057 
00058 #include <iostream>
00059 #include <fstream>              // pour outFile
00060 #include <cstdio>               // pour printf()
00061 #include <cassert>              // pour le debug pratique
00062 #include <cmath>
00063 #include <liatools.h>
00064 #include <DoubleSquareMatrix.h>
00065 #include <RealVector.h>
00066 #include "FileInfo.h"
00067 #include "UnsupervisedTools.h"
00068 
00069 // WindowLLR class - deals with LLR outputed by set of frames  RECOPIE DE COMPUTETEST.H
00070 class WindowLLR{
00071     bool _set;               // flag, indicates if the windowmode is on
00072     unsigned long _size;     // size of the window, in frames
00073     unsigned long _dec;      // shift of the window, in frames, gives the number of outputs
00074     unsigned long _nClient;  // number of different client, 1 by default;
00075     Matrix <double> *_llrM;  // contains the LLR for the window
00076     DoubleVector *_accLlrA;   // contains the accumulated LLR for the window
00077     ULongVector *_idxA;      // contains the idx of frames in the window
00078     unsigned long _bIdx;     // idx of first frame in the circular window
00079     unsigned long _count;    // nb of saved values in the circular window
00080     void _initMem();         // internal use, init the mem booking for score window
00081     void _freeMem();         // internal use, free the memory for
00082 
00083 public:
00084     WindowLLR(Config &config); 
00085     ~WindowLLR();
00086     bool isSet(){return _set;}  
00087     void setNbClient(unsigned long nClient){_nClient=nClient;_initMem();}
00088     unsigned long getIdxBegin(){return (*_idxA)[_bIdx];}
00089     unsigned long getIdxEnd(){return (*_idxA)[(_bIdx+_count-1)%_size];}
00090     void showConfig();
00091     void accLLR(unsigned long clientIdx,double llr);
00092     double getLLR(unsigned long clientIdx);
00093     bool isEnd();
00094     unsigned long wCount(); // gives the number of data/frame in the window 
00095     void dec(unsigned long idxFrame);
00096 };
00097 // windowLLr functions
00098 void WindowLLR::_initMem(){
00099     _freeMem();
00100     _idxA= new ULongVector(_size,_size);
00101     _accLlrA = new DoubleVector(_nClient,_nClient);
00102     _llrM= new Matrix <double>(_size,_nClient);
00103     for (unsigned long idxC=0;idxC<_nClient;idxC++){
00104         for (unsigned long idxF=0;idxF<_size;idxF++)
00105             (*_llrM)(idxF,idxC)=0;
00106         (*_accLlrA)[idxC]=0;
00107     }
00108    
00109     _bIdx=0;
00110     _count=0;
00111 }
00112 void WindowLLR::_freeMem(){
00113     if (_llrM) {
00114         delete _llrM;
00115         delete _accLlrA;
00116         delete _idxA;
00117     }
00118     _llrM=NULL;
00119 }
00120 WindowLLR::WindowLLR(Config &config){
00121     _set=false;
00122     _size=0;
00123     _dec=0;
00124     _bIdx=0;
00125     _count=0;
00126     _nClient=0;
00127     _llrM=NULL;
00128     if (config.existsParam("windowLLR")) _set=config.getParam("windowLLR").toBool();
00129     if (_set){
00130         if (config.existsParam("windowLLRSize")) _size=config.getParam("windowLLRSize").toLong();
00131         else _size=30;
00132         if (config.existsParam("windowLLRDec")) _dec=config.getParam("windowLLRDec").toLong();
00133         else _dec=_size;        
00134         _nClient=1;
00135         _initMem();
00136     }
00137 }
00138 WindowLLR::~WindowLLR(){
00139     _freeMem();}
00140 void WindowLLR::showConfig(){
00141     if (_set) cout<<"windowLLR mode size["<<_size<<"] dec["<<_dec<<"]"<<endl; 
00142 }
00143 unsigned long WindowLLR::wCount(){
00144     return (_count);
00145 }
00146 void WindowLLR::dec(unsigned long idxFrame){
00147     if (_count<_size){       //window is not full
00148         _count++;
00149         unsigned long eIdx=(_bIdx+_count-1)%_size;
00150         (*_idxA)[eIdx]=idxFrame;
00151     }
00152     else{// window is full, real dec (shift the window, step _dec frame)
00153         for (unsigned long wIdx=0;wIdx<_dec;wIdx++){
00154             //suppress the begin value
00155             for (unsigned long cIdx=0;cIdx<_nClient;cIdx++)
00156                 (*_accLlrA)[cIdx]-=(*_llrM)(_bIdx,cIdx);                    
00157             _bIdx=(_bIdx+1)%_size;
00158         }
00159         _count-=(_dec-1);
00160         (*_idxA)[(_bIdx+_count-1)%_size]=idxFrame;      
00161     }   
00162 }
00163 void WindowLLR::accLLR(unsigned long clientIdx,double llr){
00164     (*_llrM)((_bIdx+_count-1)%_size,clientIdx)=llr;
00165     (*_accLlrA)[clientIdx]+=llr;
00166 }
00167 double WindowLLR::getLLR(unsigned long clientIdx){
00168     return (*_accLlrA)[clientIdx]/(double)_size;
00169 }   
00170 bool WindowLLR::isEnd(){
00171     return (wCount()==_size);
00172 }
00173 
00174 
00175 //-------------------------------------------------------------------------------------------------------
00176 
00177 
00178 //-- Accumulate the occupation for the selected frames and a given model
00179 
00180 void accumulateStatLK(StatServer & ss, FeatureServer & fs, MixtureStat & acc,
00181   unsigned long idxBeginFrame, unsigned long nbFrames, Config & config)
00182 {
00183     fs.seekFeature(idxBeginFrame);      // go to the frame in the buffer (and load it if needed)
00184     for (unsigned long n = 0; n < nbFrames; n++)
00185     {
00186         Feature f;
00187         if (fs.readFeature(f) == false)
00188             cout << "No more features" << endl;
00189         acc.computeAndAccumulateLLK(f);
00190     }
00191 }
00192 
00193 // one a Segment
00194 void
00195 accumulateStatLK(StatServer & ss, FeatureServer & fs, MixtureStat & acc,
00196   Seg * seg, Config & config)
00197 {
00198     unsigned long begin = seg->begin() + fs.getFirstFeatureIndexOfASource(seg->sourceName());   // Find the index of the first frame of the file in the buffer
00199     accumulateStatLK(ss, fs, acc, begin, seg->length(), config);
00200 }
00201 
00202 // One on Cluster
00203 void accumulateStatLK(StatServer & ss, FeatureServer & fs, MixtureStat & acc,
00204   SegCluster & selectedSegments, Config & config)
00205 {
00206     Seg *seg;                   // reset the reader at the begin of the input stream
00207     selectedSegments.rewind();
00208     while ((seg = selectedSegments.getSeg()) != NULL)   // For each of the selected segments
00209         accumulateStatLK(ss, fs, acc, seg, config);
00210 }
00211 
00212 
00213 
00214 
00215 
00216 
00217 // Estimation client model from different Feature server with a weight associated
00218 // Using EM and MAP 
00219 void adaptModel(Config & config, StatServer & ss, MixtureServer & ms,
00220   ObjectRefVector & FeatServ, ObjectRefVector & ClusterSeg,
00221   MixtureGD & aprioriModel, MixtureGD & clientMixture,
00222   DoubleVector & decision)
00223 {
00224     MAPCfg mapCfg(config);
00225     if (verbose)
00226         mapCfg.showConfig(cout);
00227     if (verboseLevel > 1)
00228         cout << "Mean LLK Init = " << meanLikelihood(ss, FeatServ, ClusterSeg, clientMixture, decision, config) << endl;
00229     
00230     for (unsigned long trainIt = 0; trainIt < mapCfg.getNbTrainIt(); trainIt++)
00231     {                           // Begin the initial adaptation loop (with bagged frames)
00232         // Create a statistic accumulator using the curent model
00233         MixtureStat & emAcc = ss.createAndStoreMixtureStat(clientMixture);      
00234         emAcc.resetEM();
00235         double llkPreviousIt = 0;
00236         for (unsigned long nbFs = 0; nbFs < FeatServ.size(); nbFs++)
00237         {
00238 
00239             SegServer segServer;        // Create a local segment server 
00240             // Create the cluster for describing the selected frames
00241             SegCluster & baggedFramesCluster = segServer.createCluster(1, "", "");      
00242             baggedSegments((static_cast < SegCluster & >(ClusterSeg.getObject(nbFs))),baggedFramesCluster, mapCfg.getBaggedFrameProbability());
00243 
00244             if (verboseLevel > 2)
00245                 cout <<"Accumulate statistics on the feature server weighted by : " << decision[nbFs] << endl;
00246             
00247             llkPreviousIt += accumulateStatEM(ss, (static_cast < FeatureServer & >(FeatServ.getObject(nbFs))), emAcc, baggedFramesCluster, decision[nbFs], config);     // Accumulate the EM statistics
00248 
00249         }
00250         
00251         clientMixture = emAcc.getEM();  // Get the EM estimate   
00252         unsigned long frameCount = (unsigned long) emAcc.getEMFeatureCount();
00253         cout << "Total Frames :" << frameCount << endl;
00254         llkPreviousIt = llkPreviousIt / (double) frameCount;
00255         if (verbose)
00256             cout << "ML (partial) estimate it[" << trainIt <<"] (take care, it corresponds to the previous it,0 means init likelihood) = "
00257           << llkPreviousIt << endl;
00258         
00259         computeMAP(ms, aprioriModel, clientMixture, frameCount, config);        // Bayesian Adaptation client=MAP(aprioriModel,client)
00260         if (mapCfg.getNormalizeModel())
00261             normalizeMixture(clientMixture, mapCfg, config);    // Normalize/fit the model if needed
00262         ss.deleteMixtureStat(emAcc);
00263 
00264         if (verboseLevel > 2)
00265             cout << "Likelihood on all frames =" << meanLikelihood(ss, FeatServ,
00266           ClusterSeg, clientMixture, decision, config) << endl;
00267 
00268     }
00269 
00270   if (verboseLevel > 1)
00271       cout << "Final likelihood on all frames =" << meanLikelihood(ss, FeatServ,ClusterSeg, clientMixture, decision, config) << endl;
00272 
00273   if (debug)
00274       cout << "adaptModel nb distrib:" << ms.getDistribCount() << "nb mixt:" << ms.getMixtureCount() << endl;
00275 }
00276 
00277 
00278 
00279 
00280 
00281 
00282 
00283 
00284 
00285 
00286 //------------------------------------------------------------------------
00287 // ** New training algo based on a true EM/ML estimate of the training data before to apply MAP
00288 void modelBasedadaptModelEM(Config & config, StatServer & ss,
00289   MixtureServer & ms, FeatureServer & fs, SegCluster & selectedSegments,
00290   FeatureServer & fsTests, SegCluster & selectedSegmentsTests,
00291   MixtureGD & aprioriModel, MixtureGD & clientMixture, MixtureGD & initModel)
00292 {
00293     MAPCfg mapCfg(config);
00294     if (verbose)
00295     {
00296         cout << "Model adaptation based on true EM/ML estimate of training data"<< endl;
00297 
00298     }
00299     MixtureServer msTmp(config);
00300     MixtureGD & data = msTmp.duplicateMixture(initModel, DUPL_DISTRIB);
00301 
00302     /*unsigned long totalFrameCount1 = totalFrame(selectedSegments);
00303      unsigned long totalFrameCount2 = totalFrame(selectedSegmentsTests);
00304      unsigned long totalFrameCount = totalFrameCount1 + totalFrameCount2; */
00305     
00306     if (verboseLevel > 1)
00307     cout << "Mean LLK Init = " << meanLikelihood(ss, fs, data,selectedSegments, config) << endl;
00308     
00309     for (unsigned long emIt = 0; emIt < mapCfg.getNbEmIt(); emIt++)
00310     {   
00311         // begin the true EM/ML estimate of the adpatation data 
00312             
00313         // Create a statistic accumulator using the curent model
00314         MixtureStat & emAcc = ss.createAndStoreMixtureStat(data);       
00315         SegServer segServer;    // Create a local segment server
00316             
00317         // Create the cluster for describing the selected frames    
00318         SegCluster & baggedFramesCluster = segServer.createCluster(1, "", "");  
00319         baggedSegments(selectedSegments, baggedFramesCluster,
00320         mapCfg.getBaggedFrameProbability());
00321         SegServer segServerTests;       // Create a local segment server
00322             
00323         // Create the cluster for describing the selected frames
00324         SegCluster & baggedFramesClusterTests = segServerTests.createCluster(1, "", "");        
00325         baggedSegments(selectedSegmentsTests, baggedFramesClusterTests,
00326         mapCfg.getBaggedFrameProbability());
00327         emAcc.resetEM();
00328             
00329         // Accumulate the EM statistics on the first feature server
00330         double llkPreviousIt = accumulateStatEM(ss, fs, emAcc, baggedFramesCluster, config);
00331             
00332         // Accumulate the EM statistics on the second feature server
00333         llkPreviousIt += accumulateStatEM(ss, fsTests, emAcc, baggedFramesClusterTests, config);        
00334         data = emAcc.getEM();   // Get the EM estimate         
00335         unsigned long frameCount = (unsigned long) emAcc.getEMFeatureCount();
00336         llkPreviousIt = llkPreviousIt / (double) frameCount;
00337         
00338         if (verbose)
00339             cout << "ML (partial) estimate it[" << emIt <<"] (take care, it corresponds to the previous it,0 means init likelihood) = "
00340           << llkPreviousIt << endl;
00341         
00342         ss.deleteMixtureStat(emAcc);
00343     }
00344     // Begin the estimation of the statistic using the EM/ML model of the adaptation data
00345     unsigned long modelNbComp = aprioriModel.getDistribCount();
00346     
00347     // Begin the estimation of the statistic using the EM/ML model of the adaptation data
00348     
00349     // Complete log likelihood of the adaptation data given the apriori model
00350     unsigned long vectSize = fs.getVectSize();  
00351     for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00352     {   
00353         // Initialize the client mixture
00354         DistribGD & c = clientMixture.getDistrib(idxModel);
00355             
00356         for (unsigned long idxC = 0; idxC < vectSize; idxC++)
00357         {
00358             c.setMean(0.00, idxC);
00359             c.setCov(0.00, idxC);
00360         }
00361     }
00362     DoubleVector apProbaTot(modelNbComp, modelNbComp);
00363     apProbaTot.setAllValues(0.0);
00364     for (unsigned long idxData = 0; idxData < data.getDistribCount(); idxData++)
00365     {
00366         if (debug)
00367         cout << "Distrib Data[" << idxData << "]" << endl;
00368         DistribGD & d = data.getDistrib(idxData);
00369         double totLk = 0.0;     // Likelihood of the current data component given the apriori model
00370         DoubleVector apProba(modelNbComp, modelNbComp);
00371         apProba.setAllValues(0.0);
00372         
00373         for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00374         {
00375             if (debug)
00376                 cout << "Distrib A Priori model[" << idxModel << "]" << endl;
00377             
00378             DistribGD & m = aprioriModel.getDistrib(idxModel);
00379             apProba[idxModel] = aprioriModel.weight(idxModel) * likelihoodGD(d, m);
00380             totLk += apProba[idxModel];
00381         }
00382         for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00383         {
00384             DistribGD & c = clientMixture.getDistrib(idxModel);
00385             apProba[idxModel] /= totLk;
00386                 
00387             for (unsigned long idxC = 0; idxC < vectSize; idxC++)
00388             {
00389                 c.setMean(c.getMean(idxC) +(d.getMean(idxC) * apProba[idxModel] * data.weight(idxData)),
00390                 idxC);
00391                 c.setCov(c.getCov(idxC) +((d.getMean(idxC) * d.getMean(idxC)) * apProba[idxModel] *
00392                   data.weight(idxData)), idxC);
00393             }
00394             
00395             apProbaTot[idxModel] += apProba[idxModel] * data.weight(idxData);
00396         }
00397     }
00398   for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00399     {
00400       DistribGD & c = clientMixture.getDistrib(idxModel);
00401       for (unsigned long idxC = 0; idxC < vectSize; idxC++)
00402         {
00403           c.setMean(c.getMean(idxC) / apProbaTot[idxModel], idxC);
00404           //        c.setCov(c.getCov(idxC),idxC);
00405         }
00406     }
00407   for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00408     {
00409       DistribGD & c = clientMixture.getDistrib(idxModel);
00410       c.computeAll();
00411 
00412     }
00413   //
00414 
00415 
00416 }
00417 
00418 //-------------------------------------------------------------------------
00419 
00420 
00421 
00422 
00423 void modelBasedadaptModelEM(Config & config, StatServer & ss,
00424   MixtureServer & ms, FeatureServer & fs, SegCluster & selectedSegments,
00425   MixtureGD & aprioriModel, MixtureGD & clientMixture, MixtureGD & initModel)
00426 {
00427   MAPCfg mapCfg(config);
00428   if (verbose)
00429   {
00430       cout << "Model adaptation based on true EM/ML estimate of training data"
00431         << endl;
00432 
00433   }
00434   MixtureServer msTmp(config);
00435   MixtureGD & data = msTmp.duplicateMixture(initModel, DUPL_DISTRIB);
00436 
00437   //  unsigned long totalFrameCount = totalFrame(selectedSegments);
00438   //if (verboseLevel>1) cout << "Mean LLK Init = " << meanLikelihood(ss,fs,data,selectedSegments,config)<< endl;    
00439   for (unsigned long emIt = 0; emIt < mapCfg.getNbEmIt(); emIt++)
00440   {
00441       // begin the true EM/ML estimate of the adpatation data 
00442           
00443       // Create a statistic accumulator using the curent model  
00444       MixtureStat & emAcc = ss.createAndStoreMixtureStat(data); 
00445       SegServer segServer;      // Create a local segment server
00446           
00447       // Create the cluster for describing the selected frames
00448       SegCluster & baggedFramesCluster = segServer.createCluster(1, "", "");
00449           
00450       baggedSegments(selectedSegments, baggedFramesCluster,mapCfg.getBaggedFrameProbability());
00451       emAcc.resetEM();
00452           
00453       // Accumulate the EM statistics
00454       double llkPreviousIt = accumulateStatEM(ss, fs, emAcc, baggedFramesCluster, config);
00455           
00456       data = emAcc.getEM();     // Get the EM estimate         
00457       unsigned long frameCount = (unsigned long) emAcc.getEMFeatureCount();
00458       llkPreviousIt = llkPreviousIt / (double) frameCount;
00459           
00460       if (verbose)
00461           cout << "ML (partial) estimate it[" << emIt << "] (take care, it corresponds to the previous it,0 means init likelihood) = "
00462           << llkPreviousIt << endl;
00463       
00464       ss.deleteMixtureStat(emAcc);
00465     }
00466     unsigned long modelNbComp = aprioriModel.getDistribCount();
00467     // Begin the estimation of the statistic using the EM/ML model of the adaptation data
00468     
00469     // Complete log likelihood of the adaptation data given the apriori model
00470     unsigned long vectSize = fs.getVectSize();  
00471     
00472     for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00473     {                           // Initialize the client mixture
00474         DistribGD & c = clientMixture.getDistrib(idxModel);
00475         for (unsigned long idxC = 0; idxC < vectSize; idxC++)
00476         {
00477             c.setMean(0.00, idxC);
00478             c.setCov(0.00, idxC);
00479         }
00480     }
00481     
00482     DoubleVector apProbaTot(modelNbComp, modelNbComp);
00483     apProbaTot.setAllValues(0.0);
00484     for (unsigned long idxData = 0; idxData < data.getDistribCount(); idxData++)
00485     {
00486         if (debug)
00487             cout << "Distrib Data[" << idxData << "]" << endl;
00488         
00489         DistribGD & d = data.getDistrib(idxData);
00490         double totLk = 0.0;     // Likelihood of the current data component given the apriori model
00491         
00492         DoubleVector apProba(modelNbComp, modelNbComp);
00493         apProba.setAllValues(0.0);
00494         for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00495         {
00496             if (debug)
00497                 cout << "Distrib A Priori model[" << idxModel << "]" << endl;
00498             DistribGD & m = aprioriModel.getDistrib(idxModel);
00499             apProba[idxModel] = aprioriModel.weight(idxModel) * likelihoodGD(d, m);
00500             totLk += apProba[idxModel];
00501         }
00502       for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00503       {
00504           DistribGD & c = clientMixture.getDistrib(idxModel);
00505           apProba[idxModel] /= totLk;
00506                 
00507           for (unsigned long idxC = 0; idxC < vectSize; idxC++)
00508             {
00509                 c.setMean(c.getMean(idxC) +(d.getMean(idxC) * apProba[idxModel] * data.weight(idxData)),idxC);
00510                     
00511                 c.setCov(c.getCov(idxC) + ((d.getMean(idxC) * d.getMean(idxC)) * apProba[idxModel] * data.weight(idxData)), idxC);
00512             }
00513             
00514           apProbaTot[idxModel] += apProba[idxModel] * data.weight(idxData);
00515       }
00516   }
00517   
00518   for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00519   {
00520       DistribGD & c = clientMixture.getDistrib(idxModel);
00521           
00522       for (unsigned long idxC = 0; idxC < vectSize; idxC++)
00523         {
00524             c.setMean(c.getMean(idxC) / apProbaTot[idxModel], idxC);
00525             //        c.setCov(c.getCov(idxC),idxC);
00526         }
00527   }
00528   
00529   for (unsigned long idxModel = 0; idxModel < modelNbComp; idxModel++)
00530   {
00531       DistribGD & c = clientMixture.getDistrib(idxModel);
00532       c.computeAll();
00533   }
00534 
00535 
00536 }
00537 
00538 
00539 // adapt model function to compute MAP from a given EM estimate
00540 void adaptModelMAP(Config & config, StatServer & ss, MixtureServer & ms,
00541   MixtureGD & aprioriModel, MixtureGD & clientMixture, MAPCfg & mapCfg,
00542   unsigned long &frameCount)
00543 {
00544   
00545     for (unsigned long trainIt = 0; trainIt < mapCfg.getNbTrainIt(); trainIt++)
00546     {   
00547         // Begin the initial adaptation loop (with bagged frames)
00548             
00549         // Bayesian Adaptation client=MAP(aprioriModel,client)
00550         computeMAP(ms, aprioriModel, clientMixture, frameCount, config);        
00551         
00552         if (mapCfg.getNormalizeModel())
00553             normalizeMixture(clientMixture, mapCfg, config);    // Normalize/fit the model if needed
00554      
00555     }
00556  
00557     if (debug)
00558         cout << "adaptModel nb distrib:" << ms.getDistribCount() << "nb mixt:" << ms.getMixtureCount() << endl;
00559 }
00560 
00561 void adaptModelMAP(Config & config, StatServer & ss, MixtureServer & ms,
00562   MixtureGD & aprioriModel, MixtureGD & clientMixture,
00563   unsigned long &frameCount)
00564 {
00565   MAPCfg mapCfg(config);
00566   adaptModelMAP(config, ss, ms, aprioriModel, clientMixture, mapCfg,frameCount);
00567 }
00568 
00569 
00570 
00571 
00572 // Adapt model function to compute an EM estimate 
00573 
00574 void adaptModelEM(Config & config, StatServer & ss, MixtureServer & ms,
00575   FeatureServer & fs, SegCluster & selectedSegments, MixtureGD & aprioriModel,
00576   MixtureGD & clientMixture, MAPCfg & mapCfg)
00577 {
00578     unsigned long frameCount;   //A.P.
00579     if (verbose)
00580         cout << "Model adaptation based on true EM/ML estimate of training data" << endl;
00581 
00582     if (verboseLevel > 1)
00583     cout << "Mean LLK Init = " << meanLikelihood(ss, fs, clientMixture,
00584       selectedSegments, config) << endl;
00585 
00586     for (unsigned long trainIt = 0; trainIt < mapCfg.getNbTrainIt(); trainIt++)
00587     {   
00588         // Begin the initial adaptation loop (with bagged frames)
00589             
00590         // Create a statistic accumulator using the curent model    
00591         MixtureStat & emAcc = ss.createAndStoreMixtureStat(clientMixture);      
00592         SegServer segServer;    // Create a local segment server 
00593             
00594         // Create the cluster for describing the selected frames    
00595         SegCluster & baggedFramesCluster = segServer.createCluster(1, "", "");  
00596         baggedSegments(selectedSegments, baggedFramesCluster,mapCfg.getBaggedFrameProbability());
00597         emAcc.resetEM();
00598             
00599         // Accumulate the EM statistics
00600         double llkPreviousIt = accumulateStatEM(ss, fs, emAcc, baggedFramesCluster, config);    
00601         clientMixture = emAcc.getEM();
00602             
00603         // Get the EM estimate       
00604         frameCount = (unsigned long) emAcc.getEMFeatureCount();
00605         llkPreviousIt = llkPreviousIt / (double) frameCount;
00606             
00607         if (verbose)
00608             cout << "ML (partial) estimate it[" << trainIt <<"] (take care, it corresponds to the previous it,0 means init likelihood) = "
00609           << llkPreviousIt << endl;
00610         
00611         ss.deleteMixtureStat(emAcc);
00612         
00613         if (verboseLevel > 2)
00614             cout << "Likelihood on all frames= " << meanLikelihood(ss, fs, clientMixture, selectedSegments, config) << endl;
00615 
00616     }
00617     if (verboseLevel == 2)
00618         cout << "Final likelihood on all frames= " << meanLikelihood(ss, fs,clientMixture, selectedSegments, config) << endl;
00619 
00620 }
00621 
00622 void adaptModelEM(Config & config, StatServer & ss, MixtureServer & ms,
00623   FeatureServer & fs, SegCluster & selectedSegments, MixtureGD & aprioriModel,
00624   MixtureGD & clientMixture)
00625 {
00626     MAPCfg mapCfg(config);
00627     adaptModelEM(config, ss, ms, fs, selectedSegments, aprioriModel,clientMixture, mapCfg);
00628 }
00629 
00630 // Adapt model function to compute an EM estimate from 2 feature server
00631 
00632 void adaptModelEM(Config & config, StatServer & ss, MixtureServer & ms,
00633   FeatureServer & fs, SegCluster & selectedSegments, FeatureServer & fs2,
00634   SegCluster & selectedSegments2, MixtureGD & aprioriModel,
00635   MixtureGD & clientMixture, MAPCfg & mapCfg)
00636 {
00637     unsigned long frameCount;   //A.P.
00638     if (verbose)
00639         cout << "Model adaptation based on true EM/ML estimate of training data" << endl;
00640     if (verboseLevel > 1)
00641         cout << "Mean LLK Init = " << meanLikelihood(ss, fs, clientMixture,selectedSegments, config) << endl;
00642 
00643     for (unsigned long trainIt = 0; trainIt < mapCfg.getNbTrainIt(); trainIt++)
00644     {   
00645         // Begin the initial adaptation loop (with bagged frames)
00646         double llkPreviousIt = 0;
00647             
00648         // Create a statistic accumulator using the curent model
00649         MixtureStat & emAcc = ss.createAndStoreMixtureStat(clientMixture);
00650         SegServer segServer;    // Create a local segment server 
00651         SegServer segServer2;   // Create a local segment server 
00652             
00653         // Create the cluster for describing the selected frames
00654         SegCluster & baggedFramesCluster = segServer.createCluster(1, "", "");  
00655             
00656         // Create the cluster for describing the selected frames
00657         SegCluster & baggedFramesCluster2 = segServer2.createCluster(1, "", "");
00658             
00659         baggedSegments(selectedSegments, baggedFramesCluster,mapCfg.getBaggedFrameProbability());
00660         baggedSegments(selectedSegments2, baggedFramesCluster2, mapCfg.getBaggedFrameProbability());
00661             
00662         emAcc.resetEM();
00663         // Accumulate the EM statistics on the 1st FS
00664         llkPreviousIt += accumulateStatEM(ss, fs, emAcc, baggedFramesCluster, config);  
00665             
00666         // Accumulate the EM statistics on the 2nd FS
00667         llkPreviousIt += accumulateStatEM(ss, fs2, emAcc, baggedFramesCluster2, config);
00668         
00669         // Get the EM estimate 
00670         clientMixture = emAcc.getEM();
00671              
00672         frameCount = (unsigned long) emAcc.getEMFeatureCount();
00673 
00674         llkPreviousIt = llkPreviousIt / (double) frameCount;
00675         if (verbose)
00676             cout << "ML (partial) estimate it[" << trainIt <<"] (take care, it corresponds to the previous it,0 means init likelihood) = "
00677           << llkPreviousIt << endl;
00678         
00679         ss.deleteMixtureStat(emAcc);
00680         
00681         if (verboseLevel > 2)
00682             cout << "Likelihood on all frames= " << meanLikelihood(ss, fs, clientMixture, selectedSegments, config) << endl;
00683 
00684     }
00685     if (verboseLevel == 2)
00686         cout << "Final likelihood on all frames= " << meanLikelihood(ss, fs,
00687       clientMixture, selectedSegments, config) << endl;
00688 
00689 }
00690 
00691 
00692 void adaptModelEM(Config & config, StatServer & ss, MixtureServer & ms,
00693   FeatureServer & fs, SegCluster & selectedSegments, FeatureServer & fs2,
00694   SegCluster & selectedSegments2, MixtureGD & aprioriModel,
00695   MixtureGD & clientMixture)
00696 {
00697     MAPCfg mapCfg(config);
00698     adaptModelEM(config, ss, ms, fs, selectedSegments, fs2, selectedSegments2,
00699     aprioriModel, clientMixture, mapCfg);
00700 }
00701 
00702 
00703 
00704 
00705 
00706 
00707 
00708 double computeLLR(StatServer & ss, FeatureServer & fsTests, MixtureGD & world,
00709   MixtureGD & clientMixture, SegCluster & selectedSegmentsTests)
00710 {
00711     ss.resetLLK(world);         // Reset the world LLK accumulator
00712     ss.resetLLK(clientMixture); // ss.resetLLK(tabClientLine.getClientModel(i));                                   // Reset client LLK accumulator
00713     Seg *seg;                   // reset the reader at the begin of the input stream
00714     selectedSegmentsTests.rewind();
00715         
00716     while ((seg = selectedSegmentsTests.getSeg()) != NULL)
00717     {   
00718         // For each of the selected segments
00719         unsigned long idxBeginFrame = seg->begin() + fsTests.getFirstFeatureIndexOfASource(seg->sourceName());
00720         fsTests.seekFeature(idxBeginFrame);
00721         Feature f;
00722             
00723         for (unsigned long idxFrame = 0; idxFrame < seg->length(); idxFrame++)
00724         {                       // For each frame of the segment
00725             fsTests.readFeature(f);
00726             // Determine the top components and compute wrld LLK
00727             ss.computeAndAccumulateLLK(world, f, DETERMINE_TOP_DISTRIBS);       
00728             ss.computeAndAccumulateLLK(clientMixture, f, USE_TOP_DISTRIBS);
00729         }
00730     }
00731 
00732     double LLKWorld  =  ss.getMeanLLK(world);   // Take the world LLK
00733     double LLKClient =  ss.getMeanLLK(clientMixture);   // Get the mean LLK 
00734     double LLRClient =  LLKClient - LLKWorld;   // Compute the LLR
00735     
00736     if ((verbose) && (verboseLevel == 2))
00737     {
00738         cout << "LLKWorld => " << LLKWorld << endl;
00739         cout << "LLKClient => " << LLKClient << endl;
00740         cout << "LLR => " << LLRClient << endl;
00741     }
00742 
00743     return LLRClient;
00744 }
00745 
00746 
00747 
00748 double computeLLR(Config & config, StatServer & ss, FeatureServer & fsTests,
00749   MixtureGD & world, MixtureGD & clientMixture,
00750   SegCluster & selectedSegmentsTests, String & idTest)
00751 {
00752   cout << "compute LLR" << endl;
00753   FileInfo FI(idTest);          //create file to write top components info
00754   ss.resetLLK(world);           // Reset the world LLK accumulator
00755   ss.resetLLK(clientMixture);   // ss.resetLLK(tabClientLine.getClientModel(i));                                   
00756   Seg *seg;                     // reset the reader at the begin of the input stream
00757   selectedSegmentsTests.rewind();
00758   RefVector <DoubleVector> stockLKVect(fsTests.getFeatureCount());
00759   while ((seg = selectedSegmentsTests.getSeg()) != NULL)
00760   {   
00761       // For each of the selected segments
00762       unsigned long idxBeginFrame =seg->begin() +fsTests.getFirstFeatureIndexOfASource(seg->sourceName());
00763       fsTests.seekFeature(idxBeginFrame);
00764       Feature f;
00765       for (unsigned long idxFrame = 0; idxFrame < seg->length(); idxFrame++)
00766       {                 // For each frame of the segment
00767           fsTests.readFeature(f);
00768           // Determine the top components and compute wrld LLK
00769           ss.computeAndAccumulateLLK(world, f, DETERMINE_TOP_DISTRIBS); 
00770           
00771           //STOCK the vector with the top selected component for reuse 
00772           const LKVector & lkv = ss.getTopDistribIndexVector();
00773               
00774           //Stock all LKVectors for writing once in the file    
00775           RealVector <real_t> & tmp= *new DoubleVector(config.getParam("topDistribsCount").toLong()+2,config.getParam("topDistribsCount").toLong()+2);
00776           
00777           for (unsigned long i=0;i<(unsigned long)config.getParam("topDistribsCount").toLong();i++){
00778               tmp[i]=lkv.getArray()[i].idx;
00779           }
00780           
00781           tmp[config.getParam("topDistribsCount").toLong()]=(lkv.sumNonTopDistribLK);
00782           tmp[config.getParam("topDistribsCount").toLong()+1]=(lkv.sumNonTopDistribWeights);
00783           stockLKVect.addObject(tmp);
00784           ss.computeAndAccumulateLLK(clientMixture, f, USE_TOP_DISTRIBS);
00785      }
00786   }
00787   for (unsigned long i=0;i<stockLKVect.size();i++){                     //write top info 
00788       FI.writeTopInfo(stockLKVect.getObject(i), config);
00789   }
00790   
00791   FI.close();
00792   stockLKVect.deleteAllObjects();
00793   
00794   double LLKWorld  =  ss.getMeanLLK(world);     // Take the world LLK
00795   double LLKClient =  ss.getMeanLLK(clientMixture);     // Get the mean LLK 
00796   double LLRClient =  LLKClient - LLKWorld;     // Compute the LLR
00797   
00798   if ((verbose) && (verboseLevel == 2))
00799   {
00800       cout << "LLKWorld => " << LLKWorld << endl;
00801       cout << "LLKClient => " << LLKClient << endl;
00802       cout << "LLR => " << LLRClient << endl;
00803   }
00804 
00805   return LLRClient;
00806 }
00807 
00808 
00809 
00810 
00811 
00812 double computeFastLLR(StatServer & ss, FeatureServer & fsTests,
00813   MixtureGD & world, MixtureGD & clientMixture,
00814   SegCluster & selectedSegmentsTests, String & idTest, Config & config)
00815 {
00816   if (debug) cout << "FAST LLR COMPUTATION" << endl;
00817   FileInfo FI(idTest);          //load file to read top components info
00818   ss.resetLLK(world);           // Reset the world LLK accumulator
00819   ss.resetLLK(clientMixture);   // ss.resetLLK(tabClientLine.getClientModel(i));                                   // Reset client LLK accumulator
00820   Seg *seg;                     // reset the reader at the begin of the input stream
00821   selectedSegmentsTests.rewind();
00822   unsigned long id = 0;         //id of the frame
00823   while ((seg = selectedSegmentsTests.getSeg()) != NULL)        // For each of the selected segments
00824     {
00825       unsigned long idxBeginFrame =seg->begin() +fsTests.getFirstFeatureIndexOfASource(seg->sourceName());
00826       fsTests.seekFeature(idxBeginFrame);
00827       Feature f;
00828       for (unsigned long idxFrame = 0; idxFrame < seg->length(); idxFrame++)    // For each frame of the segment
00829         {
00830           fsTests.readFeature(f);
00831           FI.loadTopInfo(ss, id, config);
00832           ss.computeAndAccumulateLLK(world, f, USE_TOP_DISTRIBS);       // uses the top components and compute wrld LLK
00833           id++;                 
00834           ss.computeAndAccumulateLLK(clientMixture, f, USE_TOP_DISTRIBS);
00835         }
00836     }
00837   FI.close();
00838   double LLKWorld = ss.getMeanLLK(world);       // Take the world LLK
00839   double LLKClient = ss.getMeanLLK(clientMixture);      // Get the mean LLK 
00840   double LLRClient = LLKClient - LLKWorld;      // Compute the LLR
00841   if ((verbose) && (verboseLevel == 2))
00842     {
00843       cout << "LLKWorld => " << LLKWorld << endl;
00844       cout << "LLKClient => " << LLKClient << endl;
00845       cout << "LLR => " << LLRClient << endl;
00846     }
00847 
00848   return LLRClient;
00849 }
00850 
00851 
00852 double computeLLRGD(Config & config, MixtureGD & clientMixture,
00853   MixtureGD & world, MixtureGD & dataTest)
00854 {
00855   unsigned long topDistribs = 128;      //config.getParam ("topDistribsCount");
00856   TabWeight TabWeightData(dataTest, (unsigned long) 512);
00857   TabWeight TabWeightClient(clientMixture, topDistribs);
00858         
00859   double LLKClient = likelihoodGD(dataTest, clientMixture, TabWeightData, TabWeightClient);
00860   double LLKWorld  = likelihoodGD(dataTest, world, TabWeightClient, TabWeightClient);   //LLR between models 
00861   double LLRClient = LLKClient - LLKWorld;      // Compute the LLR
00862   if ((verbose) && (verboseLevel == 2))
00863   {
00864       cout << "LLKWorld => " << LLKWorld << endl;
00865       cout << "LLKClient => " << LLKClient << endl;
00866       cout << "LLR => " << LLRClient << endl;
00867   }
00868 
00869   return LLRClient;
00870 }
00871 
00872 
00873 
00874 void expandLLR(DoubleVector & decision, Config & configTest)
00875 {
00876   cout <<
00877     " Choice of logistic regression to compute Feature Server Probabilities"
00878     << endl;
00879   double THETA = configTest.getParam("THETA").toDouble();
00880   double BETA = configTest.getParam("BETA").toDouble();
00881   for (unsigned long e = 0; e < decision.size(); e++)
00882   {
00883       cout << " LLR num[" << e << "] = " << decision[e] << endl;
00884       decision[e] =
00885         exp(THETA + BETA * decision[e]) / (1 + exp(THETA +
00886           BETA * decision[e]));
00887       cout << " After logistic regression [" << e << "] = " << decision[e] <<
00888         endl;
00889     }
00890 
00891 }
00892 
00893 
00894 
00895 
00896 
00897 
00898 
00899 
00900 
00901 void WMAP(DoubleVector & decision, Config & configTest)
00902 {
00903   cout << " Choice of WMAP to compute Feature Server Probabilities" << endl;
00904   double den = 0.0, num = 0.0;
00905   double pi = 3.14159;
00906   //From TARGET and IMPOSTOR score distributions
00907 
00908   double SIGMAclient = configTest.getParam("SIGMAtarget").toDouble();
00909   double SIGMAimp    = configTest.getParam("SIGMAimp").toDouble();
00910   double MUclient    = configTest.getParam("MUtarget").toDouble();
00911   double MUimp       = configTest.getParam("MUimp").toDouble();
00912   double poidsImp    = configTest.getParam("IMPweight").toDouble();
00913   double poidsTar    = configTest.getParam("TARweight").toDouble();
00914   double seuilMin    = configTest.getParam("thrMin").toDouble();
00915   double seuilMax    = configTest.getParam("thrMax").toDouble();
00916         
00917   for (unsigned long e = 0; e < decision.size(); e++)
00918   {
00919       if (e == 0)
00920         decision[e] = 1;        //Fixed proba for the train data = 1
00921       else
00922       {
00923           cout << " LLR num[" << e << "] = " << decision[e] << endl;
00924           //calcul de la proba de llr de x sachant que l'acc�s est client. P(client)=0.1 et calcul de la proba de llr de x sachant que l'acc�s est imposteur. P(imp)=0.9
00925           if (decision[e] < seuilMin)
00926               decision[e] = poidsTar;   //set a priori proba for LLR < -0.5 PB to Fix.
00927           
00928           else if (decision[e] > seuilMax)
00929               decision[e] = 1;  //For train data pb to Fix 
00930           
00931           else
00932           {
00933               num =
00934                 (1 / (SIGMAclient * sqrt(2 * pi))) * exp(-0.5 *
00935                 ((decision[e] - MUclient) / SIGMAclient) * ((decision[e] -
00936                     MUclient) / SIGMAclient)) * poidsTar;
00937               num *= 100000;    //for precision
00938               den =
00939                 (1 / (SIGMAimp * sqrt(2 * pi))) * exp(-0.5 * ((decision[e] -
00940                     MUimp) / SIGMAimp) * ((decision[e] -
00941                     MUimp) / SIGMAimp)) * poidsImp;
00942               den *= 100000;    //for precision
00943               num = 1 + floor(num);
00944               den = 1 + floor(den);
00945               decision[e] = num / (num + den);
00946           }
00947           cout << " After WMAP [" << e << "] = " << decision[e] << endl;
00948       }
00949   }
00950 }
00951 
00952 String getFullFileName(String & id, Config & c)
00953 {
00954   String ext = c.getParam("InfoExtension");
00955   String path = c.getParam("InfoPath");
00956   String fullFileName = path + id + ext;
00957   return fullFileName;
00958 }
00959 String getFullMixtureName(String & id, Config & c)
00960 {
00961   String ext = c.getParam("loadMixtureFileExtension");
00962   String path = c.getParam("mixtureFilesPath");
00963   String fullFileName = path + id + ext;
00964   return fullFileName;
00965 }
00966 
00967 
00968 bool FileExists(String & fullFileName)
00969 {
00970   bool result;
00971   ifstream outFile(fullFileName.c_str(), ios::binary);
00972   if (outFile.is_open())
00973     result= true;
00974   else
00975     result= false;
00976   outFile.close();
00977   return result;
00978 
00979 }
00980 
00981 
00982 
00983 // WMAPGMM with Fixed priors by config : TARweight and IMPweight
00984 
00985 void WMAPGMMFixedPriors(DoubleVector & decision, Config & configTest,
00986   MixtureGD & tar, MixtureGD & non, StatServer & ss)
00987 {
00988   cout << " Choice of WMAP GMM to compute Feature Server Probabilities" <<
00989     endl;
00990   double poidsImp = configTest.getParam("IMPweight").toDouble();
00991   double poidsTar = configTest.getParam("TARweight").toDouble();
00992   Feature f(1);
00993   double llkTar = 0.0;
00994   double llkNon = 0.0;
00995   for (unsigned long e = 0; e < decision.size(); e++)
00996   {
00997      /* if (e == 0)
00998         decision[0] = 1;        //the first LLR is the train data on the target model, we give WMAP=1. 
00999       else
01000         {*/
01001       f[0] = decision[e];       // to compute the LK between the score and the GMM learnt on scores we set a feature = value of the score. 
01002       if (e == 0)
01003           decision[0] = 1;      //train data : weight=1  
01004       else
01005       {
01006           llkTar = (ss.computeLLK(tar, f));
01007           llkNon = (ss.computeLLK(non, f));
01008           if (llkNon < configTest.getParam("LLKthreshold").toLong())
01009           {
01010               if (debug)
01011                   cout << "Flooring LLK at " << configTest.
01012               getParam("LLKthreshold").toLong() << endl;
01013               llkNon = configTest.getParam("LLKthreshold").toLong();
01014           }
01015           if (llkTar < configTest.getParam("LLKthreshold").toLong())
01016           {
01017               if (debug)
01018                   cout << "Flooring LLK at " << configTest.getParam("LLKthreshold").toLong() << endl;
01019                   llkTar = configTest.getParam("LLKthreshold").toLong();
01020           }
01021           
01022           llkTar = exp(llkTar);
01023           llkNon = exp(llkNon);
01024           decision[e] =llkTar * poidsTar / (llkTar * poidsTar + llkNon * poidsImp);
01025 
01026        }
01027        if (verbose && verboseLevel >0)
01028            cout << "LLR : " << f[0] << " , After WMAP GMM [" << e << "] = "<< decision[e] << endl;
01029 
01030 
01031 
01032   }
01033 }
01034 
01035 
01036 //WMAP GMM with adaptative priors
01037 
01038 void WMAPGMM(DoubleVector & decision, Config & configTest, MixtureGD & tar,
01039   MixtureGD & non, StatServer & ss)
01040 {
01041   cout << " Choice of WMAP GMM to compute Feature Server Probabilities" <<endl;
01042 
01043   DoubleVector priorImp(decision.size(), decision.size());
01044   DoubleVector priorTar(decision.size(), decision.size());
01045   computePriors(decision, priorImp, priorTar, configTest);
01046 
01047   Feature f(1);
01048   double llkTar = 0.0;
01049   double llkNon = 0.0;
01050   for (unsigned long e = 0; e < decision.size(); e++)
01051   {
01052       /*if(e==0) decision[0]=1;               //the first LLR is the train data on the target model, we give WMAP=1. 
01053          else{        */
01054       f[0] = decision[e];       // to compute the LK between the score and the GMM learnt on scores we set a feature = value of the score. 
01055           
01056       llkTar = (ss.computeLLK(tar, f));
01057       llkNon = (ss.computeLLK(non, f));
01058           
01059       if (llkNon < configTest.getParam("LLKthreshold").toLong())
01060       {
01061           if (debug)
01062               cout << "Flooring LLK at " << configTest.getParam("LLKthreshold").
01063               toLong() << endl;
01064           llkNon = configTest.getParam("LLKthreshold").toLong();
01065       }
01066       if (llkTar < configTest.getParam("LLKthreshold").toLong())
01067       {
01068           if (debug)
01069               cout << "Flooring LLK at " << configTest.getParam("LLKthreshold").
01070               toLong() << endl;
01071           llkTar = configTest.getParam("LLKthreshold").toLong();
01072       }
01073       
01074       llkTar = exp(llkTar);
01075       llkNon = exp(llkNon);
01076       
01077       decision[e] = llkTar * priorTar[e] / (llkTar * priorTar[e] + llkNon * priorImp[e]);
01078 
01079       if (verbose && verboseLevel >0)
01080         cout << "LLR : " << f[0] << " , After WMAP GMM [" << e << "] = " <<
01081           decision[e] << endl;
01082           //}
01083 
01084   }
01085 }
01086 
01087 
01088 
01089 void computePriors(DoubleVector & decision, DoubleVector & priorImp,
01090   DoubleVector & priorTar, Config & configTest)
01091 {
01092   //the prior computation include the current trial
01093   double initPriorTar = configTest.getParam("initPriorTar").toDouble();
01094   double initPriorImp = configTest.getParam("initPriorImp").toDouble();
01095   priorTar.setAllValues(initPriorTar);
01096   priorImp.setAllValues(initPriorImp);
01097   double optiScore = configTest.getParam("OptimalScore").toDouble();
01098   for (unsigned long e = 1; e < decision.size(); e++)
01099     {
01100       if (decision[e] > optiScore)
01101         {                       //target trial
01102           initPriorTar = initPriorTar + 1;
01103           //cout <<"prioTAR "<< initPriorTar<<endl;
01104         }
01105       else
01106         {                       //Imp trial
01107           initPriorImp = initPriorImp + 1;
01108           // cout <<"prioIMP "<< initPriorImp<<endl;
01109         }
01110 
01111       priorTar[e] = initPriorTar / (initPriorTar + initPriorImp);
01112       priorImp[e] = 1 - priorTar[e];
01113       if (verbose && verboseLevel > 2)
01114         cout << "Priors after test : " << e << ", target = " << priorTar[e] <<
01115           " impostors = " << priorImp[e] << endl;
01116 
01117 
01118     }
01119 
01120 
01121 
01122 }
01123 
01124 
01125 
01126 
01127 
01128 //fusing EM models and do a MAP  : for faster execution
01129 
01130 void computeMAPmodelFromEMones(Config & config, StatServer & ss,
01131   MixtureServer & ms, DoubleVector & nbFramesSelected,
01132   MixtureGD & aprioriModel, MixtureGD & clientMixture, MixtureGD & aux,
01133   MixtureGD & tmp, DoubleVector & decision, XLine & testsToCompute)
01134 {
01135   if (verbose && verboseLevel > 1)
01136     cout << "Fusing..." << endl;
01137   int i = 0;
01138   int nbModels = decision.size();       //number of EM models
01139   int index = ms.getMixtureIndex(testsToCompute.getElement(0)); //get the first model , its name is stocked in the XLine
01140   if (index == -1) aux =ms.loadMixtureGD(testsToCompute.getElement(0));
01141   else aux = ms.getMixtureGD(ms.getMixtureIndex(testsToCompute.getElement(0)));   //IF STOCKED (ALREADY COMPUTED) LOAD MODEL
01142 
01143   unsigned long auxNbFrame = (unsigned long) (nbFramesSelected[0] * decision[0]);       //the elements in the ObjectRefvector follow the XLine order 
01144   //i.e. the objectRefvector and the XLine are reseted after each client                  
01145 
01146   for (i = 0; i < nbModels - 1; i++)
01147   {
01148       if (debug)
01149       {
01150           cout << "Fusing models : " << testsToCompute. getElement(i)<<"  NbFrames = " <<auxNbFrame << " with " << testsToCompute.getElement(i +
01151           1) <<" NbFrames = "<< (unsigned long) (nbFramesSelected[i + 1] * decision[i + 1])<< endl;
01152              
01153       }
01154      
01155       index=ms.getMixtureIndex(testsToCompute.getElement(i + 1));
01156       if(index==-1) fuseModels(aux, auxNbFrame,ms.loadMixtureGD(testsToCompute.getElement(i + 1)),(unsigned long) (nbFramesSelected[i + 1] * decision[i + 1]), tmp);
01157       else fuseModels(aux, auxNbFrame,ms.getMixtureGD(ms.getMixtureIndex(testsToCompute.getElement(i + 1))),(unsigned long) (nbFramesSelected[i + 1] * decision[i + 1]), tmp);
01158       aux = tmp;
01159       auxNbFrame +=(unsigned long) (nbFramesSelected[i + 1] * decision[i + 1]);
01160       
01161       /*CA PLANTE, PKOI???
01162       ms.deleteMixtures(ms.getMixtureIndex(testsToCompute.getElement(i + 1)),ms.getMixtureIndex(testsToCompute.getElement(i + 1)));
01163       ms.deleteUnusedDistribs();*/
01164       
01165     }
01166     
01167    /* double regFactorAdapted=(auxNbFrame*config.getParam("MAPRegFactorMean").toDouble())/(nbFramesSelected[0] * decision[0]);
01168     if (verbose && verboseLevel >1) cout <<"New REG Factor = "<<regFactorAdapted<<endl;
01169     Config configMAP(config);
01170     char  regValue[256];
01171     sprintf(regValue,"%f",regFactorAdapted);
01172     configMAP.setParam("MAPRegFactorMean",regValue);
01173     adaptModelMAP(configMAP, ss, ms, aprioriModel, tmp, auxNbFrame);*/
01174     
01175     
01176     adaptModelMAP(config, ss, ms, aprioriModel, aux, auxNbFrame);
01177     clientMixture = aux;
01178 
01179 }
01180 
01181 
01182 //retrun the nb of frames in a cluster
01183 double SegClusterFrame(SegCluster & SegC)
01184 {
01185   double total = 0;
01186   SegC.rewind();
01187   Seg *p;
01188   while ((p = SegC.getSeg()) != NULL)
01189     total += p->length();
01190   return total;
01191 }
01192 
01193 
01194 
01195 
01196 class Norm:public Object
01197 {
01198   public:String idTest;
01199   double mu;
01200   double sigma;
01201   String getClassName() const
01202   {
01203     return "Norm";
01204   };
01205 
01206 };
01207 
01208 //load impostors scores for TNORM computation from imp_seg.res file
01209 //Must concatenate imp_seg.res and imp_imp.res if you want to do ZTNORM.
01210 
01211 void loadTnormParam(String & inputTestListFileName, String & testFileTnorm,
01212   ObjectRefVector & stockTnorm, Config & config)
01213 {
01214 
01215   int fieldId = 3;
01216   int fieldScore = 4;
01217 
01218   double nbTests = 0;
01219   //load imp_seg_male.res
01220 
01221 
01222   String idTest;
01223 
01224   XList testsTnorm(testFileTnorm, config);      // read the Id for each test
01225   XList clientsTnorm(inputTestListFileName, config);    //NDX target_seg
01226   XLine *line, *linep;
01227 
01228   while ((line = clientsTnorm.getLine()))
01229     {
01230       idTest = line->getElement(0);     //id client
01231       //norm tnorm=new norm();
01232 
01233       double accumScore = 0.0;
01234       double accumScore_2 = 0.0;
01235       double add = 0.0;
01236       while ((linep = testsTnorm.getLine()))
01237         {
01238           if (linep->getElement(fieldId) == idTest)
01239             {
01240               add = (linep->getElement(fieldScore)).toDouble();
01241               accumScore = accumScore + add;
01242               accumScore_2 = accumScore_2 + (add * add);
01243               nbTests++;
01244             }
01245 
01246         }
01247       Norm & tnorm = *new Norm();       //don't forget to delete! use refVector.deleteAllObjects()
01248       testsTnorm.rewind();
01249       tnorm.idTest = (idTest);
01250       tnorm.mu = ((accumScore / nbTests));
01251       tnorm.sigma = sqrt((accumScore_2 / (nbTests) - (tnorm.mu * tnorm.mu)));
01252       stockTnorm.addObject(tnorm);
01253       if (debug)
01254         cout << "Test : " << tnorm.
01255           idTest << " , TNORM Param : mu = " << tnorm.
01256           mu << " sigma = " << tnorm.sigma << endl;
01257       nbTests = 0;
01258 
01259     }
01260 }
01261 
01262 
01263 //Do normalization on a score, look for test name in refVector to load mu and std variables.
01264 void normalizeScore(String & test, double &decision, ObjectRefVector & stockNorm)
01265 {
01266 
01267   if (verbose && verboseLevel > 2)
01268     cout << " Applying NORM, score =  " << decision << endl;
01269 
01270   for (unsigned long i = 0; i < stockNorm.size(); i++)
01271     {
01272       if (static_cast < Norm & >(stockNorm.getObject(i)).idTest == test)
01273         decision =
01274           (decision - static_cast <
01275           Norm & >(stockNorm.getObject(i)).mu) / static_cast <
01276           Norm & >(stockNorm.getObject(i)).sigma;
01277     }
01278   if (verbose && verboseLevel > 1)
01279     cout << " Normed score =  " << decision << endl;
01280 
01281 }
01282 
01283 //Do normalization on a score, look for test name in refVector to load mu and std variables.
01284 void normalizeScore(String & test, double &decision, ObjectRefVector & stockNorm,double &shift)
01285 {
01286 
01287   if (verbose && verboseLevel > 2)
01288     cout << " Applying NORM, score =  " << decision << endl;
01289 
01290   for (unsigned long i = 0; i < stockNorm.size(); i++)
01291     {
01292       if (static_cast < Norm & >(stockNorm.getObject(i)).idTest == test)
01293         decision =
01294           (decision - (static_cast < Norm & >(stockNorm.getObject(i)).mu + shift)) / static_cast <
01295           Norm & >(stockNorm.getObject(i)).sigma;
01296     }
01297   if (verbose && verboseLevel > 1)
01298     cout << " Normed score =  " << decision << endl;
01299 
01300 }
01301 
01302 
01303 void computeAndStoreZnormParam(StatServer &ss, String & inputImpListFileName, String &idclient, MixtureGD &clientMixture,
01304   ObjectRefVector & stockZnorm, MixtureGD &world, Config & config, bool &ztnorm, ObjectRefVector & stockTnorm ){
01305           
01306     // read the Id for each impostor test , format : one column with impostor tests names 
01307     XList testsZnorm(inputImpListFileName, config);
01308     XLine *line;
01309     String fullFileName;
01310     String *idImp = NULL;         
01311     double LLRRatio = 0.0, nbTests = 0.0, accumScore = 0.0 , accumScore_2 = 0.0;
01312           
01313           
01314     //Loop on each impostor tests
01315     while ((line = testsZnorm.getLine()))
01316     {
01317         line->getElement();   //impostor name LOST because not used
01318             
01319         idImp = line->getElement();
01320         fullFileName = getFullFileName(*idImp, config);
01321         
01322         if (verbose && verboseLevel > 2)
01323             cout << " IMPOSTOR SEGMENT TESTED   [" << *idImp << "]" << endl;
01324         String labelSelectedFrames =  config.getParam("labelSelectedFrames");
01325         
01326         //IMPOSTOR DATA STUFF
01327         FeatureServer fs(config, *idImp);
01328         
01329         // Create the segment server for managing the segments/clusters
01330         SegServer segmentsServer;
01331         
01332         // Create the lable server, for indexing the segments/clusters  
01333         LabelServer labelServer;
01334         
01335         // Reading the segmentation files for each feature input file
01336         initializeClusters(*line, segmentsServer, labelServer, config);
01337         
01338         // Verify if the segments ending before the end of the feature files...
01339         verifyClusterFile(segmentsServer, fs, config);
01340         
01341         // Get the index of the cluster with in interest audio segments
01342         long codeSelectedFrame = labelServer.getLabelIndexByString(labelSelectedFrames);
01343         
01344         if (codeSelectedFrame == -1)
01345         {               // No data for this model !!!!!!!!!!!!!!
01346             cout << " WARNING - NO DATA FOR [" << *idImp<< "]";
01347         }
01348         else
01349         { 
01350             SegCluster& selectedSegments = segmentsServer.getCluster(codeSelectedFrame);
01351             
01352             //TEST IF TOP TEN INFO FILE ARE ALREADY COMPUTED AND STORED ON THE HARD DRIVE       
01353             if (FileExists(fullFileName)  ==  false  )
01354             {   
01355                 LLRRatio  =     computeLLR(config, ss, fs, world, clientMixture, selectedSegments, fullFileName);    
01356             }
01357                 
01358             else
01359             {
01360                 LLRRatio  =     computeFastLLR(ss, fs, world, clientMixture, selectedSegments, fullFileName, config);    
01361             } 
01362             
01363             //IF ZTNORM is used scores must be tnormed before ZNORM computation
01364             if(ztnorm)
01365             {   
01366                 if (verbose && verboseLevel > 1) 
01367                     cout <<" ZTNorm activated : T-norm score before Z-norm"<<endl;
01368                 normalizeScore(*idImp, LLRRatio, stockTnorm);
01369                 
01370             }
01371                     
01372             accumScore = accumScore + LLRRatio;
01373             accumScore_2 = accumScore_2 + (LLRRatio * LLRRatio);
01374             nbTests++;
01375         }
01376         
01377     }
01378     Norm & znorm = *new Norm(); //don't forget to delete! use refVector.deleteAllObjects()
01379     testsZnorm.rewind();
01380     znorm.idTest = idclient;
01381     znorm.mu = ((accumScore / nbTests));
01382     znorm.sigma = sqrt((accumScore_2 / (nbTests) - (znorm.mu * znorm.mu)));
01383     stockZnorm.addObject(znorm);
01384     if (verbose && verboseLevel > 1)
01385         cout << "Test : " << znorm.idTest << " , TNORM Param : mu = " << znorm.mu << " sigma = " << znorm.sigma << endl;
01386         
01387 }
01388 
01389 //Reset the vector which contains LLR or WMAP weights ( do not reset the first value : LLR(train model/train data).
01390 void resetWeights(DoubleVector & decision)
01391 {
01392 
01393   //set all weights to 0 : TEST should be equal to baseline
01394   cout << "Reset all WMAP weights" << endl;
01395   for (unsigned long e = 1; e < decision.size(); e++)
01396     {
01397       decision[e] = 0;          //FOR DEBUGGING
01398     }
01399 }
01400 
01401  //look for a true target trials in the targetTests file, if ok set score to 1 or WMAP else set to 0.
01402  // DO NOT SET wmap, regress or wmapgmm to true when using Oracle.
01403 
01404 void Oracle(String & idTar, String & idTest, double &score, Config & config,
01405   MixtureGD & tar, MixtureGD & non, StatServer & ss)
01406 {
01407 
01408   bool wmap = config.getParam("wmapOracleType").toBool();
01409   bool one = config.getParam("classicalOracleType").toBool();;
01410   String model, test;
01411   XLine *line;
01412   bool find = false;
01413   String targetTestsList = config.getParam("targetTests");
01414   XList targetTests(targetTestsList, config);
01415 
01416   while ((line = targetTests.getLine()))
01417     {
01418       model = line->getElement(0);      //id client in file
01419       test = line->getElement(2);       //id test in file
01420 
01421       if (idTar == model && test == idTest)     //test = target test
01422         {
01423           if (wmap)
01424             {
01425               cout << "Find true target trial, set weight to WMAP computation"
01426                 << endl;
01427               //HERE : leave the WMAP weight
01428               DoubleVector tmp;
01429               tmp.addValue(0);  //The first element is not evaluated by WMAP.
01430               tmp.addValue(score);
01431               WMAPGMMFixedPriors(tmp, config, tar, non, ss);
01432               score = tmp[1];
01433             }
01434           else if (one)
01435             {
01436               cout << "Find true target trial, set weight to 1" << endl;
01437               score = 1;        //true Oracle with weight = 1  
01438             }
01439 
01440 
01441           find = true;
01442         }
01443 
01444 
01445 
01446     }
01447 
01448   if (!find)
01449     {
01450       cout << "Impostor trial, set weight to 0" << endl;
01451       score = 0;                //test != test target, set proba to 0;  
01452     }
01453 
01454 
01455 
01456 }
01457 
01458 //Jack knife on data, WARNING ONLY ONE EM AND MAP ITERATION IS DONE
01459 void crossValid(Config & configTest, StatServer & ss,  MixtureServer & ms, FeatureServer & fs, SegCluster & selectedSegments,
01460   MixtureGD & aprioriModel,MixtureGD &bestModel,  SegCluster& selectedSegmentsBagged, String & idTest)
01461 {
01462   MixtureGD & clientMixture = ms.duplicateMixture(aprioriModel, DUPL_DISTRIB);
01463   MixtureGD & clientMixtureEM = ms.duplicateMixture(aprioriModel, DUPL_DISTRIB);
01464   double LLR = 0.0,previousLLR=100000;
01465   MAPCfg mapCfg(configTest);
01466   double trainSelected = configTest.getParam("SelectedTrain").toDouble();
01467   cout << "Compute Model on " << (trainSelected *  100) << "% of data" << endl;
01468  unsigned long baggedMinimalLength =3;
01469   if (configTest.existsParam("baggedMinimalLength"))
01470     baggedMinimalLength = configTest.getParam("baggedMinimalLength").toLong();
01471     unsigned long baggedMaximalLength =7;
01472   if (configTest.existsParam("baggedMaximalLength"))
01473     baggedMaximalLength = configTest.getParam("baggedMaximalLength").toLong();
01474         
01475   for (int It = 0; It < configTest.getParam("AverageIt").toLong(); It++)                        //compute AverageIt times
01476     {   
01477           if (debug) cout << "Iteration "<<It<<endl;
01478           if (verboseLevel > 1)
01479             cout << "Mean LLK Init = " << meanLikelihood(ss, fs, clientMixture,   selectedSegments, configTest) << endl;
01480              MixtureStat & emAcc = ss.createAndStoreMixtureStat(clientMixture); // Create a statistic accumulator using the curent model
01481               SegCluster & baggedSelected =  (selectedSegments.getServer()).createCluster(1, "", "");   // Create the cluster for describing the selected frames
01482               SegCluster & baggedUnselected = (selectedSegments.getServer()).createCluster(1, "", "");  // Create the cluster for describing the selected frames
01483               baggedSegments(selectedSegments, baggedSelected, baggedUnselected,trainSelected,baggedMinimalLength,baggedMaximalLength); //Train the client model on trainSelected Frame
01484               emAcc.resetEM();
01485               double llkPreviousIt = accumulateStatEM(ss, fs, emAcc, baggedSelected, configTest);       // Accumulate the EM statistics
01486               clientMixture = emAcc.getEM();    // Get the EM estimate 
01487               clientMixtureEM=clientMixture;  
01488               unsigned long frameCount = (unsigned long) emAcc.getEMFeatureCount();
01489               llkPreviousIt = llkPreviousIt / (double) frameCount;
01490               if (verbose) cout << "ML (partial) estimate it[0] (take care, it corresponds to the previous it,0 means init likelihood) = " << llkPreviousIt << endl;
01491               computeMAP(ms, aprioriModel, clientMixture, frameCount, configTest);      // Bayesian Adaptation client=MAP(aprioriModel,client)
01492               if (mapCfg.getNormalizeModel())
01493                 normalizeMixture(clientMixture, mapCfg, configTest);    // Normalize/fit the model if needed
01494               ss.deleteMixtureStat(emAcc);
01495               if (verboseLevel > 2)
01496                 cout << "Likelihood on all frames= " << meanLikelihood(ss, fs, clientMixture, selectedSegments, configTest) << endl;
01497                  if (verboseLevel > 1)
01498                     cout << "Final likelihood on all frames= " << meanLikelihood(ss, fs, clientMixture, selectedSegments, configTest) << endl;
01499                   if(debug)  cout << "Compute LLR of  " << ((1-trainSelected) * 100) << "% of train data on the model learnt on " << (trainSelected* 100) << "%  data" << endl;
01500                   LLR = computeLLR(ss, fs, aprioriModel, clientMixture, baggedUnselected); //compute LLR with unselected data
01501                  if (debug) cout << "LLR for IT "<<It<<" = "<< LLR<<endl;
01502                  if(LLR < previousLLR) {
01503                           bestModel=clientMixtureEM;    // Get the EM estimate 
01504                         //recopy cluster
01505                           copyCluster(baggedSelected,selectedSegmentsBagged);
01506                  }
01507                   (selectedSegments.getServer()).remove(baggedSelected);
01508                   (selectedSegments.getServer()).remove(baggedUnselected);
01509                   previousLLR=LLR;
01510                   clientMixture=aprioriModel;   
01511                 
01512                                 
01513     }
01514   
01515     bestModel.save(idTest,configTest);
01516   
01517     //delete temporary mixture
01518     
01519     ms.deleteMixture(clientMixtureEM);
01520     ms.deleteMixture(clientMixture);
01521     ms.deleteUnusedDistribs();
01522 
01523 }
01524 
01525 
01526 //2 purposes : do not recalculate LLR of test data on a client model, and possibility to use RES file from a different recognition system to compute adaptation weights
01527 double searchLLRFromResFile(String & idTar, String & test,
01528   String & inputResFilename, Config & config)
01529 {
01530   double LLR = 0.0;
01531   cout << "Search from file : " << inputResFilename << " for the LLR : " <<
01532     idTar << " / " << test << endl;
01533   String idTest, idClient;
01534   XList listLLR(inputResFilename, config);
01535   XLine *line;
01536   while ((line = listLLR.getLine()))
01537     {
01538 
01539       idClient = line->getElement(1, 0);
01540       idTest = line->getElement(3, 0);
01541 
01542       if ((idClient == idTar) && (idTest == test))
01543         LLR = line->getElement(4, 0).toDouble();
01544 
01545     }
01546   if (LLR != 0.0)
01547     return LLR;
01548   else
01549     {
01550       cout << "LLR not found in :" << inputResFilename << " , return -1" <<
01551         endl;
01552       return -1;
01553     }
01554 
01555 }
01556 
01557 
01558 
01559 
01560 
01561 
01562 unsigned long adaptModelEMweightedFrames(String &labelSelectedFrames,XLine & featureFileName,StatServer &ss,MixtureGD &world, MixtureGD &tar, MixtureGD &non,MixtureGD &MixtureforLLR,MixtureGD &MixtureEMOutput, Config &config, FeatureServer &fs, String &fullFileName,String &idTest){
01563         unsigned long frameCount=0;
01564         WindowLLR windowLLR(config); // Initialize the windowLLR mode if requested
01565         if(verbose && verboseLevel >1) windowLLR.showConfig();
01566         if (windowLLR.isSet()) windowLLR.setNbClient(1); // set one to nb client for window LLR ???A ENLEVER????
01567         FileInfo FI(fullFileName);              //file to read or write top components info
01568         bool fileispresent=true;
01569         if (FileExists(fullFileName) == false ) fileispresent=false;
01570         DoubleVector FrameWeights(0,0); 
01571         MixtureGDStat &worldAcc=ss.createAndStoreMixtureGDStat(world);
01572         worldAcc.resetLLK();               
01573         MixtureGDStat &clientAccforLLR=ss.createAndStoreMixtureGDStat(MixtureforLLR);
01574         MixtureGDStat &clientAcc=ss.createAndStoreMixtureGDStat(MixtureEMOutput);
01575         clientAccforLLR.resetLLK();     
01576         SegServer segmentsServer;                                                             // Create the segment server for managing the segments/clusters
01577         LabelServer labelServer;                                                              // Create the lable server, for indexing the segments/clusters
01578         initializeClusters(featureFileName,segmentsServer,labelServer,config);                // Reading the segmentation files for each feature input file
01579         verifyClusterFile(segmentsServer,fs,config);                                          // Verify if the segments ending before the end of the feature files...
01580         long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames);        // Get the index of the cluster with in interest audio segments
01581         if (codeSelectedFrame==-1)                                                            // The file is empty
01582                 cout << "ATTENTION, TEST FILE ["<<idTest<<"] is empty"<<endl;
01583         else{
01584                 SegCluster& selectedSegments=segmentsServer.getCluster(codeSelectedFrame); // Gives the cluster of the selected/used segments   
01585                 //compute LLR for each feature server test frames
01586                 Seg *seg;                       // reset the reader at the begin of the input stream
01587                 selectedSegments.rewind();
01588                 unsigned long idFrame=0;
01589                 while ((seg = selectedSegments.getSeg()) != NULL)
01590                         {                               // For each of the selected segments
01591                         unsigned long idxBeginFrame =seg->begin() +fs.getFirstFeatureIndexOfASource(seg->sourceName());
01592                         fs.seekFeature(idxBeginFrame);
01593                         Feature f;
01594                         for (unsigned long idxFrame = 0; idxFrame < seg->length(); idxFrame++)
01595                         {                       // For each frame of the segment
01596                                 double llkw=0.0;
01597                                 double llkc=0.0;
01598                                 fs.readFeature(f);
01599                                 if(!fileispresent){
01600                                         llkw=worldAcc.computeAndAccumulateLLK(f,1.0,DETERMINE_TOP_DISTRIBS);    // Determine the top components and compute wrld LLK
01601                                         //STOCK the vector with the top selected component for reuse 
01602                                         const LKVector & lkv = ss.getTopDistribIndexVector();
01603                                         FI.writeTopInfo(lkv, config);
01604                                 }
01605                                 else {
01606                                         FI.loadTopInfo(ss, idFrame, config);
01607                                         llkw=worldAcc.computeAndAccumulateLLK(f,1.0,USE_TOP_DISTRIBS);       // Determine the top components and compute wrld LLK
01608                                         idFrame++;
01609                                 }
01610                            
01611                                 if (windowLLR.isSet()) windowLLR.dec(idxBeginFrame+idxFrame);                 
01612                                 llkc=clientAccforLLR.computeAndAccumulateLLK(f,1.0,USE_TOP_DISTRIBS);
01613                                 if (windowLLR.isSet()) windowLLR.accLLR(0,llkc-llkw);
01614                                 if (windowLLR.isSet() && windowLLR.isEnd()){                            //fin window on adapte
01615                                         FrameWeights.addValue(windowLLR.getLLR(0));                             //for each segments of window size the LLR is stocked
01616                                         
01617                                 }
01618                                 
01619                         }
01620                 }
01621                 WMAPGMMFixedPriors(FrameWeights,config,tar,non,ss);                                                             //compute WMAP for each LLR
01622                 //Create EM model with a weight for each segment 
01623                 clientAcc.resetEM();
01624                 double llkAcc = 0.0;
01625                 unsigned long cpt=0;
01626                 selectedSegments.rewind();
01627                 while ((seg = selectedSegments.getSeg()) != NULL)
01628                         {                               // For each of the selected segments
01629                         unsigned long idxBeginFrame =seg->begin() +fs.getFirstFeatureIndexOfASource(seg->sourceName());
01630                         fs.seekFeature(idxBeginFrame);
01631                         Feature f;
01632                         for (unsigned long idxFrame = 0; idxFrame < seg->length(); idxFrame++)
01633                         {                       // For each frame of the segment
01634                                 fs.readFeature(f);
01635                                 if (windowLLR.isSet()) windowLLR.dec(idxBeginFrame+idxFrame);  
01636                                 if (windowLLR.isSet() && windowLLR.isEnd())     cpt++;
01637                                 llkAcc += log(clientAcc.computeAndAccumulateEM(f, FrameWeights[cpt]))*FrameWeights[cpt];                //PB si un seg < 30 trames
01638                                                 
01639                         }
01640                         
01641                             
01642         }
01643         MixtureEMOutput=clientAcc.getEM();
01644         frameCount = (unsigned long) clientAcc.getEMFeatureCount();
01645         if(verbose && verboseLevel >1) cout << "NBFRAMES  for Acc= "<< frameCount <<endl;
01646         if(verbose && verboseLevel >1) cout << "ML estimate Likelihood  = "<< (llkAcc/frameCount) <<endl;
01647                             
01648         }
01649         return frameCount;
01650 }
01651         
01652         
01653         
01654 String selectNearestTarModel(String &TARListFilename, String & fullFileName,Config &config, StatServer & ss, FeatureServer & fs,
01655   MixtureGD & world,  SegCluster & selectedSegments,MixtureServer &ms){
01656         DoubleVector LLR; 
01657         double LLRvalue= 0.0;
01658         XList TARXList(TARListFilename, config);        // read the Id + filenames for each client
01659         XLine *line;
01660         while ((line = TARXList.getLine()) != NULL){
01661                   String *idmodel = line->getElement(); // Get the TAR model ID 
01662                   MixtureGD & TARmodel = ms.loadMixtureGD(*idmodel);
01663                   if (FileExists(fullFileName) == false)
01664                             {   //first time seeing this test
01665                               LLRvalue = computeLLR(config, ss, fs, world, TARmodel, selectedSegments, fullFileName);   //For proba, take the decision on the original target model
01666                             }
01667                   else
01668                               LLRvalue = computeFastLLR(ss, fs, world, TARmodel, selectedSegments, fullFileName, config);       //For proba, take the decision on the original target model
01669                 
01670         cout <<"Model : "<<*idmodel <<" LLR = " <<LLRvalue<<endl;
01671         LLR.addValue(LLRvalue);
01672         ms.deleteMixture(TARmodel);     
01673         ms.deleteUnusedDistribs();
01674         }
01675         
01676         unsigned long index=LLR.getIndexOfLargestValue();
01677         TARXList.rewind();
01678         return *(TARXList.getLine(index)).getElement();
01679         
01680   
01681                   
01682 }
01683 //Need all models in memory
01684 //Not optimised with top ten info file
01685 void computeLLRmatrix(DoubleMatrix &LLR, XLine &models, XList &features, Config &config, StatServer &ss, MixtureGD & world, MixtureServer &ms,String &labelSelectedFrames){
01686         String *idmodel;
01687         XLine *line;
01688         int j=0;
01689         int i=0;
01690         LLR.setDimensions(models.getElementCount(),models.getElementCount());
01691         double LLRvalue=0.0;
01692         while((idmodel=models.getElement())){                   //loop on each target model
01693                         
01694                 while((line = features.getLine()) != NULL){             //loop on each target model
01695                         
01696                       FeatureServer fs(config,*line);                                            // Reading the features (from several files)
01697                       SegServer segmentsServer;                                                             // Create the segment server for managing the segments/clusters
01698                       LabelServer labelServer;                                                              // Create the lable server, for indexing the segments/clusters
01699                       initializeClusters(*line,segmentsServer,labelServer,config);               // Reading the segmentation files for each feature input file
01700                       verifyClusterFile(segmentsServer,fs,config);                                          // Verify if the segments ending before the end of the feature files...
01701                       long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames);        // Get the index of the cluster with in interest audio segments
01702                       if (codeSelectedFrame==-1){                                                           // No data for this model !!!!!!!!!!!!!!
01703                                 cout << " WARNING - NO DATA FOR TRAINING ["<<*idmodel<<"]";
01704                       }
01705                       else{
01706                                 SegCluster& selectedSegments=segmentsServer.getCluster(codeSelectedFrame); // Gives the cluster of the selected/used segments                                   
01707                                 MixtureGD & TARmodel = ms.loadMixtureGD(*idmodel);
01708                                 if (debug) cout << "Model : "<<*idmodel<<" feature" << line->toString()<<endl;
01709                                 LLRvalue = computeLLR(ss, fs, world, TARmodel, selectedSegments);       
01710                                 LLR(i,j) = LLRvalue;
01711                                 j++;
01712                                 ms.deleteMixture(TARmodel);
01713                                 ms.deleteUnusedDistribs();
01714                                  
01715                         } 
01716                                 
01717                         fs.reset();     
01718                         labelServer.clear();
01719                         segmentsServer.removeAllClusters();
01720                         segmentsServer.removeAllSegs();
01721                 }
01722                 features.rewind();
01723                 i++;
01724                 j=0;
01725                 
01726         }
01727         
01728         LLR.save("LLRmatrix.mat",config);
01729         
01730 }
01731 
01732 
01733 void addLineInXList(XLine &line,XList &list){
01734         unsigned long cpt=0;
01735         list.addLine();
01736         XLine *tmp=list.getLine();
01737         while(cpt!=line.getElementCount()){
01738                 (*tmp).addElement(*line.getElement());
01739                 cpt++;
01740         }
01741         list.rewind();
01742         line.rewind();
01743         
01744         
01745 }
01746 
01747 
01748 
01749 //weighted fusion of MAP models  
01750 
01751 void fuseMAP(Config & config, StatServer & ss,  MixtureServer & ms, MixtureGD & aprioriModel, MixtureGD & clientMixture, MixtureGD & aux,
01752   MixtureGD & tmp, DoubleVector & decision, XLine & testsToCompute)
01753 {
01754   if (verbose && verboseLevel > 1)
01755     cout << "MAP Fusion" << endl;
01756   int i = 0;
01757   int nbModels = decision.size();       //number of models
01758   int index = ms.getMixtureIndex(testsToCompute.getElement(0)); //get the first model , its name is stocked in the XLine
01759   if (index == -1) aux =ms.loadMixtureGD(testsToCompute.getElement(0));
01760   else aux = ms.getMixtureGD(ms.getMixtureIndex(testsToCompute.getElement(0)));   //IF STOCKED (ALREADY COMPUTED) LOAD MODEL
01761 
01762   double auxWeight = decision[0];       //first weight, usually 1 for train data
01763   //i.e. the objectRefvector and the XLine are reseted after each client                  
01764 
01765   for (i = 0; i < nbModels - 1; i++)
01766     {
01767       if (debug){
01768         cout << "Fusing MAP models : " << testsToCompute.getElement(i)<<" with " << testsToCompute.getElement(i + 1) << endl;
01769         }
01770 
01771       index=ms.getMixtureIndex(testsToCompute.getElement(i + 1));
01772       if(index==-1) fuseMAPMeans(aux, auxWeight,ms.loadMixtureGD(testsToCompute.getElement(i + 1)),decision[i + 1], tmp);
01773       else fuseMAPMeans(aux, auxWeight,ms.getMixtureGD(ms.getMixtureIndex(testsToCompute.getElement(i + 1))),decision[i + 1], tmp);
01774       aux = tmp;
01775       auxWeight +=decision[i + 1];
01776 
01777     }
01778   clientMixture = tmp;
01779 
01780 }
01781 
01782 
01783 
01784 void fuseMAPMeans(const MixtureGD & model1,double &w1,const MixtureGD & model2,double &w2,MixtureGD &result){
01785   unsigned long vectSize=model1.getVectSize();  
01786   if (vectSize!=model2.getVectSize()) throw Exception("Feature vector size should be the same" , __FILE__, __LINE__);
01787   unsigned long nbDistrib=model1.getDistribCount();
01788   if (nbDistrib!=model2.getDistribCount()) throw Exception("Number of components should be the same" , __FILE__, __LINE__);
01789   for (unsigned long idx=0;idx<nbDistrib;idx++){
01790     DistribGD & d1=model1.getDistrib(idx);
01791     DistribGD & d2=model2.getDistrib(idx);
01792     DistribGD & dr=result.getDistrib(idx);
01793 
01794     for (unsigned long c=0;c<vectSize;c++){
01795         dr.setMean(((d1.getMean(c)*w1)+(d2.getMean(c)*w2))/(w1+w2),c);
01796         dr.setCov((d1.getCov(c)),c);// COPY of the model 1 covariance, should be equal to world (MAP mean only)
01797             }
01798     dr.computeAll();
01799     result.weight(idx)=model1.weight(idx);// COPY of the model 1 weight, should be equal to world (MAP mean only)
01800    }
01801 }
01802 
01803 
01804 double computeWeightedLLR(Config & config, StatServer & ss, FeatureServer &fsTests, MixtureServer & ms, MixtureGD & aprioriModel, SegCluster & selectedSegmentsTests,MixtureGD & aux, DoubleVector & decision, XLine & testsToCompute, String &idTest, String &fullFileName){
01805 
01806 if (verbose && verboseLevel > 1)
01807     cout << "Weighted SUM of LLR" << endl;
01808   int i = 0;
01809   int nbModels = decision.size();       //number of models
01810   double Wllr=0.0,weights=0.0,Wllrtot=0.0;
01811                   
01812 
01813   for (i = 0; i < nbModels - 1; i++)    //DON'T TAKE THE LAST MODEL because it is the current test model
01814     {
01815       if (debug){
01816         cout << "Weighted SUM of LLR model : " << testsToCompute.getElement(i)<<" with segment " << idTest << endl;
01817         }
01818       int index=ms.getMixtureIndex(testsToCompute.getElement(i));
01819       if(index==-1) aux=ms.loadMixtureGD(testsToCompute.getElement(i));
01820       else aux=ms.getMixtureGD(ms.getMixtureIndex(testsToCompute.getElement(i)));
01821       Wllr+=(computeFastLLR(ss, fsTests, aprioriModel, aux, selectedSegmentsTests, fullFileName, config)*decision[i]);
01822       weights+=decision[i];
01823         cout <<"LLR accumulated = "<<Wllr<< "sum weights = "<<weights;
01824     }
01825   Wllrtot=Wllr/weights;
01826     return Wllrtot;
01827           
01828   }
01829 
01830   
01831 void copyCluster(SegCluster &source, SegCluster &dest) {
01832         dest.removeAll(); // clean the destination
01833         source.rewind();
01834         Seg *p;
01835         while((p=source.getSeg())!=NULL)
01836                 dest.add(*p);
01837 }
01838 
01839 int findClusterInRefvector(ObjectRefVector &SegServ,String & id){
01840         for (unsigned long i=0;i<SegServ.size();i++){
01841                 if((static_cast <SegCluster & >(SegServ.getObject(i))).sourceName() == id)
01842                         return i;
01843                         
01844         }
01845         return -1;
01846 }
01847 
01848 
01849 
01850 void learnEMimpostorModels(Config & config, DoubleVector &NbFramesSelectedImp,MixtureServer &ms,StatServer &ss,MixtureGD &world){
01851 XLine *line;
01852 String ImpostorList = config.getParam("impCohortFile");
01853 XList Imp(ImpostorList, config);
01854         
01855 while ((line = Imp.getLine()))
01856     {
01857           MixtureGD & impMixtureEM = ms.duplicateMixture(world, DUPL_DISTRIB);
01858           String  *idImp = line->getElement();  // Get the Tests ID (id)
01859           String fullMixtureName=getFullMixtureName(*idImp,config);
01860             
01861           if (FileExists(fullMixtureName) == false) {
01862                   
01863                   XLine featureFileListImp = line->getElements();       // Get the list of feature file for the tests (end of the line)
01864                   if (verbose)
01865                     cout << "Train IMPOSTOR model  [" << *idImp << "]" << endl;
01866                   String labelSelectedFrames = config.getParam("labelSelectedFrames");
01867         
01868                    //IMPOSTOR DATA STUFF
01869                   FeatureServer fsImp(config, featureFileListImp);
01870                   SegServer segmentsServerImp;  // Create the segment server for managing the segments/clusters
01871                   LabelServer labelServerImp;   // Create the lable server, for indexing the segments/clusters
01872                   initializeClusters(featureFileListImp, segmentsServerImp, labelServerImp, config);    // Reading the segmentation files for each feature input file
01873                   verifyClusterFile(segmentsServerImp, fsImp, config);  // Verify if the segments ending before the end of the feature files...
01874                   long codeSelectedFrame = labelServerImp.getLabelIndexByString(labelSelectedFrames);   // Get the index of the cluster with in interest audio segments
01875                   if (codeSelectedFrame == -1)
01876                     {           // No data for this model !!!!!!!!!!!!!!
01877                       cout << " WARNING - NO DATA FOR TRAINING [" << *idImp
01878                         << "]";
01879                     }
01880                   else
01881                     { 
01882                         SegCluster& selectedSegmentsImp = segmentsServerImp.getCluster(codeSelectedFrame);
01883                         NbFramesSelectedImp.addValue(SegClusterFrame(selectedSegmentsImp));                             //used for fuseModel
01884                         adaptModelEM(config, ss, ms, fsImp, selectedSegmentsImp, world, impMixtureEM);  //create the EM model for the impostor
01885                         ms.setMixtureId(impMixtureEM, *idImp);
01886                         impMixtureEM.save(*idImp,config);               //SAVE test mixture 
01887                         ms.deleteMixture(impMixtureEM); 
01888                         ms.deleteUnusedDistribs();
01889                     }
01890                     
01891             }
01892                     
01893    }
01894 
01895 }
01896 
01897 //Learn all impostor models by classical MAP adaptation
01898 
01899 void learnMAPimpostorModels(Config & config, DoubleVector &NbFramesSelectedImp,MixtureServer &ms,StatServer &ss,MixtureGD &world){
01900 XLine *line;
01901 String ImpostorList = config.getParam("impCohortFile");
01902 XList Imp(ImpostorList, config);
01903         
01904 while ((line = Imp.getLine()))
01905     {
01906           MixtureGD & impMixture = ms.duplicateMixture(world, DUPL_DISTRIB);
01907           String  *idImp = line->getElement();  // Get the Tests ID (id)
01908           String fullMixtureName=getFullMixtureName(*idImp,config);
01909             
01910            //IF IMP MODEL WAS NOT CREATED YET 
01911           if (FileExists(fullMixtureName) == false) {
01912                   
01913                   XLine featureFileListImp = line->getElements();       // Get the list of feature file for the tests (end of the line)
01914                   if (verbose)
01915                     cout << "Train IMPOSTOR model  [" << *idImp << "]" << endl;
01916                   String labelSelectedFrames = config.getParam("labelSelectedFrames");
01917         
01918                    //IMPOSTOR DATA STUFF
01919                   
01920                   FeatureServer fsImp(config, featureFileListImp);
01921                   
01922                   // Create the segment server for managing the segments/clusters
01923                   SegServer segmentsServerImp;
01924                   
01925                   // Create the lable server, for indexing the segments/clusters
01926                   LabelServer labelServerImp;   
01927                   
01928                   // Reading the segmentation files for each feature input file
01929                   initializeClusters(featureFileListImp, segmentsServerImp, labelServerImp, config);
01930                   
01931                   // Verify if the segments ending before the end of the feature files...
01932                   verifyClusterFile(segmentsServerImp, fsImp, config);
01933                   
01934                   // Get the index of the cluster with in interest audio segments
01935                   long codeSelectedFrame = labelServerImp.getLabelIndexByString(labelSelectedFrames);
01936                   
01937                   if (codeSelectedFrame == -1)
01938                     {           // No data for this model !!!!!!!!!!!!!!
01939                       cout << " WARNING - NO DATA FOR TRAINING [" << *idImp
01940                         << "]";
01941                     }
01942                   else
01943                     { 
01944                         SegCluster& selectedSegmentsImp = segmentsServerImp.getCluster(codeSelectedFrame);
01945                         NbFramesSelectedImp.addValue(SegClusterFrame(selectedSegmentsImp));
01946                         //create the EM model for the impostor
01947                         adaptModel(config, ss, ms, fsImp, selectedSegmentsImp, world, impMixture);      
01948                         ms.setMixtureId(impMixture, *idImp);
01949                         impMixture.save(*idImp,config);         //SAVE mixture 
01950                         ms.deleteMixture(impMixture);   
01951                         ms.deleteUnusedDistribs();
01952                     }
01953                     
01954             }
01955                     
01956    }
01957 
01958 }
01959 
01960 
01961 
01962 
01963 double computeAdaptedTnorm(Config & configTest, MixtureServer & ms,StatServer &ss, MixtureGD &world, MixtureGD &auxMixture, MixtureGD &tmpMixture, double &nonorm_score, int &countTests, DoubleVector &decision, DoubleVector &NbFramesSelected, XLine &testsToCompute,
01964 DoubleVector &NbFramesSelectedImp, FeatureServer &fsTests, SegCluster &selectedSegmentsTests, String &idTest){
01965         
01966 //load mod�le EM imposteur n-1 iteration
01967 XLine *line;
01968 String ImpostorList = configTest.getParam("ImpList");
01969 XList Imp(ImpostorList, configTest);
01970 //recopie nb trames
01971 DoubleVector NbFramesSelectedForAdaptation=NbFramesSelected;
01972 XLine testsToComputeForAdaptation;
01973 double accumScore = 0.0;
01974 double accumScore_2 = 0.0;
01975 double add = 0.0;
01976 int nbTests=0;
01977 double mu=0.0, sigma=0.0, score=0.0;    
01978 MixtureGD & impMixture = ms.duplicateMixture(world, DUPL_DISTRIB);
01979 
01980 while ((line = Imp.getLine())){
01981         String *idImp = line->getElement();     // Get the Tests ID (id)    
01982         testsToComputeForAdaptation.addElement(*idImp);
01983         for (unsigned long e=1;e>testsToCompute.getElementCount();e++)
01984                 testsToComputeForAdaptation.addElement(testsToCompute.getElement(e,false));
01985         
01986         NbFramesSelectedForAdaptation[0]=NbFramesSelectedImp[countTests-1];
01987         computeMAPmodelFromEMones(configTest, ss, ms, NbFramesSelectedForAdaptation, world, impMixture, auxMixture, tmpMixture, decision, testsToComputeForAdaptation);
01988         add= computeFastLLR(ss, fsTests, world, impMixture, selectedSegmentsTests,idTest, configTest);
01989         accumScore = accumScore + add;
01990         accumScore_2 = accumScore_2 + (add * add);
01991         nbTests++;
01992         testsToComputeForAdaptation.reset();
01993         impMixture=world;
01994 }
01995 
01996 mu = ((accumScore / nbTests));
01997 sigma = sqrt((accumScore_2 / (nbTests) - (mu * mu)));    
01998 score=(nonorm_score-mu)/sigma;
01999 
02000 return score;   
02001 
02002 }
02003 
02004 //find the nearest LLR in a matrix of LLR learnt on different training duration 
02005 double findNearestLLRInMatrix(Matrix <double> &Mat, unsigned long &index, double &LLR){
02006         
02007 double delta=0.0;
02008 double distance=0.0;
02009 double previous=1000000;
02010 unsigned long ind_sav=0;        
02011 for (unsigned long i=0; i<Mat.rows(); i++){             //parcours la colonne index correspondant au nombre de session
02012         distance =LLR-Mat(i,index-1);
02013         if ((abs(distance))< abs(previous))
02014         {
02015                 previous=distance;
02016                 delta=Mat(i,index-1)-Mat(i,0);                  //return the LLR shift between index session and 1 session
02017                 if (delta<0)            //BAD SHIFT, SHOULD INCREASE
02018                         delta=0;                //for the moment =0 instead of elimnating the locutor in matrix
02019                 ind_sav=i;
02020         }
02021                 
02022 }
02023 //if(debug)
02024 cout << "LLR " << LLR <<" ; find nearest " <<  Mat(ind_sav,index-1)<< " for "<< index << " sessions"<<endl;
02025 return delta;
02026 }
02027 
02028 
02029 
02030 /***************************************************************/
02031 /* Use of the train data to assess the adaptation choice        */
02032 /********************************************************************/
02033 void assessAdaptation(StatServer &ss,MixtureGD & adaptedMixture ,SegCluster &selectedSegments, 
02034 Config &configTest, FeatureServer &fs, MixtureGD &world, DoubleVector &tmp, String &fullFileNameTrain, 
02035 int countTests, double &tarScore){
02036                 
02037 double LLRtrain = 0.0,
02038        IntUp    = 0.0,
02039        IntDown  = 0.0;
02040         
02041 //FROM CONFIG BUT COULD BE TAKEN FROM A GMM OF SCORES   
02042 double stdTARscores = configTest.getParam("VarTarScores").toDouble();
02043 
02044 LLRtrain = computeFastLLR(ss, fs, world, adaptedMixture, selectedSegments,fullFileNameTrain,configTest);
02045 
02046 IntUp   = tarScore + stdTARscores;
02047 IntDown = tarScore - stdTARscores;
02048         
02049 cout << "LLR for Train data on adapted model = "<<LLRtrain <<endl;
02050         
02051 //OUT OF THE TAR DISTRIB
02052 if (LLRtrain > IntUp || LLRtrain < IntDown )
02053 { 
02054     //should decrease the WMAP weight because of adaptation error       
02055     //score SHOULD BE a score for which the weight equal to 0 (mean of IMP distrib)!!! TO CHANGE!!!
02056     
02057     tmp[countTests-1]=configTest.getParam("meanImp").toDouble();
02058 
02059 }
02060 
02061 
02062 
02063 }
02064 
02065 
02066 
02067 
02068 
02069 /*
02070 Try to detect Baseline and unsupervised mode divergence on decision
02071 case 1: baseline accept trials as a true target trial
02072         unsupervised refuses it
02073 
02074 case 2 : baseline refuses trial
02075          unsupervised accepts
02076 
02077 The other cases (both systems agree) are not interesting
02078 
02079 */
02080 
/**
 * Handle a disagreement between the baseline and the adapted system.
 *
 * When the baseline rejects the trial (LLRbaseline < thresBaseline) while the
 * adapted system accepts it (LLRadapted > thresAdapted), LLRadapted is
 * replaced by LLRbaseline. In every other situation, including the opposite
 * disagreement, nothing is modified.
 *
 * @param LLRbaseline    score of the baseline system
 * @param LLRadapted     score of the adapted system; may be overwritten
 * @param thresBaseline  decision threshold of the baseline system
 * @param thresAdapted   decision threshold of the adapted system
 */
void divDetect(double &LLRbaseline,double &LLRadapted, double &thresBaseline, double & thresAdapted){
    // Case 1 (baseline accepts, adapted refuses) currently triggers no action.

    // Case 2: baseline refuses, adapted accepts.
    const bool baselineRefuses = LLRbaseline < thresBaseline;
    const bool adaptedAccepts  = LLRadapted > thresAdapted;
    if (!(baselineRefuses && adaptedAccepts))
        return;

    LLRadapted = LLRbaseline;
}
02094 
02095 
02096 
02097 #endif //!defined(ALIZE_UnsupervisedTools_cpp)