TrainTools.h

Go to the documentation of this file.
00001 /*
00002 This file is part of LIA_RAL which is a set of software based on ALIZE
00003 toolkit for speaker recognition. ALIZE toolkit is required to use LIA_RAL.
00004 
00005 LIA_RAL project is a development project that was initiated by the computer
00006 science laboratory of Avignon / France (Laboratoire Informatique d'Avignon -
00007 LIA) [http://lia.univ-avignon.fr <http://lia.univ-avignon.fr/>]. Then it
00008 was supported by two national projects of the French Research Ministry:
00009         - TECHNOLANGUE program [http://www.technolangue.net]
00010         - MISTRAL program [http://mistral.univ-avignon.fr]
00011 
00012 LIA_RAL is free software: you can redistribute it and/or modify
00013 it under the terms of the GNU Lesser General Public License as
00014 published by the Free Software Foundation, either version 3 of
00015 the License, or any later version.
00016 
00017 LIA_RAL is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU Lesser General Public License for more details.
00021 
00022 You should have received a copy of the GNU Lesser General Public
00023 License along with LIA_RAL.
00024 If not, see [http://www.gnu.org/licenses/].
00025 
00026 The LIA team as well as the LIA_RAL project team wants to highlight the
00027 limits of voice authentication in a forensic context.
00028 The "Person Authentification by Voice: A Need of Caution" paper
00029 proposes a good overview of this point (cf. "Person
00030 Authentification by Voice: A Need of Caution", Bonastre J.F.,
00031 Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00032 Chagnolleau I., Eurospeech 2003, Genova).
00033 The conclusion of the paper is reproduced below:
00034 [Currently, it is not possible to completely determine whether the
00035 similarity between two recordings is due to the speaker or to other
00036 factors, especially when: (a) the speaker does not cooperate, (b) there
00037 is no control over recording equipment, (c) recording conditions are not
00038 known, (d) one does not know whether the voice was disguised and, to a
00039 lesser extent, (e) the linguistic content of the message is not
00040 controlled. Caution and judgment must be exercised when applying speaker
00041 recognition techniques, whether human or automatic, to account for these
00042 uncontrolled factors. Under more constrained or calibrated situations,
00043 or as an aid for investigative purposes, judicious application of these
00044 techniques may be suitable, provided they are not considered as infallible.
00045 At the present time, there is no scientific process that enables one to
00046 uniquely characterize a person's voice or to identify with absolute
00047 certainty an individual from his or her voice.]
00048 
00049 Copyright (C) 2004-2010
00050 Laboratoire d'informatique d'Avignon [http://lia.univ-avignon.fr]
00051 LIA_RAL admin [alize@univ-avignon.fr]
00052 Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_TrainTools_h)
00056 #define ALIZE_TrainTools_h
00057 
// Symbol-visibility decoration applied to every public class and function
// declared in this header.
//   - Windows, LIA_SPKTOOLS_EXPORTS defined     : __declspec(dllexport)
//   - Windows, LIA_SPKTOOLS_EXPORTS not defined : __declspec(dllimport)
//   - any other platform                        : expands to nothing
#if defined(_WIN32) && defined(LIA_SPKTOOLS_EXPORTS)
#define LIA_SPKTOOLS_API __declspec(dllexport)
#elif defined(_WIN32)
#define LIA_SPKTOOLS_API __declspec(dllimport)
#else
#define LIA_SPKTOOLS_API
#endif
00067 
00068 #include <alize.h>
00069 #include "liatools.h"
00070 
00071 using namespace alize;
00072 using namespace std;
00073 
00074 class LIA_SPKTOOLS_API MAPCfg {
00075   String _method;
00076   double _r[3];
00077   bool _mean,_var,_weight;
00078   unsigned long _nbTrainIt; 
00079   unsigned long _nbEmIt; // used only for modelBasedadaptMode
00080   bool  _normalizeModel;
00081   bool _normalizeModelMeanOnly;
00082   unsigned long _normalizeModelNbIt;
00083   double _baggedFrameProbability;
00084 public:
00085   MAPCfg(Config &);
00086   String &getMethod(){return _method;}
00087   void showConfig(ostream & st);
00088   bool getMeanAdapt() { return _mean;}
00089   bool getVarAdapt() { return _var;}
00090   bool getWeightAdapt(){ return _weight;}
00091   double getMeanReg() { return _r[0];}
00092   double getVarReg() { return _r[1];}
00093   double getWeightReg() { return _r[2];}
00094   double getMeanAlpha() { return _r[0];}
00095   double getVarAlpha() { return _r[1];}
00096   double getWeightAlpha() { return _r[2];}
00097   unsigned long getNbTrainIt(){return _nbTrainIt;}
00098   unsigned long getNbEmIt(){return _nbEmIt;} // used only for modelBasedadaptMode
00099   bool getNormalizeModel(){return  _normalizeModel;}
00100   bool getNormalizeModelMeanOnly(){return _normalizeModelMeanOnly;}
00101   unsigned long getNormalizeModelNbIt(){return _normalizeModelNbIt;}
00102   double getBaggedFrameProbability(){return _baggedFrameProbability;}
00103   void setMethod(String v) {_method=v;}
00104   void setMeanAdapt(bool v) {_mean=v;}
00105   void setVarAdapt(bool v) { _var=v;}
00106   void setWeightAdapt(bool v){ _weight=v;}
00107   void setMeanReg(double v) { _r[0]=v;}
00108   void setVarReg(double v) { _r[1]=v;}
00109   void setWeightReg(double v) { _r[2]=v;}
00110   void setMeanAlpha(double v) { _r[0]=v;}
00111   void setVarAlpha(double v) { _r[1]=v;}
00112   void setWeightAlpha(double v) { _r[2]=v;}  
00113   void setNbTrainIt(unsigned long v){_nbTrainIt=v;}
00114   void setNbEmIt(unsigned long v){_nbEmIt=v;} // used only for modelBasedadaptMode
00115   void setNormalizeModel(bool v){_normalizeModel=v;}
00116   void setNormalizeModelMeanOnly(bool v){_normalizeModelMeanOnly=v;}  
00117   void setNormalizeModelNbIt(unsigned long v){_normalizeModelNbIt=v;}
00118   void setBaggedFrameProbability(double v){_baggedFrameProbability=v;}
00119 };
00120 
00121 
00122 class LIA_SPKTOOLS_API TrainCfg{
00123   double _initVarianceFlooring; 
00124   double _initVarianceCeiling; 
00125   double _finalVarianceFlooring;
00126   double _finalVarianceCeiling; 
00127   unsigned long _nbTrainIt; 
00128   bool  _normalizeModel;
00129   bool _normalizeModelMeanOnly;
00130   unsigned long _normalizeModelNbIt;
00131   double _baggedFrameProbability;
00132   double _baggedFrameProbabilityInit;
00133   bool _componentReduction;
00134   unsigned long _targetDistribCount;
00135  public:
00136   TrainCfg(Config &);
00137   double getInitVarFloor(){return _initVarianceFlooring;}
00138   double getInitVarCeil(){return _initVarianceCeiling;}
00139   double getFinalVarFloor(){return _finalVarianceFlooring;}
00140   double getFinalVarCeil(){return _finalVarianceCeiling;}
00141   unsigned long getNbTrainIt(){return _nbTrainIt;}
00142   bool getNormalizeModel(){return  _normalizeModel;}
00143   bool getNormalizeModelMeanOnly(){return _normalizeModelMeanOnly;}
00144   unsigned long getNormalizeModelNbIt(){return _normalizeModelNbIt;}
00145   double getBaggedFrameProbability(){return _baggedFrameProbability;}
00146   double getBaggedFrameProbabilityInit(){return _baggedFrameProbabilityInit;}
00147   bool getComponentReduction(){return _componentReduction;}
00148   unsigned long getTargetDistribCount(){return _targetDistribCount;}
00149   void setInitVarFlooring (double v){_initVarianceFlooring=v;}
00150   void setInitVarCeiling (double v){_initVarianceCeiling=v;} 
00151   void setFinalVarFlooring (double v){_finalVarianceFlooring=v;}
00152   void setFinalVarCeiling (double v){_finalVarianceCeiling=v;} 
00153   void setNbTrainIt(unsigned long v){_nbTrainIt=v;}
00154   void setNormalizeModel(bool v){_normalizeModel=v;}
00155   void setNormalizeModelMeanOnly(bool v){_normalizeModelMeanOnly=v;}  
00156   void setNormalizeModelNbIt(unsigned long v){_normalizeModelNbIt=v;}
00157   void setBaggedFrameProbability(double v){_baggedFrameProbability=v;}
00158   void setBaggedFrameProbabilityInit(double v){_baggedFrameProbabilityInit=v;}
00159   void showConfig(ostream &);
00160 };
00161 // Mixture and ditrib tools
00162 LIA_SPKTOOLS_API void fuseModels(const MixtureGD &,unsigned long,const MixtureGD &,unsigned long,MixtureGD &);
00163 LIA_SPKTOOLS_API void copyMixture(DistribGD &,DistribGD &);
00164 LIA_SPKTOOLS_API unsigned long selectComponent(bool selectCompA[],XList & distribL, MixtureGD &inputM);
00165 LIA_SPKTOOLS_API unsigned long selectComponent(bool selectCompA[],double wFactor,MixtureGD &inputM);
00166 LIA_SPKTOOLS_API unsigned long selectComponent(bool selectCompA[],unsigned long nbTop,MixtureGD &inputM);
00167 LIA_SPKTOOLS_API double reduceModel(bool selectCompA[],MixtureGD &inputM,MixtureGD &outputM);
00168 LIA_SPKTOOLS_API void normalizeWeights(MixtureGD &outputM);
00169 
00170 // gaussian/mixture fusion using less likelihood loss criterion
00171 LIA_SPKTOOLS_API void gaussianFusion(const DistribGD &g1,double w1,const DistribGD & g2,double w2, DistribGD &res,double &w);
00172 LIA_SPKTOOLS_API void mixtureFusion(const MixtureGD &mixt,DistribGD &res,double &wres);
00173 
00174 // normalizeMixture() normmalizes  the mixture in order to fit
00175 // the data distribution
00176 // Usually used with a mean=0, cov=1 distribution
00177 LIA_SPKTOOLS_API void normalizeMixture(MixtureGD &mixt,const DoubleVector &meanSignal,const DoubleVector &covSignal,bool zeroOne,
00178                       unsigned long nbIt, bool meanOnly,Config &config);
00179 LIA_SPKTOOLS_API void normalizeMixture(MixtureGD &mixt,const DoubleVector &meanSignal,const DoubleVector &covSignal,Config &config);
00180 // for compatibility reasons only, same function but the target is always D(mean=0,std=1)
00181 LIA_SPKTOOLS_API void normalizeMixture(MixtureGD &mixt,Config &config);
00182 LIA_SPKTOOLS_API void normalizeMixture(MixtureGD & mixt, MAPCfg & mapCfg, Config & config);
00183 
00184 LIA_SPKTOOLS_API double likelihoodLoss(const DistribGD &g1,double w1,const DistribGD &g2,double w2);
00185 LIA_SPKTOOLS_API double *outputMAPTransformation(MixtureGD & world, MixtureGD & clientMixture,
00186   Config & config);
00187 //-------------------------------------------------------------------------
00188 // MAP computation functions 
00189 // the main MAP functions
00190 LIA_SPKTOOLS_API void computeMAP(MixtureServer &ms,const MixtureGD& world,MixtureGD &client,unsigned long frameCount,Config &config);
00191 LIA_SPKTOOLS_API void computeMAP(MixtureServer &ms,const MixtureGD& world,MixtureGD &client,unsigned long frameCount,MAPCfg &cfg);
00192 
00193 // Model manipulation tools
00194 LIA_SPKTOOLS_API void copyMean(MixtureGD & mixtS, MixtureGD & mixtD);
00195 LIA_SPKTOOLS_API void copyVar(MixtureGD & mixtS, MixtureGD & mixtD);
00196 LIA_SPKTOOLS_API void copyWeight(MixtureGD & mixtS, MixtureGD & mixtD);
00197 
00198 //-------------------------------------------------------------------------
00199 LIA_SPKTOOLS_API double setItParameter(double begin, double end, int nbIt, int it);
00200 
00201 //-------------------------------------------------------------------------
00202 LIA_SPKTOOLS_API void varianceControl(MixtureGD& model,double flooring,double ceiling,const DoubleVector &covSignal);
00203 
00204 // **********************************************************************
00205 // Mixture Initialization stuff
00206 //***********************************************************************
00207 // cov and mean initialization
00208 LIA_SPKTOOLS_API unsigned long computeMeanCov(Config &config,FeatureServer **fsTab,SegCluster ** segTab,unsigned long nbStream,DoubleVector &mean,DoubleVector &cov);
00209 LIA_SPKTOOLS_API unsigned long computeMeanCov(Config &config,FeatureServer &fs,SegCluster &seg,DoubleVector &mean,DoubleVector &cov);
00210 LIA_SPKTOOLS_API void initialize01(unsigned long vectSize,DoubleVector &mean,DoubleVector &cov);
00211 // Mixture initialisation, based on random picking of frames
00212 LIA_SPKTOOLS_API MixtureGD &mixtureInit(MixtureServer &ms,FeatureServer &fs,MixtureGD &world,
00213                        SegCluster &selectedSegments,const DoubleVector &globalCov, Config& config);
00214 LIA_SPKTOOLS_API MixtureGD &mixtureInit(MixtureServer &ms,FeatureServer &fs,MixtureGD &world,
00215                        SegCluster &selectedSegments,const DoubleVector &globalCov, Config& config,TrainCfg & trainCfg);
00216 LIA_SPKTOOLS_API MixtureGD &mixtureInit(MixtureServer &ms,FeatureServer **fsTab, SegCluster **segTab,double *weightTab,unsigned long nbStream,MixtureGD &world,
00217                        const DoubleVector &globalCov, Config& config,TrainCfg &trainCfg);
00218 LIA_SPKTOOLS_API MixtureGD &mixtureInit(MixtureServer &ms,FeatureServer **fsTab, SegCluster **segTab,double*weightTab,unsigned long nbStream,MixtureGD &world,
00219                        const DoubleVector &globalCov, Config& config);
00220 // The main function for estimate a client model by bayesian adaptattion of a world model
00221 // Using EM and MAP 
00222 LIA_SPKTOOLS_API void adaptModel(Config& config,StatServer &ss,MixtureServer &ms,FeatureServer &fs,SegCluster& selectedSegments,MixtureGD &world,MixtureGD &clientMixture);//A.P.
00223 LIA_SPKTOOLS_API void adaptModel(Config& config,StatServer &ss,MixtureServer &ms,FeatureServer &fs,SegCluster& selectedSegments,MixtureGD &world,MixtureGD &clientMixture, MAPCfg &cfg);
00224 // ** New training algo based on a true EM/ML estimate of the training data before to apply MAP
00225 LIA_SPKTOOLS_API void modelBasedadaptModel(Config& config,StatServer &ss,MixtureServer &ms,FeatureServer &fs,SegCluster& selectedSegments,
00226                           MixtureGD &aprioriModel,MixtureGD &clientMixture, MixtureGD &initModel);
00227 
00228                 
00229 
00230                 
00231 
00232 //-------------------------------------------------------------------------
00233 // TrainModel is the main function for model training
00234 // It works on an initialized model and on a clmuster of segments
00235 LIA_SPKTOOLS_API void trainModel(Config& config,StatServer &ss,FeatureServer &fs,SegCluster& selectedSegments,
00236                 DoubleVector & globalMean,DoubleVector &globalCov,MixtureGD &world);
00237 LIA_SPKTOOLS_API void trainModel(Config& config,StatServer &ss,FeatureServer &fs,SegCluster& selectedSegments,
00238                 DoubleVector & globalMean,DoubleVector &globalCov,MixtureGD &world,TrainCfg &trainCfg);
00239 LIA_SPKTOOLS_API void trainModelStream(Config& config,MixtureServer &ms,StatServer &ss,FeatureServer **fsTab,SegCluster** segTab,double *weightTab,unsigned long nbStream,
00240                 DoubleVector & globalMean,DoubleVector &globalCov,MixtureGD* &world,TrainCfg &trainCfg);
00241 LIA_SPKTOOLS_API void trainModel(Config& config,MixtureServer &ms,StatServer &ss,FeatureServer **fsTab,SegCluster** segTab,double *weightTab,unsigned long nbStream,
00242                 DoubleVector & globalMean,DoubleVector &globalCov,MixtureGD* &world);
00243                 
00244 
00245 //  Function for adapting a model by MLLR
00246 LIA_SPKTOOLS_API Matrix<double> computeMLLR (MixtureGD &inM,MixtureGD& outM,unsigned long frameCount, Config &config);
00247 
00248 
00249 #endif //!defined(ALIZE_TrainTools_h)