MixtureGFStat.cpp

Go to the documentation of this file.
00001 /*
00002         This file is part of ALIZE which is an open-source tool for 
00003         speaker recognition.
00004 
00005     ALIZE is free software: you can redistribute it and/or modify
00006     it under the terms of the GNU Lesser General Public License as 
00007     published by the Free Software Foundation, either version 3 of 
00008     the License, or any later version.
00009 
00010     ALIZE is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU Lesser General Public License for more details.
00014 
00015     You should have received a copy of the GNU Lesser General Public 
00016     License along with ALIZE.
00017     If not, see <http://www.gnu.org/licenses/>.
00018         
00019         ALIZE is a development project initiated by the ELISA consortium
00020         [alize.univ-avignon.fr/] and funded by the French Research 
00021         Ministry in the framework of the TECHNOLANGUE program 
00022         [www.technolangue.net]
00023 
00024         The ALIZE project team wants to highlight the limits of voice
00025         authentication in a forensic context.
00026         The "Person  Authentification by Voice: A Need of Caution" paper 
00027         proposes a good overview of this point (cf. "Person  
00028         Authentification by Voice: A Need of Caution", Bonastre J.F., 
00029         Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00030         chagnolleau I., Eurospeech 2003, Genova].
00031         The conclusion of the paper is reproduced below:
00032         [Currently, it is not possible to completely determine whether the 
00033         similarity between two recordings is due to the speaker or to other 
00034         factors, especially when: (a) the speaker does not cooperate, (b) there 
00035         is no control over recording equipment, (c) recording conditions are not 
00036         known, (d) one does not know whether the voice was disguised and, to a 
00037         lesser extent, (e) the linguistic content of the message is not 
00038         controlled. Caution and judgment must be exercised when applying speaker 
00039         recognition techniques, whether human or automatic, to account for these 
00040         uncontrolled factors. Under more constrained or calibrated situations, 
00041         or as an aid for investigative purposes, judicious application of these 
00042         techniques may be suitable, provided they are not considered as infallible.
00043         At the present time, there is no scientific process that enables one to 
00044         uniquely characterize a person's voice or to identify with absolute 
00045         certainty an individual from his or her voice.]
00046         Contact Jean-Francois Bonastre for more information about the licence or
00047         the use of ALIZE
00048 
00049         Copyright (C) 2003-2010
00050         Laboratoire d'informatique d'Avignon [lia.univ-avignon.fr]
00051         ALIZE admin [alize@univ-avignon.fr]
00052         Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_MixtureGFStat_cpp)
00056 #define ALIZE_MixtureGFStat_cpp
00057 
00058 #include <new>
00059 #include "MixtureGFStat.h"
00060 #include "alizeString.h"
00061 #include "Feature.h"
00062 #include "DistribGF.h"
00063 #include "Mixture.h"
00064 #include "MixtureGF.h"
00065 #include "DistribRefVector.h"
00066 #include "Config.h"
00067 #include "Exception.h"
00068 #include "StatServer.h"
00069 
00070 using namespace alize;
00071 typedef MixtureGFStat M;
00072 
00073 //-------------------------------------------------------------------------
00074 M::MixtureGFStat(const K&, StatServer& ss, const MixtureGF& m, const Config& c)
00075 :MixtureStat(ss, m, c), _pMixForAccumulation(NULL), _pMixtureForEM(NULL) {}
00076 //-------------------------------------------------------------------------
00077 MixtureGFStat& M::create(const K&, StatServer& ss,
00078                                      const MixtureGF& m, const Config& c)
00079 {
00080   MixtureGFStat* p = new (std::nothrow) MixtureGFStat(K::k, ss, m, c);
00081   assertMemoryIsAllocated(p, __FILE__, __LINE__);
00082   return *p;
00083 }
00084 //-------------------------------------------------------------------------
00085 void M::resetEM()
00086 {
00087   assert(_pMixture->getDistribCount() == _distribCount);
00088   resetOcc();
00089 
00090   // copy the original mixture and its ditributions
00091   if (_pMixtureForEM != NULL)
00092     delete _pMixtureForEM;
00093   _pMixtureForEM = &static_cast<MixtureGF&>(_pMixture->duplicate(K::k,
00094                                             DUPL_DISTRIB));
00095   // create and initialize a temporary mixtureGF to accumulate cov and mean
00096   if (_pMixForAccumulation != NULL)
00097     delete _pMixForAccumulation;
00098   _pMixForAccumulation = &MixtureGF::create(K::k, "",
00099                       _pMixtureForEM->getVectSize(),
00100                       _pMixtureForEM->getDistribCount());
00101   unsigned long vectSize = _pMixture->getVectSize();
00102   for (unsigned long cc=0; cc<_distribCount; cc++)
00103   {
00104     DistribGF& d = _pMixForAccumulation->getDistrib(cc);
00105 
00106     real_t* m = d.getMeanVect().getArray();     
00107     real_t* c = d.getCovMatrix().getArray();
00108         
00109     for (unsigned long i=0; i<vectSize; i++)
00110     {
00111       m[i] = 0.0;
00112       for (unsigned long j=0; j<vectSize; j++)
00113         c[i + j*vectSize] = 0.0;
00114         c[0 + 0*vectSize] = 1e200; // to avoid throwing exception "Matrix is not positive definite"
00115     }
00116         
00117   }
00118   _featureCounterForEM = 0.0;
00119   _resetedEM = true;
00120 }
00121 
00122 //-------------------------------------------------------------------------
00123 occ_t M::computeAndAccumulateEM(const Feature& f, double w)
00124 {
00125   assertResetEMDone();
00126   real_t sum = computeAndAccumulateOcc(f, w);
00127   Feature::data_t* dataVect = f.getDataVector();
00128   unsigned long vectSize = _pMixture->getVectSize();
00129   unsigned long vectSize2 = vectSize*vectSize;
00130 
00131   for (unsigned long c=0; c<_distribCount; c++)
00132   {
00133     DistribGF& d = _pMixForAccumulation->getDistrib(c);
00134     real_t* dTmpMeanVect = d.getMeanVect().getArray();
00135     real_t* dTmpCovMatr  = d.getCovMatrix().getArray();
00136     
00137     for (unsigned long i=0; i<vectSize; i++)
00138     {
00139       real_t mean = _occVect[c] * dataVect[i];
00140       dTmpMeanVect[i] += mean;
00141       for (unsigned long j=i*vectSize; j<vectSize2; j += vectSize)
00142         dTmpCovMatr[i+j]  += mean * dataVect[j];
00143     }
00144   }
00145     _featureCounterForEM += w;
00146   return sum;
00147 }
00148 //-------------------------------------------------------------------------
00149 void M::addAccEM(const MixtureStat& mx)
00150 {
00151   const MixtureGFStat* p = dynamic_cast<const MixtureGFStat*>(&mx);
00152   if (p == NULL)
00153     throw Exception("MixtureStat incompatibility", __FILE__, __LINE__);
00154   if (p->_distribCount != _distribCount)
00155     throw Exception("MixtureStat incompatibility", __FILE__, __LINE__);
00156   //const MixtureGFStat& m = static_cast<const MixtureGFStat&>(mx);
00157 
00158   throw Exception("unimplemented method :o(", __FILE__, __LINE__);
00159   // TODO : implement this method
00160 }
00161 //-------------------------------------------------------------------------
00162 const Mixture& M::getEM()
00163 {
00164   assertResetEMDone();
00165   unsigned long vectSize = _pMixture->getVectSize();
00166   unsigned long c, idx, vectSize2 = vectSize*vectSize;
00167   occ_t occ, totOcc = 0.0;
00168   real_t* dTmpCovMatr;
00169   real_t* dTmpMeanVect;
00170   real_t* dCovMatr;
00171   real_t* dMeanVect;
00172   real_t mean, mean2, cov;
00173 
00174   for (c=0; c<_distribCount; c++)
00175     totOcc += _accumulatedOccVect[c];
00176 
00177   for (c=0; c<_distribCount; c++)
00178   {
00179     occ = _accumulatedOccVect[c];
00180     if (occ > 0.0)
00181     {
00182       DistribGF& dTmp = _pMixForAccumulation->getDistrib(c);
00183       dTmpCovMatr  = dTmp.getCovMatrix().getArray();
00184       dTmpMeanVect = dTmp.getMeanVect().getArray();
00185 
00186       DistribGF& d = _pMixtureForEM->getDistrib(c);
00187       dCovMatr  = d.getCovMatrix().getArray();
00188       dMeanVect = d.getMeanVect().getArray();
00189 
00190 
00191       for (unsigned long i=0; i<vectSize; i++)
00192       {
00193         dMeanVect[i] = mean = dTmpMeanVect[i] / occ;
00194         mean2 = mean*mean;
00195         for (unsigned long j=0; j<vectSize2; j += vectSize)
00196         {
00197           idx = i+j;
00198           cov  = dTmpCovMatr[idx] / occ - mean2;
00199           if (cov >= MIN_COV )
00200             dCovMatr[idx] = cov;
00201           else
00202             dCovMatr[idx] = MIN_COV;
00203         }
00204       }
00205       _pMixtureForEM->weight(c) = occ/totOcc;
00206       d.computeAll();
00207     }
00208   }
00209   return *_pMixtureForEM;
00210 }
00211 //-------------------------------------------------------------------------
00212 MixtureGF& M::getInternalAccumEM()
00213 {
00214   assertResetEMDone();
00215   assert(_pMixForAccumulation != NULL);
00216   return *_pMixForAccumulation;
00217 }
00218 //-------------------------------------------------------------------------
00219 String M::getClassName() const { return "MixtureGFStat"; }
00220 //-------------------------------------------------------------------------
00221 M::~MixtureGFStat()
00222 {
00223   if (_pMixForAccumulation != NULL)
00224     delete _pMixForAccumulation;
00225   if (_pMixtureForEM != NULL)
00226     delete _pMixtureForEM;
00227 }
00228 //-------------------------------------------------------------------------
00229 
00230 #endif // !defined(ALIZE_MixtureGFStat_cpp)
00231