MixtureGDStat.cpp

Go to the documentation of this file.
00001 /*
00002         This file is part of ALIZE which is an open-source tool for 
00003         speaker recognition.
00004 
00005     ALIZE is free software: you can redistribute it and/or modify
00006     it under the terms of the GNU Lesser General Public License as 
00007     published by the Free Software Foundation, either version 3 of 
00008     the License, or any later version.
00009 
00010     ALIZE is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU Lesser General Public License for more details.
00014 
00015     You should have received a copy of the GNU Lesser General Public 
00016     License along with ALIZE.
00017     If not, see <http://www.gnu.org/licenses/>.
00018         
00019         ALIZE is a development project initiated by the ELISA consortium
00020         [alize.univ-avignon.fr/] and funded by the French Research 
00021         Ministry in the framework of the TECHNOLANGUE program 
00022         [www.technolangue.net]
00023 
00024         The ALIZE project team wants to highlight the limits of voice
00025         authentication in a forensic context.
00026         The "Person  Authentification by Voice: A Need of Caution" paper 
00027         proposes a good overview of this point (cf. "Person  
00028         Authentification by Voice: A Need of Caution", Bonastre J.F., 
00029         Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00030         chagnolleau I., Eurospeech 2003, Genova].
00031         The conclusion of the paper is reproduced below:
00032         [Currently, it is not possible to completely determine whether the 
00033         similarity between two recordings is due to the speaker or to other 
00034         factors, especially when: (a) the speaker does not cooperate, (b) there 
00035         is no control over recording equipment, (c) recording conditions are not 
00036         known, (d) one does not know whether the voice was disguised and, to a 
00037         lesser extent, (e) the linguistic content of the message is not 
00038         controlled. Caution and judgment must be exercised when applying speaker 
00039         recognition techniques, whether human or automatic, to account for these 
00040         uncontrolled factors. Under more constrained or calibrated situations, 
00041         or as an aid for investigative purposes, judicious application of these 
00042         techniques may be suitable, provided they are not considered as infallible.
00043         At the present time, there is no scientific process that enables one to 
00044         uniquely characterize a person's voice or to identify with absolute 
00045         certainty an individual from his or her voice.]
00046         Contact Jean-Francois Bonastre for more information about the licence or
00047         the use of ALIZE
00048 
00049         Copyright (C) 2003-2010
00050         Laboratoire d'informatique d'Avignon [lia.univ-avignon.fr]
00051         ALIZE admin [alize@univ-avignon.fr]
00052         Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_MixtureGDStat_cpp)
00056 #define ALIZE_MixtureGDStat_cpp
00057 
00058 #include <new>
00059 #include "MixtureGDStat.h"
00060 #include "alizeString.h"
00061 #include "Feature.h"
00062 #include "DistribGD.h"
00063 #include "Mixture.h"
00064 #include "MixtureGD.h"
00065 #include "DistribRefVector.h"
00066 #include "Config.h"
00067 #include "Exception.h"
00068 
00069 using namespace alize;
00070 typedef MixtureGDStat M;
00071 
00072 //-------------------------------------------------------------------------
00073 M::MixtureGDStat(const K&, StatServer& ss, const MixtureGD& m, const Config& c)
00074 :MixtureStat(ss, m, c), _pMixForAccumulation(NULL), _pMixtureForEM(NULL) {}
00075 //-------------------------------------------------------------------------
00076 MixtureGDStat& M::create(const K&, StatServer& ss,
00077                                      const MixtureGD& m, const Config& c)
00078 {
00079   MixtureGDStat* p = new (std::nothrow) MixtureGDStat(K::k, ss, m, c);
00080   assertMemoryIsAllocated(p, __FILE__, __LINE__);
00081   return *p;
00082 }
00083 //-------------------------------------------------------------------------
00084 void M::resetEM()
00085 {
00086   assert(_pMixture->getDistribCount() == _distribCount);
00087   resetOcc();
00088 
00089   // copy the original mixture and its ditributions
00090   if (_pMixtureForEM != NULL)
00091     delete _pMixtureForEM;
00092   _pMixtureForEM = &static_cast<MixtureGD&>(_pMixture->duplicate(K::k,
00093                                             DUPL_DISTRIB));
00094   // create and initialize a temporary mixtureGD to accumulate cov and mean
00095   if (_pMixForAccumulation != NULL)
00096     delete _pMixForAccumulation;
00097   _pMixForAccumulation = &MixtureGD::create(K::k, "",
00098                       _pMixtureForEM->getVectSize(),
00099                       _pMixtureForEM->getDistribCount());
00100   for (unsigned long c=0; c<_distribCount; c++)
00101   {
00102     DistribGD& d = _pMixForAccumulation->getDistrib(c);
00103     d.getMeanVect().setAllValues(0.0);
00104     d.getCovVect().setAllValues(0.0);
00105   }
00106   _featureCounterForEM = 0.0;
00107   _resetedEM = true;
00108 }
00109 //-------------------------------------------------------------------------
00110 occ_t M::computeAndAccumulateEM(const Feature& f, double w)
00111 {
00112   assertResetEMDone();
00113   real_t sum = computeAndAccumulateOcc(f, w);
00114 
00115   Feature::data_t* dataVect = f.getDataVector();
00116   real_t t, *meanVect, *covVect;
00117   unsigned long vectSize = _pMixture->getVectSize();
00118 
00119   for (unsigned long c=0; c<_distribCount; c++)
00120   {
00121     DistribGD& d = _pMixForAccumulation->getDistrib(c);
00122     meanVect = d.getMeanVect().getArray();
00123     covVect  = d.getCovVect().getArray();
00124     
00125     for (unsigned long i=0; i<vectSize; i++)
00126     {
00127       t = _occVect[c] * dataVect[i];
00128       covVect[i]  += t * dataVect[i];
00129       meanVect[i] += t;
00130     }
00131   }
00132   _featureCounterForEM += w;
00133   return sum;
00134 }
00135 //-------------------------------------------------------------------------
00136 void M::addAccEM(const MixtureStat& mx)
00137 {
00138   const MixtureGDStat* p = dynamic_cast<const MixtureGDStat*>(&mx);
00139   if (p == NULL)
00140     throw Exception("MixtureStat incompatibility", __FILE__, __LINE__);
00141   if (p->_distribCount != _distribCount)
00142     throw Exception("MixtureStat incompatibility", __FILE__, __LINE__);
00143   const MixtureGDStat& m = static_cast<const MixtureGDStat&>(mx);
00144 
00145   _accumulatedOccVect += m._accumulatedOccVect;
00146   _featureCounterForAccumulatedOcc += m._featureCounterForAccumulatedOcc;
00147 
00148   for (unsigned long c=0; c<_distribCount; c++)
00149   {
00150     DistribGD& d = _pMixForAccumulation->getDistrib(c);
00151     const DistribGD& d2 = m._pMixForAccumulation->getDistrib(c);
00152     d.getCovVect() += d2.getCovVect();
00153     d.getMeanVect() += d2.getMeanVect();
00154   }
00155   _featureCounterForEM += m._featureCounterForEM;
00156 }
00157 //-------------------------------------------------------------------------
00158 const Mixture& M::getEM()
00159 {
00160   assertResetEMDone();
00161   unsigned long c;
00162 
00163   occ_t totOcc = _accumulatedOccVect.computeSum();
00164   unsigned long vectSize = _pMixture->getVectSize();
00165 
00166   for (c=0; c<_distribCount; c++)
00167   {
00168     const occ_t occ = _accumulatedOccVect[c];
00169     if (occ > 0.0)
00170     {
00171       DistribGD& dTmp = _pMixForAccumulation->getDistrib(c);
00172       real_t* dTmpCovVect  = dTmp.getCovVect().getArray();
00173       real_t* dTmpMeanVect = dTmp.getMeanVect().getArray();
00174 
00175       DistribGD& d  = _pMixtureForEM->getDistrib(c);
00176       real_t* dCovVect   = d.getCovVect().getArray();
00177       real_t* dMeanVect  = d.getMeanVect().getArray();
00178 
00179       real_t mean, cov;
00180 
00181       for (unsigned long i=0; i<vectSize; i++)
00182       {
00183         mean = dTmpMeanVect[i] / occ;
00184         cov  = dTmpCovVect [i] / occ - mean * mean;
00185         if (cov >MIN_COV)
00186           dCovVect [i] = cov;
00187         else
00188           dCovVect [i] = MIN_COV;
00189         dMeanVect[i] = mean;
00190       }
00191       _pMixtureForEM->weight(c) = occ/totOcc;
00192       d.computeAll();
00193     }
00194   }
00195   return *_pMixtureForEM;
00196 }
00197 //-------------------------------------------------------------------------
00198 MixtureGD& M::getInternalAccumEM()
00199 {
00200   assertResetEMDone();
00201   assert(_pMixForAccumulation != NULL);
00202   return *_pMixForAccumulation;
00203 }
00204 //-------------------------------------------------------------------------
00205 String M::getClassName() const { return "MixtureGDStat"; }
00206 //-------------------------------------------------------------------------
00207 M::~MixtureGDStat()
00208 {
00209   if (_pMixForAccumulation != NULL)
00210     delete _pMixForAccumulation;
00211   if (_pMixtureForEM != NULL)
00212     delete _pMixtureForEM;
00213 }
00214 //-------------------------------------------------------------------------
00215 
00216 #endif // !defined(ALIZE_MixtureGDStat_cpp)
00217