MixtureServerFileReaderXml.cpp

Go to the documentation of this file.
00001 /*
00002         This file is part of ALIZE which is an open-source tool for 
00003         speaker recognition.
00004 
00005     ALIZE is free software: you can redistribute it and/or modify
00006     it under the terms of the GNU Lesser General Public License as 
00007     published by the Free Software Foundation, either version 3 of 
00008     the License, or any later version.
00009 
00010     ALIZE is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU Lesser General Public License for more details.
00014 
00015     You should have received a copy of the GNU Lesser General Public 
00016     License along with ALIZE.
00017     If not, see <http://www.gnu.org/licenses/>.
00018         
00019         ALIZE is a development project initiated by the ELISA consortium
00020         [alize.univ-avignon.fr/] and funded by the French Research 
00021         Ministry in the framework of the TECHNOLANGUE program 
00022         [www.technolangue.net]
00023 
00024         The ALIZE project team wants to highlight the limits of voice
00025         authentication in a forensic context.
00026         The "Person  Authentification by Voice: A Need of Caution" paper 
00027         proposes a good overview of this point (cf. "Person  
00028         Authentification by Voice: A Need of Caution", Bonastre J.F., 
00029         Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00030         chagnolleau I., Eurospeech 2003, Genova].
00031         The conclusion of the paper is given below:
00032         [Currently, it is not possible to completely determine whether the 
00033         similarity between two recordings is due to the speaker or to other 
00034         factors, especially when: (a) the speaker does not cooperate, (b) there 
00035         is no control over recording equipment, (c) recording conditions are not 
00036         known, (d) one does not know whether the voice was disguised and, to a 
00037         lesser extent, (e) the linguistic content of the message is not 
00038         controlled. Caution and judgment must be exercised when applying speaker 
00039         recognition techniques, whether human or automatic, to account for these 
00040         uncontrolled factors. Under more constrained or calibrated situations, 
00041         or as an aid for investigative purposes, judicious application of these 
00042         techniques may be suitable, provided they are not considered as infallible.
00043         At the present time, there is no scientific process that enables one to 
00044         uniquely characterize a person's voice or to identify with absolute 
00045         certainty an individual from his or her voice.]
00046         Contact Jean-Francois Bonastre for more information about the licence or
00047         the use of ALIZE
00048 
00049         Copyright (C) 2003-2010
00050         Laboratoire d'informatique d'Avignon [lia.univ-avignon.fr]
00051         ALIZE admin [alize@univ-avignon.fr]
00052         Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_MixtureServerFileReaderXml_cpp)
00056 #define ALIZE_MixtureServerFileReaderXml_cpp
00057 
00058 #include <new>
00059 #include "MixtureServerFileReaderXml.h"
00060 #include "MixtureServer.h"
00061 #include "MixtureGD.h"
00062 #include "MixtureGF.h"
00063 #include "DistribGD.h"
00064 #include "DistribGF.h"
00065 #include "Exception.h"
00066 #include "XmlParser.h"
00067 #include "Config.h"
00068 #include "FileReader.h"
00069 
00070 // see http://babel.alis.com/web_ml/xml/REC-xml.fr.html#NT-XMLDecl
00071 
// Bring the alize namespace into scope for this translation unit and define
// R as a short alias for MixtureServerFileReaderXml; the member definitions
// below are all written as R::...
00072 using namespace alize;
00073 typedef MixtureServerFileReaderXml R;
00074 
00075 //-------------------------------------------------------------------------
00076 R::MixtureServerFileReaderXml(const FileName& f, const Config& c)
00077 :MixtureServerFileReaderAbstract(&FileReader::create(f, getPath(f, c),
00078  getExt(f, c), false /* no swap */)), XmlParser() {}
00079 //-------------------------------------------------------------------------
00080 R& R::create(const FileName& f, const Config& c)
00081 {
00082   R* p = new (std::nothrow) R(f, c);
00083   assertMemoryIsAllocated(p, __FILE__, __LINE__);
00084   return *p;
00085 }
00086 //-------------------------------------------------------------------------
00087 void R::readMixtureServer(MixtureServer& ms)
00088 {
00089   assert(_pReader != NULL);
00090   _line = 1;
00091   _pMixtureServer = &ms;
00092   parse();
00093   _pReader->close();
00094 }
00095 //-------------------------------------------------------------------------
00096 void R::eventOpeningElement(const String& path)
00097 {
00098   if (false) {}
00099   else if (path.endsWith("<mean>"))
00100   {
00101     _meanIndexFound = false;
00102   }
00103   else if (path.endsWith("<covInv>"))
00104   {
00105     _covInvIndexFound = false;
00106     _covInvIndexJFound = false;
00107   }
00108   else if (path.endsWith("<cov>"))
00109   {
00110     _covIndexFound = false;
00111   }
00112   else if (path.endsWith("<mean><i>"))   {}
00113   else if (path.endsWith("<covInv><i>") || path.endsWith("<covInv><j>")) {}
00114   else if (path.endsWith("<cov><i>") || path.endsWith("<cov><j>"))  {}
00115   else if (path.endsWith("<MixtureServer><DistribGD>"))
00116   {
00117     _distribTypeDefined = true;
00118     _distribType = DistribType_GD;
00119     _pDistrib = NULL;
00120   }
00121   else if (path.endsWith("<MixtureServer><DistribGF>"))
00122   {
00123     _distribTypeDefined = true;
00124     _distribType = DistribType_GF;
00125     _pDistrib = NULL;
00126   }
00127   else if (path.endsWith("<MixtureServer><DistribGD><i>")) {}
00128   else if (path.endsWith("<MixtureServer><DistribGF><i>")) {}
00129   else if (path.endsWith("<MixtureServer><MixtureGD><DistribGD>"))
00130   {
00131     _distribIndexFound = false;
00132     _weightFound = false;
00133     _distribTypeDefined = true;
00134     _distribType = DistribType_GD;
00135   }
00136   else if (path.endsWith("<MixtureServer><MixtureGF><DistribGF>"))
00137   {
00138     _distribIndexFound = false;
00139     _weightFound = false;
00140     _distribTypeDefined = true;
00141     _distribType = DistribType_GF;
00142   }
00143   else if (path.endsWith("<MixtureServer><MixtureGD><DistribGD><i>"))     {}
00144   else if (path.endsWith("<MixtureServer><MixtureGF><DistribGF><i>"))     {}
00145   else if (path.endsWith("<MixtureServer><MixtureGD><DistribGD><dictIdx>")) {}
00146   else if (path.endsWith("<MixtureServer><MixtureGF><DistribGF><dictIdx>")) {}
00147   else if (path.endsWith("<MixtureServer><MixtureGD><DistribGD><weight>"))  {}
00148   else if (path.endsWith("<MixtureServer><MixtureGF><DistribGF><weight>"))  {}
00149   else if (path.endsWith("<MixtureServer><MixtureGD>"))
00150   {
00151     _pMixture = NULL;
00152     _mixtType = DistribType_GD;
00153     _mixtTypeDefined = true;
00154     _distribTypeDefined = false;
00155   }
00156   else if (path.endsWith("<MixtureServer><MixtureGF>"))
00157   {
00158     _pMixture = NULL;
00159     _mixtType = DistribType_GF;
00160     _mixtTypeDefined = true;
00161     _distribTypeDefined = false;
00162   }
00163   else if (path.endsWith("<MixtureServer><MixtureGD><id>"))       {}
00164   else if (path.endsWith("<MixtureServer><MixtureGF><id>"))       {}
00165   else if (path.endsWith("<MixtureServer><MixtureGD><distribCount>")) {}
00166   else if (path.endsWith("<MixtureServer><MixtureGF><distribCount>")) {}
00167   else if (path.endsWith("<MixtureServer><vectSize>"))
00168   {
00169     if (_vectSizeFound)
00170       eventError("More than one tag " + path + " !");
00171     _vectSizeFound = true;
00172   }
00173   else if (path.endsWith("<MixtureServer><version>"))    {}
00174   else if (path.endsWith("<MixtureServer><mixtureCount>")) {}
00175   else if (path.endsWith("<MixtureServer><distribCount>")) {}
00176   else if (path.endsWith("<MixtureServer><name>"))     {}
00177   else if (path.endsWith("<MixtureServer>"))
00178   {
00179     _pMixtureServer->reset();
00180     _pMixtureServer->setServerName("");
00181     _vectSizeFound = false;
00182     _mixtTypeDefined = false;
00183   }
00184   else
00185     eventError("Unknown tag in the path " + path);
00186 }
00187 //-------------------------------------------------------------------------
00188 void R::eventClosingElement(const String& path,
00189                              const String& value)
00190 {
00191   if (false) {}
00192   else if (path.endsWith("<mean>"))
00193   {
00194     if (!_meanIndexFound)
00195       eventError("Index missing for mean");
00196     if (!_distribTypeDefined)
00197       eventError("unknown mixture type");
00198     if (_distribType == DistribType_GD)
00199       getDistribGD().setMean(value.toDouble(), _meanIndex);
00200     else if (_distribType == DistribType_GF)
00201       getDistribGF().setMean(value.toDouble(), _meanIndex);
00202     _meanIndexFound = false;
00203   }
00204   else if (path.endsWith("<covInv>"))
00205   {
00206     if (!_covInvIndexFound)
00207       eventError("Index i missing for covInv");
00208     if (!_distribTypeDefined)
00209       eventError("unknown mixture type");
00210     if (_distribType == DistribType_GD)
00211       getDistribGD().setCovInv(K::k, value.toDouble(), _covInvIndex);
00212     else if (_distribType == DistribType_GF)
00213     {
00214       if (!_covInvIndexJFound)
00215         eventError("Index j missing for covInv");
00216       getDistribGF().setCovInv(K::k, value.toDouble(), _covInvIndex, _covInvIndexJ);
00217     }
00218   }
00219   else if (path.endsWith("<cov>"))
00220   {
00221     if (!_covIndexFound)
00222       eventError("Index missing for cov");
00223     if (!_distribTypeDefined)
00224       eventError("unknown mixture type");
00225     if (_distribType == DistribType_GD)
00226     {
00227       getDistribGD().setCov(value.toDouble(), _covIndex);
00228     }
00229     else if (_distribType == DistribType_GF)
00230     {
00231       if (!_covIndexFound)
00232         eventError("Index j missing for cov");
00233       getDistribGF().setCov(value.toDouble(), _covIndex, _covIndexJ);
00234     }
00235   }
00236   else if (path.endsWith("<mean><i>"))
00237   {
00238     _meanIndex = value.toLong();
00239     _meanIndexFound = true;
00240   }
00241   else if (path.endsWith("<covInv><i>"))
00242   {
00243     _covInvIndex = value.toLong();
00244     _covInvIndexFound = true;
00245   }
00246   else if (path.endsWith("<covInv><j>"))
00247   {
00248     _covInvIndexJ = value.toLong();
00249     _covInvIndexJFound = true;
00250   }
00251   else if (path.endsWith("<cov><i>"))
00252   {
00253     _covIndex = value.toLong();
00254     _covIndexFound = true;
00255   }
00256   else if (path.endsWith("<cov><j>"))
00257   {
00258     _covIndexJ = value.toLong();
00259     _covIndexJFound = true;
00260   }
00261 
00262   // -----------------------------------------------
00263 
00264   else if (path.endsWith("<MixtureServer><DistribGD><i>")) {}
00265   else if (path.endsWith("<MixtureServer><DistribGF><i>")) {}
00266   else if (path.endsWith("<MixtureServer><DistribGD>") ||
00267            path.endsWith("<MixtureServer><DistribGF>"))
00268     _distribTypeDefined = false;
00269 
00270   // -----------------------------------------------
00271 
00272   else if (path.endsWith("<MixtureServer><MixtureGD><DistribGD><i>"))     {}
00273   else if (path.endsWith("<MixtureServer><MixtureGF><DistribGF><i>"))     {}
00274   else if (path.endsWith("<MixtureServer><MixtureGD><DistribGD><dictIdx>") ||
00275            path.endsWith("<MixtureServer><MixtureGF><DistribGF><dictIdx>"))
00276   {
00277     _distribIndex = value.toLong();
00278     _distribIndexFound = true;
00279   }
00280   else if (path.endsWith("<MixtureServer><MixtureGD><DistribGD><weight>") ||
00281            path.endsWith("<MixtureServer><MixtureGF><DistribGF><weight>"))
00282   {
00283     _weight = value.toDouble();
00284     _weightFound = true;
00285   }
00286   else if (path.endsWith("<MixtureServer><MixtureGD><DistribGD>") ||
00287            path.endsWith("<MixtureServer><MixtureGF><DistribGF>"))
00288   {
00289     if (!_distribIndexFound)
00290       eventError("dict index missing to assign a distrib to a mixture");
00291     if (!_weightFound)
00292       eventError("unknown weigth to assign a distrib to a mixture");
00293     Distrib& d = _pMixtureServer->getDistrib(_distribIndex);
00294     _pMixtureServer->addDistribToMixture(getMixture(), d, _weight);
00295     _distribTypeDefined = false;
00296   }
00297 
00298   // -----------------------------------------------
00299 
00300   else if (path.endsWith("<MixtureServer><MixtureGD>") ||
00301            path.endsWith("<MixtureServer><MixtureGF>"))
00302     _mixtTypeDefined = false;
00303   else if (path.endsWith("<MixtureServer><MixtureGD><id>"))
00304   {
00305     _pMixtureServer->setMixtureId(getMixtureGD(), value);
00306   }
00307   else if (path.endsWith("<MixtureServer><MixtureGF><id>"))
00308   {
00309     _pMixtureServer->setMixtureId(getMixtureGF(), value);
00310   }
00311   else if (path.endsWith("<MixtureServer><MixtureGD><distribCount>")) {}
00312   else if (path.endsWith("<MixtureServer><MixtureGF><distribCount>")) {}
00313 
00314   // -----------------------------------------------
00315 
00316   else if (path.endsWith("<MixtureServer><version>"))
00317   {
00318     if (value != "1")
00319       eventError("invalid version");
00320   }
00321   else if (path.endsWith("<MixtureServer><name>"))
00322   {
00323     _pMixtureServer->setServerName(value);
00324   }
00325   else if (path.endsWith("<MixtureServer><vectSize>"))
00326   {
00327     _vectSize = value.toLong();
00328     _vectSizeFound = true;
00329   }
00330   else if (path.endsWith("<MixtureServer><mixtureCount>"))  {}
00331   else if (path.endsWith("<MixtureServer><distribCount>"))  {}
00332 }
00333 //-------------------------------------------------------------------------
00334 void R::eventError(const String& msg)
00335 {
00336   assert(_pReader != NULL);
00337   _pReader->close();
00338   _pMixtureServer->reset();
00339   _pMixtureServer->setServerName("");
00340   throw InvalidDataException("Error line " + String::valueOf(_line)
00341            + " : " + msg, __FILE__, __LINE__, _pReader->getFullFileName());
00342 }
00343 //-------------------------------------------------------------------------
00344 const String& R::readOneChar()
00345 {
00346   assert(_pReader != NULL);
00347   const String& s = _pReader->readString(1);
00348   if (s == "\n")
00349     _line++;
00350   return s;
00351 }
00352 
00353 //-------------------------------------------------------------------------
00354 Mixture& R::getMixture()
00355 {
00356   if (_pMixture == NULL)
00357   {
00358     if (!_mixtTypeDefined)
00359       eventError("unknown mixture type");
00360     _pMixture = &_pMixtureServer->createMixture(0, _mixtType);
00361   }
00362   return *_pMixture;
00363 }
00364 //-------------------------------------------------------------------------
00365 MixtureGD& R::getMixtureGD()
00366 { return static_cast<MixtureGD&>(getMixture()); }
00367 //-------------------------------------------------------------------------
00368 MixtureGF& R::getMixtureGF()
00369 { return static_cast<MixtureGF&>(getMixture()); }
00370 //-------------------------------------------------------------------------
00371 Distrib& R::getDistrib()
00372 {
00373   if (_pDistrib == NULL)
00374   {
00375     if (!_vectSizeFound)
00376       eventError("Unknown vectSize !");
00377     if (!_distribTypeDefined)
00378       eventError("unknown mixture type");
00379     _pDistrib = &_pMixtureServer->createDistrib(_distribType, _vectSize);
00380   }
00381   return *_pDistrib;
00382 }
00383 //-------------------------------------------------------------------------
00384 DistribGD& R::getDistribGD()
00385 { return static_cast<DistribGD&>(getDistrib()); }
00386 //-------------------------------------------------------------------------
00387 DistribGF& R::getDistribGF()
00388 { return static_cast<DistribGF&>(getDistrib()); }
00389 //-------------------------------------------------------------------------
00390 String R::getClassName() const { return "MixtureServerFileReaderXml"; }
00391 //-------------------------------------------------------------------------
00392 R::~MixtureServerFileReaderXml() {}
00393 //-------------------------------------------------------------------------
00394 
00395 #endif // !defined(ALIZE_MixtureServerFileReaderXml_cpp)
00396