MixtureFileReaderXml.cpp

Go to the documentation of this file.
00001 /*
00002         This file is part of ALIZE which is an open-source tool for 
00003         speaker recognition.
00004 
00005     ALIZE is free software: you can redistribute it and/or modify
00006     it under the terms of the GNU Lesser General Public License as 
00007     published by the Free Software Foundation, either version 3 of 
00008     the License, or any later version.
00009 
00010     ALIZE is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU Lesser General Public License for more details.
00014 
00015     You should have received a copy of the GNU Lesser General Public 
00016     License along with ALIZE.
00017     If not, see <http://www.gnu.org/licenses/>.
00018         
00019         ALIZE is a development project initiated by the ELISA consortium
00020         [alize.univ-avignon.fr/] and funded by the French Research 
00021         Ministry in the framework of the TECHNOLANGUE program 
00022         [www.technolangue.net]
00023 
00024         The ALIZE project team wants to highlight the limits of voice
00025         authentication in a forensic context.
00026         The "Person  Authentification by Voice: A Need of Caution" paper 
00027         proposes a good overview of this point (cf. "Person  
00028         Authentification by Voice: A Need of Caution", Bonastre J.F., 
00029         Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00030         chagnolleau I., Eurospeech 2003, Genova).
00031         The conclusion of the paper is reproduced below:
00032         [Currently, it is not possible to completely determine whether the 
00033         similarity between two recordings is due to the speaker or to other 
00034         factors, especially when: (a) the speaker does not cooperate, (b) there 
00035         is no control over recording equipment, (c) recording conditions are not 
00036         known, (d) one does not know whether the voice was disguised and, to a 
00037         lesser extent, (e) the linguistic content of the message is not 
00038         controlled. Caution and judgment must be exercised when applying speaker 
00039         recognition techniques, whether human or automatic, to account for these 
00040         uncontrolled factors. Under more constrained or calibrated situations, 
00041         or as an aid for investigative purposes, judicious application of these 
00042         techniques may be suitable, provided they are not considered as infallible.
00043         At the present time, there is no scientific process that enables one to 
00044         uniquely characterize a person's voice or to identify with absolute 
00045         certainty an individual from his or her voice.]
00046         Contact Jean-Francois Bonastre for more information about the licence or
00047         the use of ALIZE
00048 
00049         Copyright (C) 2003-2010
00050         Laboratoire d'informatique d'Avignon [lia.univ-avignon.fr]
00051         ALIZE admin [alize@univ-avignon.fr]
00052         Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_MixtureFileReaderXml_cpp)
00056 #define ALIZE_MixtureFileReaderXml_cpp
00057 
00058 #include <new>
00059 #include "MixtureFileReaderXml.h"
00060 #include "MixtureGD.h"
00061 #include "MixtureGF.h"
00062 #include "DistribGD.h"
00063 #include "DistribGF.h"
00064 #include "Exception.h"
00065 #include "XmlParser.h"
00066 #include "Config.h"
00067 #include "FileReader.h"
00068 
00069 // see http://babel.alis.com/web_ml/xml/REC-xml.fr.html#NT-XMLDecl
00070 
00071 using namespace alize;
00072 typedef MixtureFileReaderXml R;
00073 
00074 //-------------------------------------------------------------------------
00075 R::MixtureFileReaderXml(const FileName& f, const Config& c)
00076 :MixtureFileReaderAbstract(&FileReader::create(f, getPath(f, c),
00077  getExt(f, c), false), c), _pMixture(NULL) {}
00078 //-------------------------------------------------------------------------
00079 R& R::create(const FileName& f, const Config& c)
00080 {
00081   R* p = new (std::nothrow) R(f, c);
00082   assertMemoryIsAllocated(p, __FILE__, __LINE__);
00083   return *p;
00084 }
00085 //-------------------------------------------------------------------------
00086 const Mixture& R::readMixture()
00087 {
00088   _line = 1;
00089   _idFound = false;
00090   _distribCountFound = false;
00091   _vectSizeFound = false;
00092   _typeFound = false;
00093 
00094   parse();
00095   _pReader->close();
00096   return *_pMixture;
00097 }
00098 //-------------------------------------------------------------------------
00099 const MixtureGD& R::readMixtureGD()
00100 {
00101   const MixtureGD* p = dynamic_cast<const MixtureGD*>(&readMixture());
00102   if (p == NULL)
00103     throw Exception("The file does not contain a Mixture with type GD",
00104       __FILE__, __LINE__);
00105   return *p;
00106 }
00107 //-------------------------------------------------------------------------
00108 const MixtureGF& R::readMixtureGF()
00109 {
00110   const MixtureGF* p = dynamic_cast<const MixtureGF*>(&readMixture());
00111   if (p == NULL)
00112     throw Exception("The file does not contain a mixture with type GF",
00113       __FILE__, __LINE__);
00114   return *p;
00115 }
00116 //-------------------------------------------------------------------------
00117 void R::eventOpeningElement(const String& path)
00118 {
00119   if (false) {}
00120   else if (path.endsWith("<mean><i>"))
00121   {
00122     if (_meanIndexFound)
00123       eventError("More than one tag " + path + " !");
00124     _meanIndexFound = true;
00125   }
00126   else if (path.endsWith("<mean>"))
00127   {
00128     _meanIndexFound = false;
00129   }
00130   else if (path.endsWith("<covInv><i>"))
00131   {
00132     if (_covInvIndexFound)
00133       eventError("More than one tag " + path + " !");
00134     _covInvIndexFound = true;
00135   }
00136   else if (path.endsWith("<covInv><j>"))
00137   {
00138     _covInvIndexJFound = true;
00139   }
00140   else if (path.endsWith("<covInv>"))
00141   {
00142     _covInvIndexFound = false;
00143   }
00144   else if (path.endsWith("<cov><i>"))
00145   {
00146     if (_covIndexFound)
00147       eventError("More than one tag " + path + " !");
00148   }
00149   else if (path.endsWith("<cov><j>"))
00150   {
00151     if (_covIndexFound)
00152       eventError("More than one tag " + path + " !");
00153   }
00154   else if (path.endsWith("<cov>"))
00155   {
00156     _covIndexFound = false;
00157   }
00158   else if (path.endsWith("<DistribGD><i>") || path.endsWith("<DistribGF><i>"))
00159   {
00160     if (_distribIndexFound)
00161       eventError("More than one tag " + path + " !");
00162     _distribIndexFound = true;
00163   }
00164   else if (path.endsWith("<weight>"))
00165   {
00166     if (_weightFound)
00167       eventError("More than one tag " + path + " !");
00168     _weightFound = true;
00169   }
00170   else if (path.endsWith("<cst>")) {}
00171   else if (path.endsWith("<det>")) {}
00172   else if (path.endsWith("<DistribGD>") || path.endsWith("<DistribGF>"))
00173   {
00174     if (!_distribCountFound)
00175       eventError("Dont't know distribCount to create the mixture");
00176     if (!_vectSizeFound)
00177       eventError("Dont't know vectSize to create the mixture");
00178     _distribIndexFound = false;
00179     _weightFound = false;
00180   }
00181   else if (path.endsWith("<distribCount>"))
00182   {
00183     if (_distribCountFound)
00184       eventError("More than one tag " + path + " !");
00185     _distribCountFound = true;
00186   }
00187   else if (path.endsWith("<vectSize>"))
00188   {
00189     if (_vectSizeFound)
00190       eventError("More than one tag " + path + " !");
00191     _vectSizeFound = true;
00192   }
00193   else if (path.endsWith("<MixtureGD><id>") || path.endsWith("<MixtureGF><id>"))
00194   {
00195     if (_idFound)
00196       eventError("More than one tag " + path + " !");
00197     _idFound = true;
00198   }
00199   else if (path.endsWith("<version>")) {}
00200   else if (path.endsWith("<MixtureGD>"))
00201   {
00202     if (_pMixture != NULL)
00203       eventError("More than one tag " + path + " !");
00204     _type = DistribType_GD;
00205     _typeFound = true;
00206   }
00207   else if (path.endsWith("<MixtureGF>"))
00208   {
00209     if (_pMixture != NULL)
00210       eventError("More than one tag " + path + " !");
00211     _type = DistribType_GF;
00212     _typeFound = true;
00213   }
00214   else
00215     eventError("Unknown tag in the path " + path);
00216 }
00217 //-------------------------------------------------------------------------
00218 void R::eventClosingElement(const String& path, const String& value)
00219 {
00220   if (false)
00221   {
00222   }
00223   else if (path.endsWith("<mean><i>"))
00224   {
00225     _meanIndex = value.toULong();
00226   }
00227   else if (path.endsWith("<mean>"))
00228   {
00229     if (_meanIndexFound == false)
00230       eventError("Index missing for mean");
00231     switch (type())
00232     {
00233     case DistribType_GD:
00234       distribGD().setMean(value.toDouble(), _meanIndex);
00235       break;
00236     case DistribType_GF:
00237       distribGF().setMean(value.toDouble(), _meanIndex);
00238     }
00239   }
00240   else if (path.endsWith("<covInv><i>"))
00241   {
00242     _covInvIndex = value.toULong();
00243   }
00244   else if (path.endsWith("<covInv><j>"))
00245   {
00246     _covInvIndexJ = value.toULong();
00247   }
00248   else if (path.endsWith("<covInv>"))
00249   {
00250     if (_covInvIndexFound == false)
00251       eventError("Index missing for covInv");
00252     switch (type())
00253     {
00254     case DistribType_GD:
00255       distribGD().setCovInv(K::k, value.toDouble(),_covInvIndex);
00256       break;
00257     case DistribType_GF:
00258       distribGF().setCovInv(K::k, value.toDouble(), _covInvIndex,
00259                                                        _covInvIndexJ);
00260     }
00261   }
00262   else if (path.endsWith("<cov><i>"))
00263   {
00264     _covIndex = value.toULong();
00265     _covIndexFound = true;
00266   }
00267   else if (path.endsWith("<cov>"))
00268   {
00269     if (!_covIndexFound)
00270       eventError("Index missing for cov");
00271     if (type() == DistribType_GD)
00272       distribGD().setCov(value.toDouble(), _covIndex);
00273     else
00274       ; // no cov matrix for GF
00275   }
00276   else if (path.endsWith("<DistribGD><i>") || path.endsWith("<DistribGF><i>"))
00277   {
00278     _distribIndex = value.toULong();
00279     _distribIndexFound = true;
00280   }
00281   else if (path.endsWith("<weight>"))
00282   {
00283     if (!_distribIndexFound)
00284       eventError("Don't know distrib index");
00285     mixture().weight(_distribIndex) = value.toDouble();
00286   }
00287   else if (path.endsWith("<cst>"))
00288   {
00289     if (!_distribIndexFound)
00290       eventError("Don't know distrib index");
00291     switch (type())
00292     {
00293     case DistribType_GD:
00294       mixtureGD().getDistrib(_distribIndex).
00295                      setCst(K::k, value.toDouble());
00296       break;
00297     case DistribType_GF:
00298       mixtureGF().getDistrib(_distribIndex).
00299                      setCst(K::k, value.toDouble());
00300     }
00301   }
00302   else if (path.endsWith("<det>"))
00303   {
00304     if (!_distribIndexFound)
00305       eventError("Don't know distrib index");
00306     switch (type())
00307     {
00308     case DistribType_GD:
00309       mixtureGD().getDistrib(_distribIndex).
00310                      setDet(K::k, value.toDouble());
00311       break;
00312     case DistribType_GF:
00313       mixtureGF().getDistrib(_distribIndex).
00314                      setDet(K::k, value.toDouble());
00315     }
00316   }
00317   else if (path.endsWith("<DistribGD>"))
00318   {
00319     if (!_weightFound)
00320       eventError("Unknow weight");
00321   }
00322   else if (path.endsWith("<distribCount>"))
00323   {
00324     _distribCount = value.toULong();
00325   }
00326   else if (path.endsWith("<vectSize>"))
00327   {
00328     _vectSize = value.toULong();
00329   }
00330   else if (path.endsWith("<MixtureGD><id>"))
00331   {
00332     _id = value;
00333   }
00334   else if (path.endsWith("<MixtureGD><version>"))
00335   {
00336     if (value != "1")
00337       eventError("invalid version");
00338   }
00339   else if (path.endsWith("<MixtureGD>"))
00340   {
00341     if (_idFound)
00342       mixture().setId(K::k, _id);
00343   }
00344 }
00345 //-------------------------------------------------------------------------
00346 void R::eventError(const String& msg)
00347 {
00348   assert(_pReader != NULL);
00349   _pReader->close();
00350   throw InvalidDataException("Error line " + String::valueOf(_line)
00351     + " : " + msg, __FILE__, __LINE__, _pReader->getFullFileName());
00352 }
00353 //-------------------------------------------------------------------------
00354 const String& R::readOneChar()
00355 {
00356   assert(_pReader != NULL);
00357   const String& s = _pReader->readString(1);
00358   if (s == "\n")
00359     _line++;
00360   return s;
00361 }
00362 //-------------------------------------------------------------------------
00363 Mixture& R::mixture() // private
00364 {
00365   if (_pMixture == NULL)
00366   {
00367     if (!_vectSizeFound)
00368       eventError("Don't know the vectSize to create the mixture");
00369     if (!_distribCountFound)
00370       eventError("Don't know the distrib count to create the mixture");
00371     switch (type())
00372     {
00373     case DistribType_GD:
00374       _pMixture = &MixtureGD::create(K::k, "", _vectSize, _distribCount);
00375       break;
00376     case DistribType_GF:
00377       _pMixture = &MixtureGF::create(K::k, "", _vectSize, _distribCount);
00378     }
00379   }
00380   return *_pMixture;
00381 }
00382 //-------------------------------------------------------------------------
00383 MixtureGD& R::mixtureGD() // private
00384 { return *dynamic_cast<MixtureGD*>(&mixture()); }
00385 //-------------------------------------------------------------------------
00386 MixtureGF& R::mixtureGF() // private
00387 { return *dynamic_cast<MixtureGF*>(&mixture()); }
00388 //-------------------------------------------------------------------------
00389 DistribGD& R::distribGD() // private
00390 {
00391   if (!_distribIndexFound)
00392      eventError("Don't know distrib index");
00393   return *dynamic_cast<DistribGD*>(&mixture().getDistrib(_distribIndex));
00394 }
00395 //-------------------------------------------------------------------------
00396 DistribGF& R::distribGF() // private
00397 {
00398   if (!_distribIndexFound)
00399      eventError("Don't know distrib index");
00400   return *dynamic_cast<DistribGF*>(&mixture().getDistrib(_distribIndex));
00401 }
00402 //-------------------------------------------------------------------------
00403 const DistribType& R::type() // private
00404 {
00405   if (!_typeFound)
00406     eventError("Unknown mixture type (GD ?, GF ?)");
00407   return _type;
00408 }
00409 //-------------------------------------------------------------------------
00410 String R::getClassName() const { return "MixtureFileReaderXml"; }
00411 //-------------------------------------------------------------------------
00412 R::~MixtureFileReaderXml()
00413 {
00414   if (_pMixture != NULL)
00415     delete _pMixture;
00416 }
00417 //-------------------------------------------------------------------------
00418 
00419 #endif // !defined(ALIZE_MixtureFileReaderXml_cpp)
00420