XmlParser.cpp

Go to the documentation of this file.
00001 /*
00002         This file is part of ALIZE which is an open-source tool for 
00003         speaker recognition.
00004 
00005     ALIZE is free software: you can redistribute it and/or modify
00006     it under the terms of the GNU Lesser General Public License as 
00007     published by the Free Software Foundation, either version 3 of 
00008     the License, or any later version.
00009 
00010     ALIZE is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU Lesser General Public License for more details.
00014 
00015     You should have received a copy of the GNU Lesser General Public 
00016     License along with ALIZE.
00017     If not, see <http://www.gnu.org/licenses/>.
00018         
00019         ALIZE is a development project initiated by the ELISA consortium
00020         [alize.univ-avignon.fr/] and funded by the French Research 
00021         Ministry in the framework of the TECHNOLANGUE program 
00022         [www.technolangue.net]
00023 
00024         The ALIZE project team wants to highlight the limits of voice
00025         authentication in a forensic context.
00026         The "Person  Authentification by Voice: A Need of Caution" paper 
00027         proposes a good overview of this point (cf. "Person  
00028         Authentification by Voice: A Need of Caution", Bonastre J.F., 
00029         Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00030         chagnolleau I., Eurospeech 2003, Genova).
00031         The conclusion of the paper is proposed below:
00032         [Currently, it is not possible to completely determine whether the 
00033         similarity between two recordings is due to the speaker or to other 
00034         factors, especially when: (a) the speaker does not cooperate, (b) there 
00035         is no control over recording equipment, (c) recording conditions are not 
00036         known, (d) one does not know whether the voice was disguised and, to a 
00037         lesser extent, (e) the linguistic content of the message is not 
00038         controlled. Caution and judgment must be exercised when applying speaker 
00039         recognition techniques, whether human or automatic, to account for these 
00040         uncontrolled factors. Under more constrained or calibrated situations, 
00041         or as an aid for investigative purposes, judicious application of these 
00042         techniques may be suitable, provided they are not considered as infallible.
00043         At the present time, there is no scientific process that enables one to 
00044         uniquely characterize a person's voice or to identify with absolute 
00045         certainty an individual from his or her voice.]
00046         Contact Jean-Francois Bonastre for more information about the licence or
00047         the use of ALIZE
00048 
00049         Copyright (C) 2003-2010
00050         Laboratoire d'informatique d'Avignon [lia.univ-avignon.fr]
00051         ALIZE admin [alize@univ-avignon.fr]
00052         Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_XmlParser_cpp)
00056 #define ALIZE_XmlParser_cpp
00057 
00058 #include "XmlParser.h"
00059 #include "MixtureGD.h"
00060 #include "MixtureGF.h"
00061 
00062 // see http://babel.alis.com/web_ml/xml/REC-xml.fr.html#NT-XMLDecl
00063 
00064 using namespace alize;
00065 
00066 //-------------------------------------------------------------------------
00067 XmlParser::XmlParser()
00068 :Object() {}
00069 //-------------------------------------------------------------------------
00070 void XmlParser::parse()
00071 {
00072   // lecture 1er et seul element
00073   test(readNextChar() == "<", ": first character must be '<'");
00074   parseElement("", readOneChar());
00075 }
00076 //-------------------------------------------------------------------------
00077 void XmlParser::parseElement(String path, String s)
00078 {
00079   String tag, value;
00080 
00081   // read the opening tag
00082   test(s != ">" && s != "<" && s != "\"" && !isASeparator(s), "");
00083   while (s != "/" && s != ">" && !isASeparator(s))
00084   {
00085     tag += s;
00086     s = readOneChar();
00087   }
00088   eventOpeningElement(path += "<" + tag + ">");
00089 
00090   if (isASeparator(s))
00091     s = readNextChar();
00092 
00093   // read attributes
00094 
00095   while ( s != "/" && s != ">")
00096   {
00097     parseAttribute(path, s);
00098     s = readNextChar();
00099   }
00100 
00101   // fin element simple
00102 
00103   if (s == "/")
00104   {
00105     test(readOneChar() == ">", ": character '>' expected after '/'");
00106     eventClosingElement(path, value);
00107     return; // fin element simple
00108   }
00109 
00110   // element compose
00111 
00112   while (true)
00113   {
00114     while ( (s = readOneChar()) != "<")
00115     {
00116       if (s != "\r" && s != "\t" && s != "\n")
00117         value += s;
00118     }
00119     s = readOneChar();
00120 
00121     // closing tag
00122 
00123     if (s == "/")
00124     {
00125       s  = readOneChar();
00126       test(s != ">", ": a tag cannot be empty");
00127       String closingElement; // lecture balise de fermeture
00128       while (s != ">")
00129       {
00130         test(s != "/" && s != "\"" && s != "<" && !isASeparator(s),
00131           ": the tag contains an invalid character");
00132         closingElement += s;
00133         s  = readOneChar();
00134       }
00135       test(tag == closingElement, " : End tag <" + closingElement +
00136          "> does not match the start tag <" + tag  + ">");
00137 
00138       eventClosingElement(path, value);
00139       return; // fin element compose
00140     }
00141     parseElement(path, s);
00142   }
00143 }
00144 //-------------------------------------------------------------------------
00145 void XmlParser::parseAttribute(String path, String s)
00146 {
00147   String attribute, value;
00148   test(s != "\"" && s != "<" && s != "=", "");
00149   while (s != "=" && !isASeparator(s))
00150   {
00151     attribute += s;
00152     s = readOneChar();
00153     test(s != "/" && s != ">" && s != "<" && s != "\"" && s != "'",
00154               ": an attribute contain an invalid character");
00155   }
00156   eventOpeningElement(path += "<" + attribute + ">");
00157   if (isASeparator(s))
00158     test(readNextChar() == "=",
00159        ": Missing equals sign between attribute and attribute value");
00160   String quote = readNextChar();
00161   test(quote == "\"" || quote == "'", String(": a string literal was")
00162           + "expected, but no opening quote character was found");
00163   while ( (s = readOneChar()) != quote)
00164     value += s;
00165   eventClosingElement(path, value);
00166 }
00167 //-------------------------------------------------------------------------
00168 // Return the next character of the file that is not a separator character
00169 //-------------------------------------------------------------------------
00170 const String& XmlParser::readNextChar()
00171 {
00172   while(true) 
00173   {
00174     const String& s = readOneChar();
00175     if (!isASeparator(s))
00176       return s;
00177   }
00178   return readNextChar(); // never used
00179 }
00180 //-------------------------------------------------------------------------
00181 bool XmlParser::isASeparator(String s) const
00182 { return s == " " || s == "\n" || s == "\t" ||s == "\r"; }
00183 //-------------------------------------------------------------------------
00184 void XmlParser::test(bool v, const String& msg) { if (!v) eventError(msg); }
00185 //-------------------------------------------------------------------------
00186 XmlParser::~XmlParser() {}
00187 //-------------------------------------------------------------------------
00188 
00189 #endif // !defined(ALIZE_XmlParser_cpp)
00190