Histo.cpp

Go to the documentation of this file.
00001 /*
00002         This file is part of ALIZE which is an open-source tool for 
00003         speaker recognition.
00004 
00005     ALIZE is free software: you can redistribute it and/or modify
00006     it under the terms of the GNU Lesser General Public License as 
00007     published by the Free Software Foundation, either version 3 of 
00008     the License, or any later version.
00009 
00010     ALIZE is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU Lesser General Public License for more details.
00014 
00015     You should have received a copy of the GNU Lesser General Public 
00016     License along with ALIZE.
00017     If not, see <http://www.gnu.org/licenses/>.
00018         
00019         ALIZE is a development project initiated by the ELISA consortium
00020         [alize.univ-avignon.fr/] and funded by the French Research 
00021         Ministry in the framework of the TECHNOLANGUE program 
00022         [www.technolangue.net]
00023 
00024         The ALIZE project team wants to highlight the limits of voice
00025         authentication in a forensic context.
00026         The "Person  Authentification by Voice: A Need of Caution" paper 
00027         proposes a good overview of this point (cf. "Person  
00028         Authentification by Voice: A Need of Caution", Bonastre J.F., 
00029         Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00030         chagnolleau I., Eurospeech 2003, Genova).
00031         The conclusion of the paper is given below:
00032         [Currently, it is not possible to completely determine whether the 
00033         similarity between two recordings is due to the speaker or to other 
00034         factors, especially when: (a) the speaker does not cooperate, (b) there 
00035         is no control over recording equipment, (c) recording conditions are not 
00036         known, (d) one does not know whether the voice was disguised and, to a 
00037         lesser extent, (e) the linguistic content of the message is not 
00038         controlled. Caution and judgment must be exercised when applying speaker 
00039         recognition techniques, whether human or automatic, to account for these 
00040         uncontrolled factors. Under more constrained or calibrated situations, 
00041         or as an aid for investigative purposes, judicious application of these 
00042         techniques may be suitable, provided they are not considered as infallible.
00043         At the present time, there is no scientific process that enables one to 
00044         uniquely characterize a person's voice or to identify with absolute 
00045         certainty an individual from his or her voice.]
00046         Contact Jean-Francois Bonastre for more information about the licence or
00047         the use of ALIZE
00048 
00049         Copyright (C) 2003-2010
00050         Laboratoire d'informatique d'Avignon [lia.univ-avignon.fr]
00051         ALIZE admin [alize@univ-avignon.fr]
00052         Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_Histo_cpp)
00056 #define ALIZE_Histo_cpp
00057 
00058 #if defined(_WIN32)
00059 #define _CRT_SECURE_NO_WARNINGS
00060 #endif
00061 
00062 #include <fstream>
00063 #include <cstdio>
00064 #include "Object.h"
00065 #include "Histo.h"
00066 #include "alizeString.h"
00067 #include "RealVector.h"
00068 #include "Exception.h"
00069 
00070 using namespace std;
00071 using namespace alize;
00072 
00073 const Histo::TData Histo::EPS = 0.0000000000000000001;
00074 
00075 //-------------------------------------------------------------------------
00076 Histo::Histo(unsigned long nbBin)
00077 :Object(), _nbBin(nbBin), _bound(nbBin+1, nbBin+1), _count(nbBin, nbBin) {} 
00078 //-------------------------------------------------------------------------
00079 Histo::Histo(const Histo& h)
00080 :Object(), _nbBin(h._nbBin), _bound(h._bound), _count(h._count),
00081  _data(h._data) {}
00082 //-------------------------------------------------------------------------
00083 const Histo& Histo::operator=(const Histo& h)
00084 {
00085   if (this != &h)
00086   {
00087     _nbBin = h._nbBin;
00088     _bound = h._bound;
00089     _count = h._count;
00090     _data  = h._data;
00091   }
00092   return *this;   
00093 }
00094 //-------------------------------------------------------------------------
00095 
00096 
00097 // ******************************************************
00098 // Class Histo distribution estimation, tools and writing
00099 // ******************************************************
00100 
00101 //-------------------------------------------------------------------------
00102 Histo::TData Histo::lowerBound(unsigned long i) const
00103 {
00104   if ((_nbBin == 0) || (i>=_nbBin))
00105     throw Exception("Index out of bounds", __FILE__, __LINE__);
00106   return _bound[i];
00107 }
00108 //-------------------------------------------------------------------------
00109 Histo::TData Histo::higherBound(unsigned long i) const
00110 {
00111   if ((_nbBin == 0) || (i>=_nbBin))
00112     throw Exception("Index out of bounds", __FILE__, __LINE__);
00113   return _bound[i+1];
00114 }
00115 //-------------------------------------------------------------------------
00116 Histo::TData Histo::count(unsigned long i) const
00117 {
00118   if ((_nbBin == 0) || (i>=_nbBin))
00119     throw Exception("Index out of bounds", __FILE__, __LINE__);
00120   return _count[i];
00121 }
00122 //-------------------------------------------------------------------------
00123 Histo::TData Histo::operator()(TData score, int type) const
00124 {
00125   if (type==0)
00126   {
00127     if (_nbBin == 0 || score < _bound[0] || score > _bound[_nbBin])
00128       return 0;
00129     unsigned long i;
00130     for (i=0; score>_bound[i+1]; i++)
00131       ;
00132     return _count[i];
00133   }
00134   if (type == 1)
00135   {
00136     if (_nbBin == 0 || score <= _bound[0])
00137       return 0;
00138     if (score >= _bound[_nbBin])
00139       return _nbBin-1;
00140     unsigned long i;
00141     for (i=0; score>_bound[i+1]; i++)
00142       ;
00143     return i;
00144   }
00145   throw Exception("Invalid type", __FILE__, __LINE__);
00146   return 0; // never called
00147 }
00148 //-------------------------------------------------------------------------
00149 void Histo::accumulateValue(TData value) { _data.addValue(value); }
00150 //-------------------------------------------------------------------------
00151 void Histo::computeHisto(int opt)
00152 {
00153   if (_data.size() < 2)
00154     throw Exception("Too few data to build the histo", __FILE__, __LINE__);
00155   if (_nbBin != 0)
00156   {
00157     unsigned long b = 0;// bin index
00158     const unsigned long nbData = _data.size();
00159   
00160     _data.ascendingSort(); // sort
00161     // build the histo
00162     unsigned long i=0; // Data index
00163     TData bound = (TData)nbData / (TData)_nbBin;
00164     _bound[0] = _data[0];
00165     TData nbValBin = 1; // Nb data for the current bin
00166 
00167     while (i<nbData)
00168     {   
00169       if (i < bound-1) // All data without the last
00170       {
00171         i++;
00172         nbValBin++;
00173       }
00174       else
00175       {   // We are on the last VALUE of the bin 
00176         i++;
00177         while ((i<nbData) && (_data[i]==_data[i-1]))
00178         { // In case of equal values
00179           nbValBin++;
00180           i++;
00181         }
00182         if (i == nbData)
00183           break;
00184         // Set the end of the bin and the begin of the next one...
00185         _bound[b+1] = _data[i];
00186       
00187         // we deal with the last data/bin after the loop
00188         if ((_bound[b+1]-_bound[b])>EPS) 
00189           _count[b] = nbValBin / (nbData*(_bound[b+1]-_bound[b]));
00190         else 
00191           _count[b] = EPS;
00192         
00193         // Initialize next bin
00194         b++;
00195         bound = i-1 + (TData)(nbData-i+1) / (_nbBin-b);
00196         nbValBin=1;
00197       }
00198     }
00199 
00200     // end of the current Bin
00201     _bound[b+1] = _data[nbData-1];
00202   
00203     if (_bound[b+1]-_bound[b] > EPS)
00204       _count[b] = nbValBin / (nbData*(_bound[b+1]-_bound[b]));
00205     else
00206       _count[b] = EPS;
00207 
00208     // Not used bins at the end
00209     for (b++; b<_nbBin; b++)
00210     {
00211       _bound[b+1] = _bound[b];
00212       _count[b] = 0;
00213     }
00214   }
00215  
00216   if (opt == 0)
00217   {
00218     _data.clear();
00219   }
00220   else
00221     throw Exception("Invalid option", __FILE__, __LINE__);
00222 
00223 
00224 /*
00225   if (_nbBin != 0)
00226   {
00227     _data.ascendingSort(); // ascending sort
00228     unsigned long nbData = _data.size();
00229     unsigned long b = 0, i=0;
00230 
00231     TData width = _data[nbData-1] - _data[0];
00232     _bound[_nbBin] = _data[nbData-1];
00233     _bound[0] = _data[0];
00234     TData bound = _data[0] + width/_nbBin;
00235     _count[0] = 0.0;
00236 
00237     while (true)
00238     {
00239       if (_data[i] < bound || _data[i] == _bound[_nbBin])
00240       {
00241         _count[b]++;
00242         i++;
00243         if (i == nbData) // if no data 
00244           break; // leave loop
00245       }
00246       else
00247       {
00248         // end of the current bin
00249         _bound[b+1] = _data[i];
00250         if (_bound[b+1]-_bound[b] > EPS)
00251           _count[b] /= nbData*(_bound[b+1]-_bound[b]);
00252         else
00253           _count[b] = EPS;
00254         // go to next bin
00255         b++;
00256         bound = _data[0] + (width*(b+1))/_nbBin;
00257         _count[b] = 0.0;
00258       }
00259     }
00260     // end of the current bin
00261     if (_bound[b+1]-_bound[b] > EPS)
00262       _count[b] /= nbData*(_bound[b+1]-_bound[b]);
00263     else
00264       _count[b] = EPS;
00265 
00266     // Not used bins at the end
00267     for (b++; b<_nbBin; b++)
00268     {
00269       _bound[b] = _bound[b-1];
00270       _count[b] = 0.0;
00271     }
00272   }*/
00273  
00274   if (opt == 0)
00275   {
00276     _data.clear();
00277   }
00278   else
00279     throw Exception("Invalid option", __FILE__, __LINE__);
00280 
00281 }
00282 
00283 // Other, general IO, Public Functions for Histo 
00284 //-------------------------------------------------------------------------
00285 void Histo::load(const FileName& f)
00286 {
00287   const char *fileName = f.c_str();
00288   char S[200];
00289   int nbBin;
00290   long i;
00291   float tmp1, tmp2;
00292   
00293   ifstream txtFile(fileName, ios::in);
00294   if (!txtFile)
00295     throw IOException("Cannot open file", __FILE__, __LINE__, f);
00296   txtFile >> nbBin;    
00297   Histo hTmp(nbBin);
00298   i = 0;
00299   txtFile.getline(S,200);
00300   while ((i<nbBin) && txtFile)
00301   {
00302     if ((S[0]!='#') && (S[0]!=0) && (S[0]!=13)&& (S[0]!=10))
00303     {
00304       sscanf(S, "%f  %f", &tmp1, &tmp2);
00305       hTmp._bound[i] = tmp1;
00306       hTmp._count[i] = tmp2;
00307       i++;   
00308     }
00309     txtFile.getline(S, 200);
00310   }
00311   if (i == nbBin)
00312   {
00313     sscanf(S, "%f", &tmp1);
00314     hTmp._bound[nbBin] = tmp1;
00315   }
00316   else
00317     throw IOException("Not enough data in file", __FILE__, __LINE__, f);
00318      
00319    (*this) = hTmp;   
00320 }
00321 //-------------------------------------------------------------------------
00322 void Histo::save(const FileName& f) const
00323 {
00324   ofstream txtFile(f.c_str(), ios::out);
00325   if (!txtFile)
00326     throw IOException("Cannot open file", __FILE__, __LINE__, f);
00327   txtFile << _nbBin << endl;
00328   for (unsigned long i=0; i<_nbBin; i++)
00329     txtFile << _bound[i] << "   " << _count[i] << endl;
00330   txtFile << _bound[_nbBin] << endl;
00331 }   
00332 //-------------------------------------------------------------------------
00333 void Histo::saveGnuplot(const FileName& f) const
00334 {
00335   ofstream txtFile(f.c_str(), ios::out);
00336   if (!txtFile)
00337     throw IOException("Cannot open file", __FILE__, __LINE__, f);
00338   txtFile << _bound[0] << " " << 0<< endl; 
00339 
00340   for (unsigned long i=0; i<_nbBin; i++)
00341   {
00342     txtFile << _bound[i] << " " << _count[i]<< endl; 
00343     txtFile << _bound[i+1] << " " << _count[i]<< endl; 
00344   }
00345   txtFile << _bound[_nbBin] << " " << 0<< endl; 
00346 }    
00347 //-------------------------------------------------------------------------
00348 // Divide each bin by factor
00349 //-------------------------------------------------------------------------
00350 void Histo::div(real_t factor)
00351 {
00352   if (factor == 0.0) 
00353     throw Exception("factor cannot be 0.0", __FILE__, __LINE__);
00354   for (unsigned long i=0; i<_nbBin ;i++)
00355   { _count[i] /= factor; }
00356 }
00357 //-------------------------------------------------------------------------
00358 unsigned long Histo::size() const { return _nbBin; }
00359 //-------------------------------------------------------------------------
00360 String Histo::getClassName() const { return "Histo"; }
00361 //-------------------------------------------------------------------------
00362 String Histo::toString() const
00363 {
00364   unsigned long i;
00365   real_t sum = 0.0;
00366   String s = Object::toString() + " Nb bin = " + String::valueOf(_nbBin);
00367   for (i=0; i<_nbBin; i++)
00368   {
00369     s += "\n  " + String::valueOf(_bound[i]) + " : "
00370     + String::valueOf(_count[i]);
00371     sum += (_bound[i+1] - _bound[i]) * _count[i];
00372   }
00373   if (_nbBin != 0)
00374     s += "\n  " + String::valueOf(_bound[i]);
00375   return s;
00376 }
00377 //-------------------------------------------------------------------------
00378 Histo::~Histo() {}
00379 //-------------------------------------------------------------------------
00380 
00381 #endif // !defined(ALIZE_Histo_cpp)