FeatureFileReaderSingle.cpp

Go to the documentation of this file.
00001 /*
00002         This file is part of ALIZE which is an open-source tool for 
00003         speaker recognition.
00004 
00005     ALIZE is free software: you can redistribute it and/or modify
00006     it under the terms of the GNU Lesser General Public License as 
00007     published by the Free Software Foundation, either version 3 of 
00008     the License, or any later version.
00009 
00010     ALIZE is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU Lesser General Public License for more details.
00014 
00015     You should have received a copy of the GNU Lesser General Public 
00016     License along with ALIZE.
00017     If not, see <http://www.gnu.org/licenses/>.
00018         
00019         ALIZE is a development project initiated by the ELISA consortium
00020         [alize.univ-avignon.fr/] and funded by the French Research 
00021         Ministry in the framework of the TECHNOLANGUE program 
00022         [www.technolangue.net]
00023 
00024         The ALIZE project team wants to highlight the limits of voice
00025         authentication in a forensic context.
00026         The "Person  Authentification by Voice: A Need of Caution" paper 
00027         proposes a good overview of this point (cf. "Person  
00028         Authentification by Voice: A Need of Caution", Bonastre J.F., 
00029         Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00030         Chagnolleau I., Eurospeech 2003, Genova).
00031         The conclusion of the paper is reproduced below:
00032         [Currently, it is not possible to completely determine whether the 
00033         similarity between two recordings is due to the speaker or to other 
00034         factors, especially when: (a) the speaker does not cooperate, (b) there 
00035         is no control over recording equipment, (c) recording conditions are not 
00036         known, (d) one does not know whether the voice was disguised and, to a 
00037         lesser extent, (e) the linguistic content of the message is not 
00038         controlled. Caution and judgment must be exercised when applying speaker 
00039         recognition techniques, whether human or automatic, to account for these 
00040         uncontrolled factors. Under more constrained or calibrated situations, 
00041         or as an aid for investigative purposes, judicious application of these 
00042         techniques may be suitable, provided they are not considered as infallible.
00043         At the present time, there is no scientific process that enables one to 
00044         uniquely characterize a person's voice or to identify with absolute 
00045         certainty an individual from his or her voice.]
00046         Contact Jean-Francois Bonastre for more information about the licence or
00047         the use of ALIZE
00048 
00049         Copyright (C) 2003-2010
00050         Laboratoire d'informatique d'Avignon [lia.univ-avignon.fr]
00051         ALIZE admin [alize@univ-avignon.fr]
00052         Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_FeatureFileReaderSingle_cpp)
00056 #define ALIZE_FeatureFileReaderSingle_cpp
00057 
00058 #include <new>
00059 #include "FeatureFileReaderSingle.h"
00060 #include "FileReader.h"
00061 #include "Exception.h"
00062 #include "LabelServer.h"
00063 #include "Label.h"
00064 #include "FeatureFlags.h"
00065 #include "Config.h"
00066 #include "RealVector.h"
00067 #include "FileReader.h"
00068 
00069 using namespace alize;
00070 typedef FeatureFileReaderSingle R;
00071 
00072 //-------------------------------------------------------------------------
00073 R::FeatureFileReaderSingle(FileReader* r, FeatureInputStream* st, 
00074                            const Config& c, LabelServer* p,
00075                            BufferUsage b, unsigned long bufferSize,
00076                            HistoricUsage h, unsigned long historicSize)
00077 :FeatureFileReaderAbstract(NULL, c, p, b, bufferSize, h, historicSize),
00078  _pReader(r), _pFeatureInputStream(st), _pFeature(NULL), _featureIndex(0),
00079  _lastFeatureIndex(0),
00080  _featureIndexOfBuffer(0), _nbStored(0), _pBuffer(&FloatVector::create())
00081 {}
00082 //-------------------------------------------------------------------------
00083 String R::getPath(const FileName& f, const Config& c) const
00084 {  // protected method
00085    if (f.beginsWith("/") || f.beginsWith("./"))
00086     return "";
00087   return c.getParam_featureFilesPath();
00088 }
00089 //-------------------------------------------------------------------------
00090 String R::getExt(const FileName& f, const Config& c) const
00091 {  // protected method
00092    if (f.beginsWith("/") || f.beginsWith("./"))
00093     return "";
00094   return c.getParam_loadFeatureFileExtension();
00095 }
00096 //-------------------------------------------------------------------------
00097 bool R::getBigEndian(const Config& c, BigEndian b) const // protected
00098 {
00099   if (b == BIGENDIAN_TRUE)
00100     return true;
00101   if (b == BIGENDIAN_FALSE)
00102     return false;
00103   // BIGENDIAN_AUTO
00104   if (c.existsParam_loadFeatureFileBigEndian)
00105     return c.getParam_loadFeatureFileBigEndian();
00106   if (c.existsParam_bigEndian)
00107     return c.getParam_bigEndian();
00108   return false;
00109 }
00110 //-------------------------------------------------------------------------
00111 void R::close()
00112 {
00113   if (_pReader != NULL)
00114     _pReader->close();
00115   if (_pFeatureInputStream != NULL)
00116     _pFeatureInputStream->close();
00117 }
00118 //-------------------------------------------------------------------------
00119 bool R::readFeature(Feature& f, unsigned long step)
00120 {
00121   assert(_pReader != NULL || _pFeatureInputStream != NULL);
00122   if (_seekWanted)
00123   {
00124     _seekWanted = false;
00125     if (_historicUsage == LIMITED && !featureWantedIsInHistoric())
00126     {
00127       f.setVectSize(K::k, getVectSize());
00128       f.setValidity(false);
00129       _error = FEATURE_OUT_OF_HISTORY;
00130       return true;
00131     }
00132     _featureIndex = _seekWantedIdx;
00133   }
00134   // si on depasse la fin du fichier
00135   unsigned long featureCount = getFeatureCount();
00136   if (_featureIndex >= featureCount)
00137     return false;
00138   // si on demande une feature hors du buffer
00139   if (_featureIndex < _featureIndexOfBuffer ||
00140       _featureIndex >= _featureIndexOfBuffer + _nbStored)
00141   {
00142     if (!_bufferSizeDefined)
00143     {
00144       unsigned long m = _pBuffer->size();
00145       if (_bufferIsInternal)
00146       {
00147         if (_bufferUsage == BUFFER_USERDEFINE)
00148           m = _userDefineBufferSize/sizeof(float);
00149         else if (_bufferUsage == BUFFER_AUTO)
00150         {
00151           if (getConfig().existsParam_loadFeatureFileMemAlloc)
00152           {
00153             m = getConfig().getParam_loadFeatureFileMemAlloc()/sizeof(float);
00154             unsigned long n = featureCount*getVectSize();
00155             if (n < m)
00156               m = n;
00157           }
00158         }
00159       }
00160       if (m < getVectSize()) // minimum size
00161         m = getVectSize();
00162       _pBuffer->setSize(m);
00163       _bufferSizeDefined = true;
00164     }
00165     unsigned long start = _featureIndex;
00166     if (featureCount-_featureIndex < _pBuffer->size()/getVectSize())
00167     {
00168       unsigned long x = _pBuffer->size()/getVectSize() -
00169                         (featureCount-_featureIndex);
00170       if (x < _featureIndex)
00171         start -= x;
00172       else
00173         start = 0;
00174     }
00175     // si le bloc de donnees a charger ne suit pas le bloc deja en memoire
00176     // on se repositionne dans le fichier
00177     if (start != _featureIndexOfBuffer + _nbStored /*+ 1*/) {
00178       if (_pReader != NULL) {
00179         _pReader->seek(getHeaderLength() + start*getVectSize()*sizeof(float));
00180       }
00181       else {
00182         _pFeatureInputStream->seekFeature(start);
00183       }
00184     }
00185     // chargement des donnees dans le buffer
00186     if (_pReader != NULL)
00187       _nbStored = _pReader->readSomeFloats(*_pBuffer)/getVectSize();
00188     else
00189     {
00190       // Pas performant. A améliorer
00191       _nbStored = 0;
00192       unsigned long vectSize = _pFeatureInputStream->getVectSize();
00193       while ((_nbStored+1)*vectSize <= _pBuffer->size()
00194               && _pFeatureInputStream->readFeature(_f))
00195       {
00196         unsigned long ii = _nbStored*vectSize;
00197         for (unsigned long j=0; j<vectSize; j++)
00198           (*_pBuffer)[ii+j] = (float) _f[j];
00199         _nbStored++;
00200       }
00201     }
00202 
00203     _featureIndexOfBuffer = start;
00204     // if all the features are loaded in the buffer, we close the file
00205     if (_nbStored == featureCount)
00206       close();
00207     else
00208       // données pas toutes en mémoire -> interdit le writeFeature()
00209       _featuresAreWritable = false;
00210   }
00211   f.setVectSize(K::k, getVectSize());
00212   f.setData(*_pBuffer, (_featureIndex-_featureIndexOfBuffer)*getVectSize());
00213   f.setValidity(true);
00214 
00215   _featureIndex += step;
00216   if (_featureIndex > _lastFeatureIndex)
00217     _lastFeatureIndex = _featureIndex;
00218   if (_pLabelServer != NULL)
00219   {
00220     Label l;
00221     if (_pReader != NULL)
00222       l.setSourceName(_pReader->getFileName());
00223     else
00224       l.setSourceName(_pFeatureInputStream->getNameOfASource(0)); // TODO : not always 0 ?
00225     f.setLabelCode(_pLabelServer->addLabel(l));
00226   }
00227   _error = NO_ERROR;
00228   return true;
00229 }
00230 //-------------------------------------------------------------------------
00231 bool R::writeFeature(const Feature& f, unsigned long step)
00232 {
00233   if (!_featuresAreWritable)
00234     throw Exception("Feature writing forbidden", __FILE__, __LINE__);
00235   assert(_pReader != NULL || _pFeatureInputStream != NULL);
00236   if (_seekWanted)
00237   {
00238     _seekWanted = false;
00239     if (_historicUsage == LIMITED && !featureWantedIsInHistoric())
00240       throw Exception("Feature out of Historic", __FILE__, __LINE__);
00241     _featureIndex = _seekWantedIdx;
00242   }
00243   // si on depasse la fin du fichier
00244   unsigned long featureCount = getFeatureCount();
00245   if (_featureIndex >= featureCount)
00246     return false;
00247 
00248   // si on demande une feature hors du buffer
00249   if (_featureIndex < _featureIndexOfBuffer ||
00250       _featureIndex >= _featureIndexOfBuffer + _nbStored)
00251   {
00252     if (!_bufferSizeDefined)
00253     {
00254       unsigned long m = _pBuffer->size();
00255       if (_bufferIsInternal)
00256       {
00257         if (_bufferUsage == BUFFER_USERDEFINE)
00258           m = _userDefineBufferSize/sizeof(float);
00259         else if (_bufferUsage == BUFFER_AUTO)
00260         {
00261           if (getConfig().existsParam_loadFeatureFileMemAlloc)
00262           {
00263             m = getConfig().getParam_loadFeatureFileMemAlloc()/sizeof(float);
00264             unsigned long n = featureCount*getVectSize();
00265             if (n < m)
00266               m = n;
00267           }
00268         }
00269       }
00270       if (m < getVectSize()) // minimum size
00271         m = getVectSize();
00272       _pBuffer->setSize(m);
00273       _bufferSizeDefined = true;
00274     }
00275     unsigned long start = _featureIndex;
00276     if (featureCount-_featureIndex < _pBuffer->size()/getVectSize())
00277     {
00278       unsigned long x = _pBuffer->size()/getVectSize() -
00279                         (featureCount-_featureIndex);
00280       if (x < _featureIndex)
00281         start -= x;
00282       else
00283         start = 0;
00284     }
00285     // si le bloc de donnees a charger ne suit pas le bloc deja en memoire
00286     // on se repositionne dans le fichier
00287     if (start != _featureIndexOfBuffer + _nbStored + 1) {
00288       if (_pReader != NULL) {
00289         _pReader->seek(getHeaderLength() + start*getVectSize()*sizeof(float));
00290       }
00291       else {
00292         _pFeatureInputStream->seekFeature(start);
00293       }
00294     }
00295     // chargement des donnees dans le buffer
00296     if (_pReader != NULL)
00297       _nbStored = _pReader->readSomeFloats(*_pBuffer)/getVectSize();
00298     else
00299     {
00300       // Pas performant. A améliorer
00301       _nbStored = 0;
00302       unsigned long vectSize = _pFeatureInputStream->getVectSize();
00303       while ((_nbStored+1)*vectSize <= _pBuffer->size()
00304               && _pFeatureInputStream->readFeature(_f))
00305       {
00306         unsigned long ii = _nbStored*vectSize;
00307         for (unsigned long j=0; j<vectSize; j++)
00308           (*_pBuffer)[ii+j] = (float) _f[j];
00309         _nbStored++;
00310       }
00311     }
00312 
00313     _featureIndexOfBuffer = start;
00314     // if all the features are loaded in the buffer, we close the file
00315     if (_nbStored == featureCount)
00316       close();
00317     else
00318       // données pas toutes en mémoire -> interdit le writeFeature()
00319       throw Exception("Feature writing forbidden (data are not all in memory)"
00320                       , __FILE__, __LINE__);
00321   }
00322   unsigned long vectSize = getVectSize();
00323   if (vectSize != f.getVectSize())
00324     throw Exception("incompatibles vectSize (" + String::valueOf(vectSize)
00325         + "/" + String::valueOf(f.getVectSize()) + ")", __FILE__, __LINE__);
00326   unsigned long offset = (_featureIndex-_featureIndexOfBuffer)*vectSize;
00327   for (unsigned long i=0; i<vectSize; i++)
00328     (*_pBuffer)[i+offset] = (float)f[i]; // TODO : conversion a revoir ?
00329   _featureIndex += step;
00330   if (_featureIndex > _lastFeatureIndex)
00331     _lastFeatureIndex = _featureIndex;
00332   return true;
00333 }
00334 //-------------------------------------------------------------------------
00335 bool R::featureWantedIsInHistoric() const
00336 {
00337   if (_seekWantedIdx > _lastFeatureIndex)
00338     return false;
00339   if (_historicSize > _lastFeatureIndex)
00340     return _seekWantedIdx >= 0;
00341   return _seekWantedIdx >= _lastFeatureIndex-_historicSize;
00342 }
00343 //-------------------------------------------------------------------------
00344 void R::setExternalBufferToUse(FloatVector& v)
00345 {
00346   if (_bufferIsInternal && _pBuffer != NULL )
00347     delete _pBuffer;
00348   _pBuffer = &v;
00349   _bufferSizeDefined = false;
00350   _bufferIsInternal = false;
00351   _featureIndexOfBuffer = 0;
00352   _nbStored = 0;
00353 }
00354 //-------------------------------------------------------------------------
00355 // Comportement par defaut. Methode surchargee dans les sous-classes
00356 unsigned long R::getHeaderLength() { return 0; }
00357 //-------------------------------------------------------------------------
00358 unsigned long R::getSourceCount() {return 1;}
00359 //-------------------------------------------------------------------------
00360 unsigned long R::getFeatureCountOfASource(unsigned long srcIdx)
00361 {
00362   if (srcIdx != 0)
00363     throw Exception("Only 1 file available", __FILE__, __LINE__);
00364   return getFeatureCount();
00365 }
00366 //-------------------------------------------------------------------------
00367 unsigned long R::getFeatureCountOfASource(const FileName& f)
00368 {
00369   assert(_pReader != NULL || _pFeatureInputStream != NULL);
00370   if (_pReader != NULL)
00371   {
00372     if (f != _pReader->getFileName())
00373       throw Exception("Wrong source name : " + f, __FILE__, __LINE__);
00374   }
00375   else
00376     if (f != _pFeatureInputStream->getNameOfASource(0)) // TODO : always 0 ?
00377       throw Exception("Wrong source name : " + f, __FILE__, __LINE__);
00378   return getFeatureCount();
00379 }
00380 //-------------------------------------------------------------------------
00381 unsigned long R::getFirstFeatureIndexOfASource(unsigned long srcIdx)
00382 {
00383   if (srcIdx != 0)
00384     throw Exception("Only 1 file available", __FILE__, __LINE__);
00385   return 0;
00386 }
00387 //-------------------------------------------------------------------------
00388 unsigned long R::getFirstFeatureIndexOfASource(const FileName& f)
00389 {
00390   assert(_pReader != NULL || _pFeatureInputStream != NULL);
00391   if (_pReader != NULL)
00392   {
00393     if (f != _pReader->getFileName())
00394       throw Exception("Wrong source name : " + f, __FILE__, __LINE__);
00395   }
00396   else
00397     if (f != _pFeatureInputStream->getNameOfASource(0)) // TODO : always 0 ?
00398       throw Exception("Wrong source name : " + f, __FILE__, __LINE__);
00399   return 0;
00400 }
00401 //-------------------------------------------------------------------------
00402 const String& R::getNameOfASource(unsigned long srcIdx)
00403 {
00404   if (srcIdx != 0)
00405     throw Exception("Only 1 file available", __FILE__, __LINE__);
00406   assert(_pReader != NULL || _pFeatureInputStream != NULL);
00407   if (_pReader != NULL)
00408     return _pReader->getFileName();
00409   else
00410     return _pFeatureInputStream->getNameOfASource(0); // TODO : always 0 ?
00411 }
00412 //-------------------------------------------------------------------------
00413 String R::toString() const
00414 {
00415   assert(_pReader != NULL || _pFeatureInputStream != NULL);
00416   FeatureFileReaderSingle& r
00417                            = const_cast<FeatureFileReaderSingle&>(*this);
00418   const FeatureFlags flags(r.getFeatureFlags());
00419   String n;
00420   if (_pReader != NULL)
00421     n = _pReader->getFullFileName();
00422   else
00423     n = _pFeatureInputStream->getNameOfASource(0); // TODO : always 0 ?
00424   return Object::toString()
00425     + "\n  file name   = '" + n + "'"
00426     + "\n  vectSize    = " + String::valueOf(r.getVectSize())
00427     + "\n  feature count = " + String::valueOf(r.getFeatureCount())
00428     + "\n  sample rate   = " + String::valueOf(r.getSampleRate())
00429     + "\n  flag S    = " + String::valueOf(flags.useS)
00430     + "\n  flag E    = " + String::valueOf(flags.useE)
00431     + "\n  flag D    = " + String::valueOf(flags.useD)
00432     + "\n  flag DE     = " + String::valueOf(flags.useDE)
00433     + "\n  flag DD     = " + String::valueOf(flags.useDD)
00434     + "\n  flag DDE    = " + String::valueOf(flags.useDDE);
00435 }
00436 //-------------------------------------------------------------------------
00437 R::~FeatureFileReaderSingle()
00438 {
00439   if (_pReader != NULL)
00440     delete _pReader;
00441   // do not delete _pFeatureInputStream
00442   if (_pFeature != NULL)
00443     delete _pFeature;
00444   if (_bufferIsInternal && _pBuffer != NULL )
00445     delete _pBuffer;
00446 }
00447 //-------------------------------------------------------------------------
00448 
00449 #endif // !defined(ALIZE_FeatureFileReaderSingle_cpp)
00450