AudioFileReader.cpp

Go to the documentation of this file.
00001 /*
00002         This file is part of ALIZE which is an open-source tool for 
00003         speaker recognition.
00004 
00005     ALIZE is free software: you can redistribute it and/or modify
00006     it under the terms of the GNU Lesser General Public License as 
00007     published by the Free Software Foundation, either version 3 of 
00008     the License, or any later version.
00009 
00010     ALIZE is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU Lesser General Public License for more details.
00014 
00015     You should have received a copy of the GNU Lesser General Public 
00016     License along with ALIZE.
00017     If not, see <http://www.gnu.org/licenses/>.
00018         
00019         ALIZE is a development project initiated by the ELISA consortium
00020         [alize.univ-avignon.fr/] and funded by the French Research 
00021         Ministry in the framework of the TECHNOLANGUE program 
00022         [www.technolangue.net]
00023 
00024         The ALIZE project team wants to highlight the limits of voice
00025         authentication in a forensic context.
00026         The "Person  Authentification by Voice: A Need of Caution" paper 
00027         proposes a good overview of this point (cf. "Person  
00028         Authentification by Voice: A Need of Caution", Bonastre J.F., 
00029         Bimbot F., Boe L.J., Campbell J.P., Douglas D.A., Magrin-
00030         chagnolleau I., Eurospeech 2003, Genova).
00031         The conclusion of the paper is given below:
00032         [Currently, it is not possible to completely determine whether the 
00033         similarity between two recordings is due to the speaker or to other 
00034         factors, especially when: (a) the speaker does not cooperate, (b) there 
00035         is no control over recording equipment, (c) recording conditions are not 
00036         known, (d) one does not know whether the voice was disguised and, to a 
00037         lesser extent, (e) the linguistic content of the message is not 
00038         controlled. Caution and judgment must be exercised when applying speaker 
00039         recognition techniques, whether human or automatic, to account for these 
00040         uncontrolled factors. Under more constrained or calibrated situations, 
00041         or as an aid for investigative purposes, judicious application of these 
00042         techniques may be suitable, provided they are not considered as infallible.
00043         At the present time, there is no scientific process that enables one to 
00044         uniquely characterize a person's voice or to identify with absolute 
00045         certainty an individual from his or her voice.]
00046         Contact Jean-Francois Bonastre for more information about the licence or
00047         the use of ALIZE
00048 
00049         Copyright (C) 2003-2010
00050         Laboratoire d'informatique d'Avignon [lia.univ-avignon.fr]
00051         ALIZE admin [alize@univ-avignon.fr]
00052         Jean-Francois Bonastre [jean-francois.bonastre@univ-avignon.fr]
00053 */
00054 
00055 #if !defined(ALIZE_AudioFileReader_cpp)
00056 #define ALIZE_AudioFileReader_cpp
00057 #include <new>
00058 #include "AudioFileReader.h"
00059 #include "AudioFrame.h"
00060 #include "alizeString.h"
00061 #include "Config.h"
00062 #include "FileReader.h"
00063 
00064 using namespace alize;
00065 typedef AudioFileReader R;
00066 
00067 //-------------------------------------------------------------------------
00068 R::AudioFileReader(const FileName& f, const Config& c, BigEndian be)
00069 :AudioInputStream(getChannel(c)), _pReader(&FileReader::create(f, getPath(f, c),
00070  getExt(f, c), getBigEndian(c, be))), _frameIndex(0), _paramDefined(false),
00071  _seekWanted(true), _seekWantedIdx(0)
00072 {
00073   // only SPHERE files (other formats later...)
00074   // on part du principe que les .sph on un codage ulaw
00075   // si stereo, on ne garde que le 1er canal
00076 }
00077 //-------------------------------------------------------------------------
00078 R& R::create(const FileName& f, const Config& c, BigEndian be)
00079 {
00080   R* p = new (std::nothrow) R(f, c, be);
00081   assertMemoryIsAllocated(p, __FILE__, __LINE__);
00082   return *p;
00083 }
00084 //-------------------------------------------------------------------------
00085 String R::getPath(const FileName& f, const Config& c) const
00086 {
00087   if (f.beginsWith("/") || f.beginsWith("./"))
00088     return "";
00089   return c.getParam_audioFilesPath();
00090 }
00091 //-------------------------------------------------------------------------
00092 String R::getExt(const FileName& f, const Config& c) const
00093 {
00094   if (f.beginsWith("/") || f.beginsWith("./"))
00095     return "";
00096   return c.getParam_loadAudioFileExtension();
00097 }
00098 //-------------------------------------------------------------------------
00099 void R::close()
00100 {
00101   if (_pReader != NULL)
00102     _pReader->close();
00103 }
00104 //-------------------------------------------------------------------------
00105 unsigned long R::getChannel(const Config& c) const
00106 {
00107   if (!c.existsParam_loadAudioFileChannel)
00108     return 0;
00109   return c.getParam_loadAudioFileChannel();
00110 }
00111 //-------------------------------------------------------------------------
00112 bool R::getBigEndian(const Config& c, BigEndian b) const // protected
00113 {
00114   if (b == BIGENDIAN_TRUE)
00115     return true;
00116   if (b == BIGENDIAN_FALSE)
00117     return false;
00118   // BIGENDIAN_AUTO
00119   if (c.existsParam_loadAudioFileBigEndian)
00120     return c.getParam_loadAudioFileBigEndian();
00121   if (c.existsParam_bigEndian)
00122     return c.getParam_bigEndian();
00123   return false;
00124 }
00125 //-------------------------------------------------------------------------
00126 bool R::readFrame(AudioFrame& f)
00127 {
00128   if (_seekWanted)
00129   {
00130     _seekWanted = false;
00131     _frameIndex = _seekWantedIdx;
00132     if (_frameIndex >= getFrameCount())
00133       return false;
00134     unsigned long n = getSampleBytes()*getChannelCount();
00135     _pReader->seek(getHeaderLength()+n*_frameIndex);
00136   }
00137   // si on depasse la fin du fichier
00138   unsigned long i, frameCount = getFrameCount();
00139   // the call to getFrameCount() defines _sampleBytes & other stuff
00140   if (_frameIndex >= frameCount)
00141     return false;
00142   if (_selectedChannel > _channelCount)
00143     throw Exception("Unavailable selected channel #"
00144           + String::valueOf(_selectedChannel), __FILE__, __LINE__);
00145   if (_sampleBytes == 2) // 16 bits
00146   {
00147     if (_channelCount == 1)
00148       f.setData(_pReader->readInt2());
00149     else if (_channelCount == 2)
00150     {
00151       if (_selectedChannel == 0)
00152       {
00153         f.setData(_pReader->readInt2());
00154         _pReader->readInt2();
00155       }
00156       else
00157       {
00158         _pReader->readInt2();
00159         f.setData(_pReader->readInt2());
00160       }
00161     }
00162     else // multi-channels
00163     {
00164       for (i=0; i<_selectedChannel; i++)
00165         _pReader->readInt2();
00166       f.setData(_pReader->readInt2());
00167       for (i++; i<_channelCount; i++)
00168         _pReader->readInt2();
00169     }
00170   }
00171   else if (_sampleBytes == 1) // 8 bits
00172   {
00173     if (_channelCount == 1)
00174       f.setData(_pReader->readChar());
00175     else if (_channelCount == 2)
00176     {
00177       if (_selectedChannel == 0)
00178       {
00179         f.setData(_pReader->readChar());
00180         _pReader->readChar();
00181       }
00182       else
00183       {
00184         _pReader->readChar();
00185         f.setData(_pReader->readChar());
00186       }
00187     }
00188     else // multi-channels
00189     {
00190       for (i=0; i<_selectedChannel; i++)
00191         _pReader->readChar();
00192       f.setData(_pReader->readChar());
00193       for (i++; i<_channelCount; i++)
00194         _pReader->readChar();
00195     }
00196   }
00197   else
00198     throw Exception("Unimplemented code (TODO)", __FILE__, __LINE__);
00199   f.setValidity(true);
00200   _frameIndex++;
00201   return true; // invalid frame
00202 }
00203 //-------------------------------------------------------------------------
00204 void R::seekFrame(unsigned long n) // n = next frame to read
00205 {
00206   _seekWanted = true;
00207   _seekWantedIdx = n;
00208 }
00209 //-------------------------------------------------------------------------
00210 unsigned long R::getFrameCount()
00211 {
00212   if (!_paramDefined)
00213     readParams();
00214   return _frameCount;
00215 }
00216 //-------------------------------------------------------------------------
00217 void R::readParams() // private 
00218 {
00219   assert(_pReader != NULL);
00220   _pReader->open(); // can throw FileNotFoundException
00221   unsigned long lineCount = 0;
00222   long headerLength = -1;
00223   long frameCount = -1;
00224   long channelCount = -1;
00225   long sampleBytes = -1;
00226   long sampleRate = -1;
00227   while (true)
00228   {
00229     const String& s = _pReader->readLine();
00230     if (s.beginsWith("end_head"))
00231       break;
00232     lineCount++;
00233     // on saute la 1ere ligne
00234     if (lineCount == 1)
00235       continue;
00236     if (lineCount == 2)
00237     {
00238       headerLength = s.toLong();
00239       continue;
00240     }
00241     // on saute les comentaires
00242     if (s.beginsWith(";"))
00243       continue;
00244     if (s.beginsWith("channel_count -i "))
00245       channelCount = s.getToken(2).toLong();
00246     if (s.beginsWith("sample_rate -i "))
00247       sampleRate = s.getToken(2).toLong();
00248     else if (s.beginsWith("sample_n_bytes -i "))
00249       sampleBytes = s.getToken(2).toLong();
00250     else if (s.beginsWith("sample_count -i "))
00251       frameCount = s.getToken(2).toLong();
00252     else if (s.beginsWith("sample_byte_format -s2 01"))
00253       _pReader->swap() = false;
00254   }
00255   // tests whether everything is ok
00256   if (headerLength == -1 ||
00257       frameCount == -1 ||
00258       channelCount == -1 ||
00259       sampleRate == -1 ||
00260       sampleBytes == -1)
00261   {
00262     _pReader->close();
00263     throw InvalidDataException("Wrong header", __FILE__, __LINE__,
00264                                _pReader->getFullFileName());
00265   }
00266   _headerLength = headerLength;
00267   _frameCount = frameCount;
00268   _sampleBytes = sampleBytes;
00269   _frameRate = (real_t)sampleRate;
00270   _channelCount = channelCount;
00271   _paramDefined = true;
00272 }
00273 //-------------------------------------------------------------------------
00274 void R::reset() { seekFrame(0); }
00275 //-------------------------------------------------------------------------
00276 unsigned long R::getSourceCount() { return 1; }
00277 //-------------------------------------------------------------------------
00278 unsigned long R::getHeaderLength() // private
00279 {
00280   if (!_paramDefined)
00281     readParams();
00282   return _headerLength;
00283 }
00284 //-------------------------------------------------------------------------
00285 unsigned long R::getChannelCount()
00286 {
00287   if (!_paramDefined)
00288     readParams();
00289   return _channelCount;
00290 }
00291 //-------------------------------------------------------------------------
00292 unsigned long R::getSampleBytes()
00293 {
00294   if (!_paramDefined)
00295     readParams();
00296   return _sampleBytes;
00297 }
00298 //-------------------------------------------------------------------------
00299 real_t R::getFrameRate()
00300 {
00301   if (!_paramDefined)
00302     readParams();
00303   return _frameRate;
00304 }
00305 //-------------------------------------------------------------------------
00306 String R::getClassName() const { return "AudioFileReader"; }
00307 //-------------------------------------------------------------------------
00308 String R::toString() const
00309 {
00310   AudioFileReader& r = const_cast<AudioFileReader&>(*this);
00311   return Object::toString()
00312     +"\n frame count      = " + String::valueOf(r.getFrameCount())
00313     +"\n channel count    = " + String::valueOf(r.getChannelCount())
00314     +"\n selected channel = " + String::valueOf(r.getSelectedChannel())
00315     +"\n sample bytes     = " + String::valueOf(r.getSampleBytes());
00316 }
00317 //-------------------------------------------------------------------------
00318 R::~AudioFileReader()
00319 {
00320   assert(_pReader != NULL);
00321   delete _pReader;
00322 }
00323 //-------------------------------------------------------------------------
00324 
00325 #endif // !defined(ALIZE_AudioFileReader_cpp)
00326