00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 #if !defined(ALIZE_Svm_cpp)
00056 #define ALIZE_Svm_cpp
00057
00058 #include <iostream>
00059 #include <fstream>
00060 #include <cstdio>
00061 #include <cassert>
00062 #include <cmath>
00063 #include <stdlib.h>
00064 #include "Svm.h"
00065
00066 #include "liatools.h"
00067
00068 #define INF HUGE_VAL
00069 #define TAU 1e-12
00070
00071 using namespace alize;
00072 using namespace std;
00073
00074
00075
00076
00077 double getC(Matrix <double> train) {
00078 double acc=0.0;
00079 for (unsigned long i=0;i<train.rows();i++)
00080 for (unsigned long j=0;j<train.cols();j++)
00081 acc+=train(i,j)*train(i,j);
00082 acc/=train.rows();
00083 return 1/acc;
00084 }
00085
00086
00087
00088
00089 svm_parameter definesParameter (Config & config,Matrix <double> & mat) {
00090 struct svm_parameter param;
00091 param.svm_type = C_SVC;
00092 if (config.existsParam("kernelType")) {
00093 param.kernel_type = config.getParam("kernelType").toLong();
00094 } else {param.kernel_type=LINEAR;}
00095 param.degree = 1;
00096
00097 param.coef0 = 0;
00098 param.nu = 0.5;
00099 param.cache_size = 10000;
00100 if (config.existsParam("C")) {
00101 param.C = config.getParam("C").toDouble();
00102 } else {param.C = getC(mat);}
00103 if (verbose) cout << "(Svm) Regularization Factor C=" << param.C <<endl;
00104 param.eps = 1e-3;
00105 param.p = 0.1;
00106 param.shrinking = 1;
00107 param.probability = 0;
00108 param.nr_weight = 0;
00109 param.weight_label = NULL;
00110 param.weight = NULL;
00111 if (config.existsParam("targetPenalty")) {
00112 param.nr_weight = 1;
00113 param.weight_label = (int *)realloc(param.weight_label,sizeof(int)*param.nr_weight);
00114 param.weight = (double *)realloc(param.weight,sizeof(double)*param.nr_weight);
00115 param.weight_label[param.nr_weight-1] = 1;
00116 param.weight[param.nr_weight-1] = config.getParam("targetPenalty").toDouble();
00117 }
00118 return param;
00119 }
00120
00121
00122
00123
00124 void readProblem(Matrix <double> & inputProb,RealVector <double>& labels,struct svm_problem & prob,struct svm_parameter& param,struct svm_node *x_space) {
00125 if (verbose) cout << "(SVM) Reading SVM Classification pb" << endl;
00126 int max_index=0;
00127 int j=0;
00128 int i=0;
00129 for(i=0;i<prob.l;i++)
00130 {
00131 prob.x[i] = &x_space[j];
00132 prob.y[i] = labels[i];
00133 unsigned long k=0;
00134 while(k<inputProb.cols())
00135 {
00136 (x_space[j].index)=k+1;
00137 (x_space[j].value)=inputProb(i,k);
00138 ++j;
00139 k++;
00140 }
00141 if(j>=1 && x_space[j-1].index > max_index)
00142 max_index = x_space[j-1].index;
00143 x_space[j++].index = -1;
00144 }
00145 if (verbose) cout << "(Svm) Problem in memory" << endl;
00146 }
00147
00148
00149
00150 void concat(Matrix<double> & RES,Matrix<double> & A) {
00151 unsigned long stop=RES.rows();
00152 RES.setDimensions(RES.rows()+A.rows(),RES.cols());
00153 for (unsigned long i=0;i<A.rows();i++)
00154 for (unsigned long j=0;j<A.cols();j++)
00155 RES(stop+i,j)=A(i,j);
00156 }
00157
00158 void copyLine(Matrix<double>&RES,Matrix<double>&X,unsigned long idx) {
00159 for (unsigned long i=0;i<X.cols();i++)
00160 RES(idx,i)=X(0,i);
00161 }
00162
00163 void copy(Matrix<double>&X,Matrix<double>&D) {
00164 D.setAllValues(0.0);
00165 D.setDimensions(X.rows(),X.cols());
00166 double*x=X.getArray();
00167 double*d=D.getArray();
00168 for (unsigned long i=0;i<X.rows();i++)
00169 for (unsigned long j=0;j<X.cols();j++)
00170 d[i*X.cols()+j]=x[i*X.cols()+j];
00171 }
00172
00173
00174
00175
00176 struct Svm_Hyperplane {
00177 RealVector <double> w;
00178 double offset;
00179 };
00180
00181 void loadHyperplane(String &filename,struct Svm_Hyperplane &h,Config & config) {
00182 Matrix <double> model;
00183 model.load(filename,config);
00184 h.w.setSize(model.cols());
00185 for (unsigned long i=0;i<model.cols()-1;i++)
00186 h.w[i]=model(0,i);
00187 h.offset=model(0,model.cols()-1);
00188 }
00189
00190 void getHyperplane(struct Svm_Hyperplane &h,const svm_model *model,RealVector <double> &labels,Config & config) {
00191 int libsvm_bug=1;
00192 if(labels[0]!=1) libsvm_bug=-1;
00193 int l = model->l;
00194 const double * const *sv_coef = model->sv_coef;
00195 const svm_node * const *SV = model->SV;
00196 unsigned long vsize=config.getParam("vsize").toLong();
00197 h.w.setSize(vsize);
00198 h.w.setAllValues(0.0);
00199 for(int i=0;i<l;i++) {
00200 const svm_node *p = SV[i];
00201 for (unsigned long j=0;j<h.w.size();j++,p++)
00202 h.w[j]+=libsvm_bug*sv_coef[0][i]*p->value;
00203 }
00204 h.offset=libsvm_bug*model->rho[0];
00205 }
00206
00207
00208 void saveHyperplane(struct Svm_Hyperplane &h,String &outputFilename,Config & config) {
00209 Matrix <double> w;
00210 w.setDimensions(1,h.w.size()+1);
00211 for (unsigned long i=0;i<h.w.size();i++)
00212 w(0,i)=h.w[i];
00213 w(0,h.w.size())=h.offset;
00214 w.save(outputFilename,config);
00215 }
00216
00217
00218
00219 void predict(Matrix <double> & testInstances, String & model, RealVector <double>& scores,Config & config) {
00220 if (verbose) {cout << "(Svm) Processing model [" << model<<"] "<<endl;}
00221 struct Svm_Hyperplane h;
00222 loadHyperplane(model,h,config);
00223
00224 double norm_w=0.0;
00225 for (unsigned long j=0;j<h.w.size();j++)
00226 norm_w+=h.w[j]*h.w[j];
00227 norm_w=sqrt(norm_w);
00228
00229 if (verbose) cout << "(Svm) Norm of hyperplane: ["<<norm_w<<"] Offset ["<<h.offset<<"]"<<endl;
00230
00231 unsigned long nbScores=testInstances.rows();
00232 unsigned long vsize=testInstances.cols();
00233 scores.setSize(nbScores);
00234 scores.setAllValues(0.0);
00235 double *_t=testInstances.getArray();
00236 for (unsigned long j=0;j<scores.size();j++)
00237 for (unsigned long k=0;k<vsize;k++)
00238 scores[j]+=h.w[k]*_t[j*vsize+k];
00239
00240 for (unsigned long j=0;j<scores.size();j++)
00241 scores[j]-=h.offset;
00242 }
00243
00244
00245
00246
00247 XLine loadBck(Matrix<double>&Bck,Config &config) {
00248 XList bckList(config.getParam("inputBckList"));
00249 XLine impostorId;
00250 String vectorFilesPath = config.getParam("bckFilesPath");
00251 String vectorFilesExt = config.getParam("vectorFilesExtension");
00252 Bck.setDimensions(bckList.getLineCount(),config.getParam("vsize").toLong());
00253 Bck.setAllValues(0.0);
00254 if (verbose) cout << "(Svm) Bck Dimensions: ("<<Bck.rows()<<","<<Bck.cols()<<")"<<endl;
00255 XLine *pLine;
00256 unsigned long cnt=0;
00257 while((pLine=bckList.getLine())!=NULL) {
00258 Matrix <double> tmp;
00259 String file=vectorFilesPath+pLine->getElement(0)+vectorFilesExt;
00260 if (verboseLevel >1) cout<<" (Svm) Adding: ["<<file<<"]"<<endl;
00261 impostorId.addElement(pLine->getElement(0));
00262 tmp.load(file,config);
00263 if (tmp.cols()!=(unsigned long)config.getParam("vsize").toLong()) throw Exception("Param vectSize and vector size does not match",__FILE__,__LINE__);
00264 copyLine(Bck,tmp,cnt);
00265 cnt++;
00266 }
00267 bckList.rewind();
00268 return impostorId;
00269 }
00270
00271
00272
00273
00274 int svmTrain(Config &config)
00275 {
00276 String outputFilename = config.getParam("outputFilename");
00277 String vectorFilesPath = config.getParam("vectorFilesPath");
00278 String modelFilesPath = config.getParam("modelFilesPath");
00279 String vectorFilesExt = config.getParam("vectorFilesExtension");
00280 String modelFilesExt = config.getParam("modelFilesExtension");
00281 String inputFilename = config.getParam("inputFilename");
00282 try{
00283 Matrix <double> Bck;
00284 if (verbose) cout << "(Svm) Using ["<<config.getParam("inputBckList")<<"] as additionnal exemples for training" << endl;
00285 XLine impostorId=loadBck(Bck,config);
00286 XList inputList;
00287 bool tnorm=false;
00288 if (config.existsParam("Tnorm")) {tnorm=true;if(verbose)cout <<"(Svm) Tnorm mode: Leave one out in inputBckList"<<endl;}
00289 inputList.load(inputFilename,config);
00290 XLine *pLine;
00291
00292 while((pLine=inputList.getLine())!=NULL) {
00293 String clientModelname=pLine->getElement(0);
00294 String clientModelTrain=pLine->getElement(0);
00295 inputFilename=vectorFilesPath+clientModelTrain+vectorFilesExt;
00296 outputFilename=modelFilesPath+clientModelname+modelFilesExt;
00297 if (verbose) cout << "(Svm) Using ["<<inputFilename<<"] as training for: [" << outputFilename<<"]"<<endl;
00298 Matrix <double>clientVector;
00299 RealVector <double> labels;
00300 if (!tnorm) {
00301 clientVector.load(inputFilename,config);
00302 unsigned long nbSessions=clientVector.rows();
00303 if (verbose) cout << "(Svm) Number of sessions: " << nbSessions << endl;
00304 concat(clientVector,Bck);
00305 labels.setSize(clientVector.rows());
00306 for (unsigned long i=0;i<clientVector.rows();i++) {
00307 if (i<nbSessions) labels[i]=1.0;
00308 else labels[i]=-1.0;
00309 }
00310 }
00312 else {
00313 copy(Bck,clientVector);
00314 labels.setSize(clientVector.rows());
00315 for (unsigned long i=0;i<clientVector.rows();i++) {
00316 if (clientModelTrain==impostorId.getElement(i)) {labels[i]=1.0;}
00317 else labels[i]=-1.0;
00318 }
00319 }
00320
00321 if (verbose) cout << "(Svm) Pb Dimensions: ("<<clientVector.rows()<<","<<clientVector.cols()<<")"<<endl;
00322
00323 struct svm_parameter param=definesParameter(config,clientVector);
00324 struct svm_problem prob;
00325 prob.l=clientVector.rows();
00326 prob.y=new double[prob.l];
00327 prob.x=new svm_node*[prob.l];
00328 struct svm_node *x_space=NULL;
00329 x_space=new svm_node[(prob.l+1)*clientVector.cols()];
00330 readProblem(clientVector,labels,prob,param,x_space);
00331
00332
00333 const char* error_msg = svm_check_parameter(&prob,¶m);
00334 if(error_msg) {
00335 fprintf(stderr,"Error: %s\n",error_msg); exit(1);
00336 }
00337 else {
00338 if (verbose) cout<<"(Svm) Training Model"<<endl;
00339 svm_model *model=svm_train(&prob,¶m);
00340 if (verboseLevel > 1) {
00341 for (unsigned long i=0;i<(unsigned long)model->l;i++)
00342 cout << "Sv["<<i<<"] ="<<model->sv_coef[0][i]<<endl;
00343 }
00344 struct Svm_Hyperplane h;
00345 getHyperplane(h,model,labels,config);
00346 saveHyperplane(h,outputFilename,config);
00347 if (verbose) cout<<"(Svm) Model saved in "<<outputFilename<<endl;
00348 svm_destroy_model(model);
00349 }
00350
00351 svm_destroy_param(¶m);
00352 delete prob.y;
00353 delete prob.x;
00354 delete x_space;
00355 }
00356 }
00357 catch (Exception& e){cout << e.toString().c_str() << endl;}
00358 return 0;
00359 }
00360
00361
00362
00363
00364 int svmPredict(Config &config)
00365 {
00366 String inputFilename = config.getParam("inputFilename");
00367 String outputFilename = config.getParam("outputFilename");
00368 String vectorFilesPath = config.getParam("vectorFilesPath");
00369 String modelFilesPath = config.getParam("modelFilesPath");
00370 String vectorFilesExt = config.getParam("vectorFilesExtension");
00371 String modelFilesExt = config.getParam("modelFilesExtension");
00372
00373 try{
00374 String gender="M";
00375 int decision=0;
00376 ofstream outNist(outputFilename.c_str(),ios::out | ios::trunc);
00377 XList inputNdx(inputFilename,config);
00378 XLine *pLine;
00379 while((pLine=inputNdx.getLine())!=NULL) {
00380 String modelFilename=modelFilesPath+pLine->getElement(0)+modelFilesExt;
00381 Matrix <double> testInstances;
00382 testInstances.setDimensions(pLine->getElementCount()-1,config.getParam("vsize").toLong());
00383 if (verbose) cout << "(Svm) For "<<pLine->getElement(0) << ", add " <<pLine->getElementCount()-1 << " tests ";
00384 for (unsigned long i=1;i<pLine->getElementCount();i++) {
00385 Matrix <double> tmp;
00386 String testFile=vectorFilesPath+pLine->getElement(i)+vectorFilesExt;
00387 if (verboseLevel > 1) {cout << " [" << pLine->getElement(i)<<"] ";}
00388 tmp.load(testFile,config);
00389 if (tmp.cols()!=(unsigned long)config.getParam("vsize").toLong()) throw Exception("Param vectSize and vector size does not match",__FILE__,__LINE__);
00390 copyLine(testInstances,tmp,i-1);
00391 }
00392 if (verbose) cout <<endl<<"(Svm) Test dimensions: ["<<testInstances.rows()<<","<<testInstances.cols()<<"]"<<endl;
00393 RealVector <double> scores;
00394 predict(testInstances,modelFilename,scores,config);
00395 for (unsigned long i=1;i<pLine->getElementCount();i++) {
00396 if (verboseLevel >1) outputResultLine(scores[i-1], pLine->getElement(0),pLine->getElement(i) ,gender ,decision,cout);
00397 outputResultLine(scores[i-1], pLine->getElement(0),pLine->getElement(i) ,gender ,decision,outNist);
00398 }
00399 }
00400 outNist.close();
00401 }
00402 catch (Exception& e){cout << e.toString().c_str() << endl;exit(1);}
00403 return 0;
00404 }
00405
00406
00407
00408 int svmPredictTnorm(Config &config)
00409 {
00410 String inputFilename = config.getParam("inputFilename");
00411 String outputFilename = config.getParam("outputFilename");
00412 String vectorFilesPath = config.getParam("vectorFilesPath");
00413 String modelFilesPath = config.getParam("modelFilesPath");
00414 String vectorFilesExt = config.getParam("vectorFilesExtension");
00415 String modelFilesExt = config.getParam("modelFilesExtension");
00416
00417 try{
00418 String gender="M";
00419 int decision=0;
00420 ofstream outNist(outputFilename.c_str(),ios::out | ios::trunc);
00421 XList inputNdx(inputFilename,config);
00422 Matrix <double> testInstances;
00423 testInstances.setDimensions(inputNdx.getLine(0).getElementCount()-1,config.getParam("vsize").toLong());
00424 for (unsigned long i=1;i<inputNdx.getLine(0).getElementCount();i++) {
00425 Matrix <double> tmp;
00426 String testFile=vectorFilesPath+inputNdx.getLine(0).getElement(i)+vectorFilesExt;
00427 if (verboseLevel > 1) {cout << " [" << inputNdx.getLine(0).getElement(i)<<"] ";}
00428 tmp.load(testFile,config);
00429 if (tmp.cols()!=(unsigned long)config.getParam("vsize").toLong()) throw Exception("Param vectSize and vector size does not match",__FILE__,__LINE__);
00430 copyLine(testInstances,tmp,i-1);
00431 }
00432 XLine *pLine;
00433 while((pLine=inputNdx.getLine())!=NULL) {
00434 if (verbose) cout << "(Svm) For "<<pLine->getElement(0) << ", add " <<pLine->getElementCount()-1 << " tests ";
00435 String modelFilename=modelFilesPath+pLine->getElement(0)+modelFilesExt;
00436 if (verbose) cout <<endl<<"(Svm) Test dimensions: ["<<testInstances.rows()<<","<<testInstances.cols()<<"]"<<endl;
00437 RealVector <double> scores;
00438 predict(testInstances,modelFilename,scores,config);
00439 for (unsigned long i=1;i<pLine->getElementCount();i++) {
00440 if (verboseLevel >1) outputResultLine(scores[i-1], pLine->getElement(0),pLine->getElement(i) ,gender ,decision,cout);
00441 outputResultLine(scores[i-1], pLine->getElement(0),pLine->getElement(i) ,gender ,decision,outNist);
00442 }
00443 }
00444 outNist.close();
00445 }
00446 catch (Exception& e){cout << e.toString().c_str() << endl;exit(1);}
00447 return 0;
00448 }
00449
00450 double dotProduct(DoubleVector & v1,DoubleVector & v2) {
00451 double sum=0;
00452 for(unsigned long i=0;i<v1.size();i++)
00453 sum+=v1[i]*v2[i];
00454 return sum;
00455 }
00456
00457 #endif //!defined(ALIZE_Svm_cpp)
00458