00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 #include<iostream>
00056 #include<fstream>
00057 #include<cstdio>
00058 #include<cassert>
00059 #include<cmath>
00060 #include "TrainTools.h"
00061 #include "SegTools.h"
00062
00063 using namespace alize;
00064 using namespace std;
00065
00066
00067
00068
00069
00070
00071
00072
00073 double clrCrit(Config& config, SegCluster& c1, SegCluster &c2, StatServer& ss, FeatureServer& fs, MixtureGD& m1, MixtureGD& m2, MixtureGD& world){
00074
00075 if(verbose) cout << "Likelihood computation" << endl;
00076 MixtureStat &m1c2Acc=ss.createAndStoreMixtureStat(m1);
00077 MixtureStat &m2c1Acc=ss.createAndStoreMixtureStat(m2);
00078 MixtureStat &mWc1Acc=ss.createAndStoreMixtureStat(world);
00079 MixtureStat &mWc2Acc=ss.createAndStoreMixtureStat(world);
00080
00081 accumulateStatLLK(ss,fs,m1c2Acc,c2,config);
00082 accumulateStatLLK(ss,fs,mWc2Acc,c2,config);
00083 accumulateStatLLK(ss,fs,m2c1Acc,c1,config);
00084 accumulateStatLLK(ss,fs,mWc1Acc,c1,config);
00085
00086 double m1c2LLK = m1c2Acc.getMeanLLK();
00087 double m2c1LLK = m2c1Acc.getMeanLLK();
00088 double mWc1LLK = mWc1Acc.getMeanLLK();
00089 double mWc2LLK = mWc2Acc.getMeanLLK();
00090
00091 if ((verbose) && (verboseLevel == 2)){
00092 cout << "LLKm2c1 ("<<totalFrame(c1)<<") => " << m2c1LLK << endl;
00093 cout << "LLKWc1 ("<<totalFrame(c1)<<") => " << mWc1LLK << endl;
00094 cout << "LLKm1c2 ("<<totalFrame(c2)<<") => " << m1c2LLK << endl;
00095 cout << "LLKWc2 ("<<totalFrame(c2)<<") => " << mWc2LLK << endl;
00096 }
00097
00098 double clr = (m1c2LLK - mWc2LLK) + (m2c1LLK - mWc1LLK);
00099 if (verbose) cout << "CLR ("<<totalFrame(c1)<<"/"<<totalFrame(c2)<< ") => " << clr << endl;
00100 return clr;
00101 }
00102
00103
00104
00105
00106
00107
00108 double gllrCrit(Config& config, SegCluster& c1, SegCluster &c2, SegCluster &c12, StatServer& ss, FeatureServer& fs, MixtureGD& m1, MixtureGD& m2, MixtureGD& m12){
00109
00110 MixtureStat &m1Acc=ss.createAndStoreMixtureStat(m1);
00111 MixtureStat &m2Acc=ss.createAndStoreMixtureStat(m2);
00112 MixtureStat &m12Acc=ss.createAndStoreMixtureStat(m12);
00113
00114 accumulateStatLLK(ss,fs,m1Acc,c1,config);
00115 accumulateStatLLK(ss,fs,m2Acc,c2,config);
00116 accumulateStatLLK(ss,fs,m12Acc,c12,config);
00117
00118 double m1LLK = m1Acc.getAccumulatedLLK();
00119 double m2LLK = m2Acc.getAccumulatedLLK();
00120 double m12LLK = m12Acc.getAccumulatedLLK();
00121 if ((verbose) && (verboseLevel == 2)){
00122 cout << "LLK1 => " << m1LLK << endl;
00123 cout << "LLK2 => " << m2LLK << endl;
00124 cout << "LLK12 => " << m12LLK << endl;
00125 }
00126 double gllr = m12LLK - (m1LLK + m2LLK);
00127 if (verbose) cout << "GLLR ("<<totalFrame(c1)<<"/"<<totalFrame(c2)<< ") => " << gllr << endl;
00128 return gllr;
00129 }
00130
00131
00132
00133
00134
00135
00136 double bicCrit(Config& config, SegCluster& c1, SegCluster &c2, SegCluster &c12, StatServer& ss, FeatureServer& fs, MixtureGD& m1, MixtureGD& m2, MixtureGD& m12){
00137
00138 double gllr = gllrCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00139 double lambda = 1.0;
00140 double P = 0.5 * ((2 * fs.getVectSize()+1)*m1.getDistribCount()) * log((double)totalFrame(c1)+totalFrame(c2));
00141 double bic = (-1 * gllr) - lambda * P;
00142 if (verbose) cout << "BIC ("<<totalFrame(c1)<<"/"<<totalFrame(c2)<< ") => " << bic << " P => " << P << endl;
00143
00144 return bic;
00145 }
00146
00147
00148
00149
00150
00151
00152 double deltabicCrit(Config& config, SegCluster& c1, SegCluster &c2, SegCluster &c12, StatServer& ss, FeatureServer& fs, MixtureGD& m1, MixtureGD& m2, MixtureGD& m12){
00153
00154 double deltabic = gllrCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00155 if (verbose) cout << "Delta BIC (=GLLR) ("<<totalFrame(c1)<<"/"<<totalFrame(c2)<< ") => " << deltabic << endl;
00156
00157 return deltabic;
00158 }
00159
00160
00161
00162
00163
00164
00165 double clusteringCriterionByAdapt(Config& config, Seg *segment1, Seg *segment2, StatServer& ss, FeatureServer& fs, MixtureGD& world, String crit){
00166
00167 if(verbose){
00168 cout << "Computation between: " << segment1->begin() << " " << endSeg(segment1);
00169 cout << " and " << segment2->begin() << " " << endSeg(segment2) << endl;
00170 }
00171
00172 SegServer segTemp;
00173 MixtureServer ms(config);
00174 MAPCfg mapCfg(config);
00175 mapCfg.setMethod("MAPOccDep");
00176 mapCfg.setMeanReg(16);
00177 mapCfg.setBaggedFrameProbability(1.0);
00178
00179 if(verbose) cout << "Mixture building m1 and m2" << endl;
00180 SegCluster& c1=segTemp.createCluster();
00181 c1.add(segTemp.createSeg(segment1->begin(),segment1->length(),0,"null",segment1->sourceName()));
00182 SegCluster& c2=segTemp.createCluster();
00183 c2.add(segTemp.createSeg(segment2->begin(),segment2->length(),0,"null",segment2->sourceName()));
00184
00185 MixtureGD &m1=ms.duplicateMixture(world,DUPL_DISTRIB);
00186 MixtureGD &m2=ms.duplicateMixture(world,DUPL_DISTRIB);
00187 adaptModel(config,ss,ms,fs,c1,world,m1, mapCfg);
00188 adaptModel(config,ss,ms,fs,c2,world,m2, mapCfg);
00189
00190 if((crit=="GLR") || (crit=="BIC")){
00191 if(verbose) cout << "Mixture building m12" << endl;
00192 SegCluster& c12=segTemp.createCluster();
00193 c12.add(segTemp.createSeg(segment1->begin(),segment1->length(),0,"null",segment1->sourceName()));
00194 c12.add(segTemp.createSeg(segment2->begin(),segment2->length(),0,"null",segment2->sourceName()));
00195
00196 MixtureGD &m12=ms.duplicateMixture(world,DUPL_DISTRIB);
00197 adaptModel(config,ss,ms,fs,c12,world,m12, mapCfg);
00198
00199 if(crit=="GLR"){
00200 return gllrCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00201 }
00202 if(crit=="BIC"){
00203 return bicCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00204 }
00205 }
00206
00207 else if(crit == "CLR"){
00208 return clrCrit(config, c1, c2, ss, fs, m1, m2, world);
00209 }
00210 else{
00211 cout << "ERROR: unknown clustering criterion !!!!" << endl;
00212 return -1;
00213 }
00214 return -1;
00215
00216 }
00217
00218
00219
00220
00221
00222
00223
00224 double clusteringCriterion(Config& config, Seg *segment1, Seg *segment2, StatServer& ss, FeatureServer& fs, MixtureGD& world, String crit){
00225
00226
00227 if(verbose){
00228 cout << "Computation between: " << segment1->begin() << " " << endSeg(segment1);
00229 cout << " and " << segment2->begin() << " " << endSeg(segment2) << endl;
00230 }
00231
00232 if(verbose) cout << "Mixture building m1 and m2" << endl;
00233 TrainCfg trainCfg(config);
00234 trainCfg.setBaggedFrameProbability(0.8);
00235 trainCfg.setNbTrainIt(10);
00236
00237 SegServer segTemp;
00238 SegCluster& c1=segTemp.createCluster();
00239 c1.add(segTemp.createSeg(segment1->begin(),segment1->length(),0,"null",segment1->sourceName()));
00240 SegCluster& c2=segTemp.createCluster();
00241 c2.add(segTemp.createSeg(segment2->begin(),segment2->length(),0,"null",segment2->sourceName()));
00242
00243 MixtureServer ms(config);
00244 MixtureGD &m1=ms.duplicateMixture(world,DUPL_DISTRIB);
00245 MixtureGD &m2=ms.duplicateMixture(world,DUPL_DISTRIB);
00246
00247 FrameAccGD globalFrameAcc1;
00248 globalMeanCov (fs,c1,globalFrameAcc1,config);
00249 DoubleVector globalMean1=globalFrameAcc1.getMeanVect();
00250 DoubleVector globalCov1=globalFrameAcc1.getCovVect();
00251 FrameAccGD globalFrameAcc2;
00252 globalMeanCov (fs,c2,globalFrameAcc2,config);
00253 DoubleVector globalMean2=globalFrameAcc2.getMeanVect();
00254 DoubleVector globalCov2=globalFrameAcc2.getCovVect();
00255
00256 if(verbose) cout<<" Training EM for speaker m1"<<endl;
00257 trainModel(config,ss,fs,c1,globalMean1,globalCov1,m1, trainCfg);
00258 if(verbose) cout<<" Training EM for speaker m2"<<endl;
00259 trainModel(config,ss,fs,c2,globalMean2,globalCov2,m2, trainCfg);
00260
00261 if((crit=="GLR") || (crit=="BIC")){
00262 if(verbose) cout << "Mixture building m12" << endl;
00263 SegCluster& c12=segTemp.createCluster();
00264 c12.add(segTemp.createSeg(segment1->begin(),segment1->length(),0,"null",segment1->sourceName()));
00265 c12.add(segTemp.createSeg(segment2->begin(),segment2->length(),0,"null",segment2->sourceName()));
00266 MixtureGD &m12=ms.duplicateMixture(world,DUPL_DISTRIB);
00267 FrameAccGD globalFrameAcc12;
00268 globalMeanCov (fs,c12,globalFrameAcc12,config);
00269 DoubleVector globalMean12=globalFrameAcc12.getMeanVect();
00270 DoubleVector globalCov12=globalFrameAcc12.getCovVect();
00271 if(verbose) cout<<" Training EM for speaker m12"<<endl;
00272 trainModel(config,ss,fs,c12,globalMean12,globalCov12,m12, trainCfg);
00273
00274 if(crit=="GLR"){
00275 return gllrCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00276 }
00277 if(crit=="BIC"){
00278 return bicCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00279 }
00280 }
00281
00282 else if(crit == "CLR"){
00283 return clrCrit(config, c1, c2, ss, fs, m1, m2, world);
00284 }
00285 else{
00286 cout << "ERROR: unknown clustering criterion !!!!" << endl;
00287 return -1;
00288 }
00289 return -1;
00290 }
00291
00292
00293
00294
00295
00296
00297
00298 SegCluster& mergeCluster(SegCluster& c1, SegCluster& c2, SegServer& segTemp, String merge="NULL"){
00299
00300 SegCluster& c12=segTemp.createCluster();
00301 c12.setString(merge);
00302 Seg *segment;
00303 c1.rewind();
00304 while((segment=c1.getSeg()) != NULL){
00305 c12.add(segTemp.createSeg(segment->begin(),segment->length(),0,merge,segment->sourceName()));
00306 }
00307 c2.rewind();
00308 while((segment=c2.getSeg()) != NULL){
00309 c12.add(segTemp.createSeg(segment->begin(),segment->length(),0,merge,segment->sourceName()));
00310 }
00311 return c12;
00312 }
00313
00314
00315
00316
00317
00318
00319 double clusteringCriterionWithoutWorldInit(Config& config, SegCluster& c1, SegCluster& c2, StatServer& ss,
00320 FeatureServer& fs,MixtureGD& world,String crit){
00321
00322 double criterion=0.0;
00323 SegServer segTemp;
00324 MixtureServer ms(config);
00325
00326 if(verbose) cout << "Mixture building m1 and m2" << endl;
00327 TrainCfg trainCfg(config);
00328 trainCfg.setInitVarFlooring(0.0);
00329 trainCfg.setFinalVarFlooring(0.0);
00330
00331
00332 FrameAccGD globalFrameAcc1;
00333 globalMeanCov (fs,c1,globalFrameAcc1,config);
00334 DoubleVector globalMean1=globalFrameAcc1.getMeanVect();
00335 DoubleVector globalCov1=globalFrameAcc1.getCovVect();
00336 FrameAccGD globalFrameAcc2;
00337 globalMeanCov (fs,c2,globalFrameAcc2,config);
00338 DoubleVector globalMean2=globalFrameAcc2.getMeanVect();
00339 DoubleVector globalCov2=globalFrameAcc2.getCovVect();
00340
00341
00342 if(crit == "DELTABIC"){
00343 if (verbose) cout <<"m1 and m2 model init from scratch (hola !)"<<endl;
00344 trainCfg.setBaggedFrameProbabilityInit(0.4);
00345
00346 int gausNb1=5;
00347 int gausNb2=5;
00348
00349
00350
00351
00352
00353
00354
00355 MixtureGD &m1=ms.createMixtureGD(gausNb1);
00356 MixtureGD &m2=ms.createMixtureGD(gausNb2);
00357 trainCfg.setNbTrainIt(1);
00358
00359 mixtureInit(ms,fs,m1,c1,globalCov1,config,trainCfg);
00360 mixtureInit(ms,fs,m2,c2,globalCov2,config,trainCfg);
00361
00362 trainCfg.setNbTrainIt(5);
00363 trainCfg.setBaggedFrameProbability(1.0);
00364 if(verbose) cout<<" Training EM for speaker m1 (nbGauss:"<<gausNb1<<")"<<endl;
00365 trainModel(config,ss,fs,c1,globalMean1,globalCov1,m1, trainCfg);
00366 if(verbose) cout<<" Training EM for speaker m2 (nbGauss:"<<gausNb2<<")"<<endl;
00367 trainModel(config,ss,fs,c2,globalMean2,globalCov2,m2, trainCfg);
00368
00369 if(verbose) cout << "Mixture building m12" << endl;
00370 SegCluster& c12=mergeCluster(c1, c2, segTemp);
00371
00372 int gausNb12=gausNb1+gausNb2;
00373 MixtureGD &m12=ms.createMixtureGD(gausNb12);
00374
00375 FrameAccGD globalFrameAcc12;
00376 globalMeanCov (fs,c12,globalFrameAcc12,config);
00377 DoubleVector globalMean12=globalFrameAcc12.getMeanVect();
00378 DoubleVector globalCov12=globalFrameAcc12.getCovVect();
00379
00380 if (verbose) cout <<"m12 model init from scratch"<<endl;
00381 trainCfg.setBaggedFrameProbabilityInit(0.4);
00382 trainCfg.setBaggedFrameProbability(0.4);
00383 trainCfg.setNbTrainIt(1);
00384 mixtureInit(ms,fs,m12,c12,globalCov12,config,trainCfg);
00385
00386 if(verbose) cout<<" Training EM for speaker m12"<<endl;
00387 trainCfg.setNbTrainIt(5);
00388 trainCfg.setBaggedFrameProbability(1.0);
00389 trainModel(config,ss,fs,c12,globalMean12,globalCov12,m12, trainCfg);
00390
00391 criterion = deltabicCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00392 ms.deleteMixture(m1);
00393 ms.deleteMixture(m2);
00394 ms.deleteMixture(m12);
00395 ms.deleteUnusedDistribs();
00396
00397 return criterion;
00398 }
00399 else{
00400 if (verbose) cout <<"m1 and m2 model init from scratch"<<endl;
00401 trainCfg.setBaggedFrameProbabilityInit(0.1);
00402 trainCfg.setBaggedFrameProbability(0.4);
00403 MixtureGD &m1=ms.createMixtureGD();
00404 MixtureGD &m2=ms.createMixtureGD();
00405
00406 mixtureInit(ms,fs,m1,c1,globalCov1,config,trainCfg);
00407 mixtureInit(ms,fs,m2,c2,globalCov2,config,trainCfg);
00408
00409 trainCfg.setNbTrainIt(10);
00410 trainCfg.setBaggedFrameProbability(1.0);
00411 if(verbose) cout<<" Training EM for speaker m1"<<endl;
00412 trainModel(config,ss,fs,c1,globalMean1,globalCov1,m1, trainCfg);
00413 if(verbose) cout<<" Training EM for speaker m2"<<endl;
00414 trainModel(config,ss,fs,c2,globalMean2,globalCov2,m2, trainCfg);
00415
00416
00417 if((crit=="GLR") || (crit=="BIC")){
00418 if(verbose) cout << "Mixture building m12" << endl;
00419 SegCluster& c12=mergeCluster(c1, c2, segTemp);
00420
00421 FrameAccGD globalFrameAcc12;
00422 globalMeanCov (fs,c12,globalFrameAcc12,config);
00423 DoubleVector globalMean12=globalFrameAcc12.getMeanVect();
00424 DoubleVector globalCov12=globalFrameAcc12.getCovVect();
00425
00426 if (verbose) cout <<"m12 model init from scratch"<<endl;
00427 MixtureGD &m12=ms.createMixtureGD();
00428 trainCfg.setBaggedFrameProbabilityInit(0.1);
00429 trainCfg.setBaggedFrameProbability(0.4);
00430 trainCfg.setNbTrainIt(2);
00431 mixtureInit(ms,fs,m12,c12,globalCov12,config,trainCfg);
00432 if(verbose) cout<<" Training EM for speaker m12"<<endl;
00433 trainCfg.setNbTrainIt(10);
00434 trainCfg.setBaggedFrameProbability(1.0);
00435 trainModel(config,ss,fs,c12,globalMean12,globalCov12,m12, trainCfg);
00436
00437 if(crit=="GLR"){
00438 criterion = gllrCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00439 ms.deleteMixture(m1);
00440 ms.deleteMixture(m2);
00441 ms.deleteMixture(m12);
00442 ms.deleteUnusedDistribs();
00443 return criterion;
00444 }
00445 if(crit=="BIC"){
00446 criterion = bicCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00447 ms.deleteMixture(m1);
00448 ms.deleteMixture(m2);
00449 ms.deleteMixture(m12);
00450 ms.deleteUnusedDistribs();
00451 return criterion;
00452 }
00453 }
00454
00455 else if(crit == "CLR"){
00456 criterion = clrCrit(config, c1, c2, ss, fs, m1, m2, world);
00457 ms.deleteMixture(m1);
00458 ms.deleteMixture(m2);
00459 ms.deleteUnusedDistribs();
00460 return criterion;
00461 }
00462 else{
00463 cout << "ERROR: unknown clustering criterion !!!!" << endl;
00464 return -1;
00465 }
00466 }
00467
00468 return -1;
00469 }
00470
00471
00472
00473
00474
00475
00476 double clusteringCriterionWithoutWorldInit(Config& config, SegCluster& c1, MixtureGD& m1, SegCluster& c2, MixtureGD& m2, StatServer& ss,
00477 FeatureServer& fs,MixtureGD& world,String crit){
00478
00479 SegServer segTemp;
00480
00481 TrainCfg trainCfg(config);
00482 trainCfg.setBaggedFrameProbability(0.8);
00483 trainCfg.setNbTrainIt(10);
00484 trainCfg.setInitVarFlooring(0.0);
00485 trainCfg.setFinalVarFlooring(0.0);
00486
00487 if(crit == "DELTABIC"){
00488
00489 SegCluster& c12=mergeCluster(c1, c2, segTemp);
00490
00491 MixtureServer msc12(config);
00492 MixtureGD &m12=msc12.createMixtureGD((c1.getCount()+c2.getCount())*5);
00493
00494
00495 FrameAccGD globalFrameAcc12;
00496 globalMeanCov (fs,c12,globalFrameAcc12,config);
00497 DoubleVector globalMean12=globalFrameAcc12.getMeanVect();
00498 DoubleVector globalCov12=globalFrameAcc12.getCovVect();
00499
00500 if(totalFrame(c12) < 150){
00501 trainCfg.setBaggedFrameProbabilityInit(1.0);
00502 trainCfg.setBaggedFrameProbability(1.0);
00503 }else{
00504 trainCfg.setBaggedFrameProbabilityInit(0.4);
00505 trainCfg.setBaggedFrameProbability(0.4);
00506 }
00507 trainCfg.setNbTrainIt(2);
00508 mixtureInit(msc12,fs,m12,c12,globalCov12,config,trainCfg);
00509
00510 trainCfg.setNbTrainIt(5);
00511 trainCfg.setBaggedFrameProbability(1.0);
00512 trainModel(config,ss,fs,c12,globalMean12,globalCov12,m12, trainCfg);
00513
00514 return deltabicCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00515
00516 }
00517 else{
00518 if((crit=="GLR") || (crit=="BIC")){
00519 if(verbose) cout << "Mixture building m12" << endl;
00520 MixtureServer ms(config);
00521 SegCluster& c12=mergeCluster(c1, c2, segTemp);
00522
00523 FrameAccGD globalFrameAcc12;
00524 globalMeanCov (fs,c12,globalFrameAcc12,config);
00525 DoubleVector globalMean12=globalFrameAcc12.getMeanVect();
00526 DoubleVector globalCov12=globalFrameAcc12.getCovVect();
00527
00528 if (verbose) cout <<"m12 model init from scratch"<<endl;
00529 MixtureGD &m12=ms.createMixtureGD(c1.getCount());
00530 trainCfg.setBaggedFrameProbabilityInit(0.4);
00531 trainCfg.setBaggedFrameProbability(0.4);
00532 trainCfg.setNbTrainIt(2);
00533 mixtureInit(ms,fs,m12,c12,globalCov12,config,trainCfg);
00534 if(verbose) cout<<" Training EM for speaker m12"<<endl;
00535 trainCfg.setNbTrainIt(5);
00536 trainCfg.setBaggedFrameProbability(1.0);
00537 trainModel(config,ss,fs,c12,globalMean12,globalCov12,m12, trainCfg);
00538
00539 if(crit=="GLR"){
00540 return gllrCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00541 }
00542 if(crit=="BIC"){
00543 return bicCrit(config, c1, c2, c12, ss, fs, m1, m2, m12);
00544 }
00545 }
00546
00547 else if(crit == "CLR"){
00548 return clrCrit(config, c1, c2, ss, fs, m1, m2, world);
00549 }
00550 else{
00551 cout << "ERROR: unknown clustering criterion !!!!" << endl;
00552 return -1;
00553 }
00554 }
00555 return -1;
00556 }
00557
00558
00559
00560
00561
00562
00563 double clusteringCriterionWithoutWorldInit(Config& config, Seg *segment1, Seg *segment2, StatServer& ss, FeatureServer& fs,MixtureGD& world,String crit){
00564
00565 SegServer segTemp;
00566 if(verbose){
00567 cout << "Computation between: " << segment1->begin() << " " << endSeg(segment1);
00568 cout << " and " << segment2->begin() << " " << endSeg(segment2) << endl;
00569 }
00570
00571 SegCluster& c1=segTemp.createCluster();
00572 c1.add(segTemp.createSeg(segment1->begin(),segment1->length(),0,"null",segment1->sourceName()));
00573 SegCluster& c2=segTemp.createCluster();
00574 c2.add(segTemp.createSeg(segment2->begin(),segment2->length(),0,"null",segment2->sourceName()));
00575
00576 return clusteringCriterionWithoutWorldInit(config, c1, c2, ss, fs,world,crit);
00577
00578 }
00579
00580
00581
00582
00583
00584
00585
00586 bool isSimilarSegment(Config& config, Seg *segment1, Seg *segment2, StatServer& ss, FeatureServer& fs, MixtureGD& world, String crit){
00587
00588
00589 double threshold=0.0;
00590
00591
00592 double critValue=clusteringCriterionWithoutWorldInit(config, segment1, segment2, ss, fs, world, crit);
00593 if(config.existsParam("clusteringCritThresh"))
00594 threshold=config.getParam("clusteringCritThresh").toDouble();
00595 if((crit == "BIC") || (crit == "CLR") || (crit == "DELTABIC"))
00596 if(critValue > threshold) return true;
00597
00598 else if(crit == "GLR")
00599 if(critValue < threshold) return true;
00600
00601 return false;
00602 }
00603
00604
00605
00606
00607
00608
00609
00610
00611 double cohortMaxLikelihood(StatServer& ss,FeatureServer &fs,hmm& actualHMM,unsigned long except,unsigned long begin,unsigned long longSelection,Config& config){
00612 DoubleVector llr;
00613
00614 cout << "nb: " << actualHMM.getNbState() << endl;
00615 for(unsigned long i=0; i<actualHMM.getNbState(); i++){
00616 if(i!=except){
00617 MixtureGD& m=actualHMM.getDensity(i);
00618 double mean = meanLikelihood(ss,fs,m,begin,longSelection,config);
00619 cout << "mean: " << mean << endl;
00620 llr.addValue(mean);
00621 }
00622 }
00623 double max = llr[llr.getIndexOfLargestValue()];
00624 if((verbose) && (verboseLevel == 2))
00625 cout << "Max of cohort: " << max << endl;
00626 return max;
00627 }
00628
00629
00630
00631
00632
00633
00634
00635 Seg *bestFittingSegment(Config& config, SegCluster& cluster, MixtureGD& m, StatServer& ss, FeatureServer& fs){
00636
00637 DoubleVector llr,starts;
00638 cluster.rewind();
00639 Seg *segment;
00640 while((segment=cluster.getSeg())!=NULL){
00641 String fileLabel=segment->sourceName();
00642 unsigned long begin=segment->begin()+fs.getFirstFeatureIndexOfASource(fileLabel);
00643
00644 double llrTmp = meanLikelihood(ss,fs,m,begin,segment->length(),config);
00645 if(verbose) cout << segment->begin() << " " << endSeg(segment) << " => " << llrTmp << endl;
00646 llr.addValue(llrTmp);
00647 }
00648
00649 if(llr.size() == 0) return NULL;
00650
00651 bool goon=true;
00652 while(goon){
00653 unsigned long ind = llr.getIndexOfLargestValue();
00654 if((verbose) && (verboseLevel == 2)) cout << "Best llk " << ind << " => " << llr[ind] << endl;
00655 if(llr[ind] == -200)
00656 return NULL;
00657
00658 cluster.rewind();
00659 for (unsigned long i=0; i<=ind; i++){
00660 segment=cluster.getSeg();
00661 }
00662 if(segment->length() > 600)
00663 goon = false;
00664 else{
00665 llr[ind]=-200;
00666 }
00667 }
00668 cluster.rewind();
00669
00670 return segment;
00671 }
00672
00673
00674
00675
00676
00677
00678 Seg *bestFittingSegment(Config& config, SegCluster& cluster, MixtureGD& m, StatServer& ss, FeatureServer& fs, MixtureGD& world){
00679
00680 DoubleVector llr,starts;
00681 cluster.rewind();
00682 Seg *segment;
00683 while((segment=cluster.getSeg())!=NULL){
00684 String fileLabel=segment->sourceName();
00685 unsigned long begin=segment->begin()+fs.getFirstFeatureIndexOfASource(fileLabel);
00686
00687
00688 double normalizedFactor=meanLikelihood(ss,fs,world,begin,segment->length(),config);
00689
00690 double llrTmp = meanLikelihood(ss,fs,m,begin,segment->length(),config) - normalizedFactor;
00691 if(verbose) cout << segment->begin() << " " << endSeg(segment) << " => " << llrTmp << endl;
00692 llr.addValue(llrTmp);
00693 }
00694
00695 if(llr.size() == 0) return NULL;
00696
00697 bool goon=true;
00698 while(goon){
00699 unsigned long ind = llr.getIndexOfLargestValue();
00700 if((verbose) && (verboseLevel == 2)) cout << "Best llk " << ind << " => " << llr[ind] << endl;
00701 if(llr[ind] == -200)
00702 return NULL;
00703
00704 cluster.rewind();
00705 for (unsigned long i=0; i<=ind; i++){
00706 segment=cluster.getSeg();
00707 }
00708 if(segment->length() > 600)
00709 goon = false;
00710 else{
00711 llr[ind]=-200;
00712 }
00713 }
00714 cluster.rewind();
00715
00716 return segment;
00717 }
00718
00719
00720
00721
00722
00723
00724 Seg *bestFittingSegment(Config& config, SegCluster& cluster, MixtureGD& m, StatServer& ss, FeatureServer& fs, hmm& actualHMM,unsigned long except){
00725
00726 DoubleVector llr,starts;
00727 cluster.rewind();
00728 Seg *segment;
00729 while((segment=cluster.getSeg())!=NULL){
00730 String fileLabel=segment->sourceName();
00731 unsigned long begin=segment->begin()+fs.getFirstFeatureIndexOfASource(fileLabel);
00732
00733
00734 double normalizedFactor=cohortMaxLikelihood(ss,fs,actualHMM,except,segment->begin(),segment->length(),config);
00735
00736 double llrTmp = meanLikelihood(ss,fs,m,begin,segment->length(),config) - normalizedFactor;
00737 if(verbose) cout << segment->begin() << " " << endSeg(segment) << " => " << llrTmp << endl;
00738 llr.addValue(llrTmp);
00739 }
00740
00741 if(llr.size() == 0) return NULL;
00742
00743 bool goon=true;
00744 while(goon){
00745 unsigned long ind = llr.getIndexOfLargestValue();
00746 if((verbose) && (verboseLevel == 2)) cout << "Best llk " << ind << " => " << llr[ind] << endl;
00747 if(llr[ind] == -200)
00748 return NULL;
00749
00750 cluster.rewind();
00751 for (unsigned long i=0; i<=ind; i++){
00752 segment=cluster.getSeg();
00753 }
00754 if(segment->length() > 600)
00755 goon = false;
00756 else{
00757 llr[ind]=-200;
00758 }
00759 }
00760 cluster.rewind();
00761
00762 return segment;
00763 }
00764
00765
00766
00767
00768
00769
00770
00771 unsigned long bestFittingCluster(Config& config, hmm& actualHMM, SegServer& actualSeg, Seg *segment, StatServer& ss, FeatureServer& fs, unsigned long exceptInd=200){
00772
00773 DoubleVector llr,allInd;
00774
00775 for(unsigned long icluster=0; icluster<actualSeg.getClusterCount(); icluster++){
00776 if(icluster != exceptInd){
00777 MixtureGD& m=actualHMM.getDensity(icluster);
00778 double llrTmp = meanLikelihood(ss,fs,m,segment->begin(),segment->length(),config);
00779 if(verbose) cout << "meanLikelihood of cluster: " << icluster << " => " << llrTmp << endl;
00780 llr.addValue(llrTmp);
00781 allInd.addValue((double)icluster);
00782 }
00783 }
00784 unsigned long ind = (unsigned long)(allInd[llr.getIndexOfLargestValue()]);
00785 if(verbose) cout << "Best cluster: " << ind << endl;
00786
00787 return ind;
00788 }
00789
00790
00791
00792
00793
00794
00795 void intraCluster(Config& config, hmm& actualHMM, SegServer& actualSeg, StatServer& ss, FeatureServer& fs, MixtureGD& world, String crit){
00796 for(unsigned long icluster=0;icluster<actualSeg.getClusterCount();icluster++){
00797 SegCluster& clusterA=actualSeg.getCluster(icluster);
00798 if (verbose) cout<<" Purify cluster for speaker "<<clusterA.string()<<endl;
00799 clusterA.rewind();
00800 Seg *segment1 = bestFittingSegment(config, clusterA, actualHMM.getDensity(icluster), ss, fs,world);
00801 if ((verbose) && (verboseLevel == 2)) cout<<"Best segment "<<segment1->begin()<< " " << endSeg(segment1) << endl;
00802 Seg *segment2;
00803 while((segment2=clusterA.getSeg())!=NULL)
00804 isSimilarSegment(config, segment1, segment2, ss, fs, world, crit);
00805 }
00806 }
00807
00808
00809
00810
00811
00812
00813
00814 void interCluster(Config& config, hmm& actualHMM, SegServer& actualSeg, StatServer& ss, FeatureServer& fs, MixtureGD& world, String crit){
00815
00816 for(unsigned long icluster=0;icluster<actualSeg.getClusterCount();icluster++){
00817
00818 SegCluster& clusterA=actualSeg.getCluster(icluster);
00819 if (verbose) cout<<" Evaluation of purity inter cluster for speaker "<<clusterA.string()<<endl;
00820 clusterA.rewind();
00821
00822 Seg *segment1 = bestFittingSegment(config, clusterA, actualHMM.getDensity(icluster), ss, fs,world);
00823 if ((verbose) && (verboseLevel == 2)) cout<<"Best segment "<<segment1->begin()<< " " << endSeg(segment1) << endl;
00824 Seg *segment2;
00825
00826 for(unsigned long other=0; other<actualSeg.getClusterCount(); other++){
00827 if(other != icluster){
00828 SegCluster& clusterB=actualSeg.getCluster(other);
00829 clusterB.rewind();
00830 if (verbose) cout<<" Against speaker "<<clusterB.string()<<endl;
00831
00832 while((segment2=clusterB.getSeg())!=NULL)
00833 isSimilarSegment(config, segment1, segment2, ss, fs, world, crit);
00834 }
00835 }
00836 }
00837 }