Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 #if !defined(ALIZE_BNgram_cpp)
00056 #define ALIZE_BNgram_cpp
00057
00058 #include <iostream>
00059 #include <fstream>
00060 #include <cstdio>
00061 #include <cassert>
00062 #include <cmath>
00063 #include <liatools.h>
00064 #include "BNgram.h"
00065
00066
00067
00068
00069
00070 BNgramNode * BNgram::_newNode(short int symb,BNgramNode*br=NULL){
00071 BNgramNode * ptr=new BNgramNode;
00072 ptr->child=new BNgramNode* [_nbElt];
00073 ptr->count=new unsigned long [_nbElt];
00074 for (short int i=0;i<_nbElt;i++){ptr->child[i]=NULL;ptr->count[i]=0;}
00075 ptr->idx=(symb/_nbElt)*_nbElt;
00076 ptr->brother=br;
00077 return ptr;
00078 }
00079 BNgramNode * BNgram::_findInsert(BNgramNode* ptr,short int idx){
00080
00081 if (idx<(ptr->idx+_nbElt))return ptr;
00082 while ((ptr->brother!=NULL) && (idx>=(ptr->brother->idx+_nbElt))) ptr=ptr->brother;
00083 if (ptr->brother==NULL) {ptr->brother=_newNode(idx); return ptr->brother;}
00084 if ((idx>=ptr->brother->idx)&& (idx<ptr->brother->idx+_nbElt)) return ptr->brother;
00085 ptr->brother=_newNode(idx,ptr->brother); return ptr->brother;
00086 }
00087 void BNgram::_add(short int symb,bool first,short int nb=1){
00088
00089
00090 static BNgramNode* seed;
00091 static short int level;
00092 if (first) {
00093 seed=_seed;
00094 level=0;
00095 _totalCount++;
00096 }
00097 if (debug) cout <<"add symb["<<symb<<"] Level["<<level<<"]"<<endl;
00098
00099 seed=_findInsert(seed,symb);
00100 seed->count[symb%_nbElt]+=nb;
00101
00102
00103 if (seed->child[symb%_nbElt]==NULL) seed->child[symb%_nbElt]=_newNode(0);
00104 seed=seed->child[symb%_nbElt];
00105 level++;
00106 }
00107 void BNgram::_freeTree(BNgramNode *seed){
00108 if (seed==NULL) return;
00109 for (short int i=0;i<_nbElt;i++)_freeTree(seed->child[i]);
00110 delete [] seed->child;
00111 delete [] seed->count;
00112 delete seed;
00113 }
00114 void BNgram::_show(BNgramNode * seed,String ngramS="",ostream & str=cout){
00115 if (seed==NULL) return;
00116 for (BNgramNode* ptr=seed;(ptr!=NULL);ptr=ptr->brother)
00117 for (short int i=0;i<_maxLength;i++)
00118 if (ptr->count[i]!=0){
00119 String newNgramS=ngramS+" "+String::valueOf(ptr->idx+i);
00120 str<<newNgramS<<" "<<ptr->count[i]<<endl;
00121 _show(ptr->child[i],newNgramS,str);
00122 }
00123 }
00124
00125 BNgram::BNgram(unsigned long maxLength,unsigned long nbElementByNode=100){
00126 _maxLength=maxLength;
00127 _totalCount=0;
00128
00129 _circArray=new unsigned long [_maxLength];
00130 _readIdx=0;
00131 _nbElt=nbElementByNode;
00132 _seed=_newNode(0);
00133 }
00134 BNgram::~BNgram(){
00135 delete [] _circArray;
00136 _freeTree(_seed);
00137 }
00138 void BNgram::beginSeq(short int symb,short int nb){
00139 _readIdx=0;
00140 _circArray[_readIdx++]=symb;
00141 }
00142 void BNgram::addSymb(short int symb,short int nb){
00143 _circArray[_readIdx%_maxLength]=symb;
00144 if (_readIdx>=(unsigned long) (_maxLength-1)){
00145 _add(_circArray[(_readIdx-(_maxLength-1))%_maxLength],true,nb);
00146 for (unsigned long i=(_readIdx-(_maxLength-1))+1;i<=_readIdx;i++)
00147 _add(_circArray[i%_maxLength],false,nb);
00148 }
00149 _readIdx++;
00150 }
00151 void BNgram::endSeq(short int nb){
00152 for (unsigned long length=_maxLength-1;length>0;length--)
00153 if (_readIdx>=length){
00154 _add(_circArray[(_readIdx-length)%_maxLength],true,nb);
00155 for (unsigned long i=(_readIdx-length)+1;i<_readIdx;i++)
00156 _add(_circArray[i%_maxLength],false,nb);
00157 }
00158 }
00159
00160 void BNgram::show(ostream & str=cout){
00161 String ngramS="";
00162 str<<"symbol total count["<<_totalCount<<"]"<<endl;
00163 _show(_seed,ngramS,str);
00164 }
00165
00166
00167 void testBNgram(Config &config){
00168 short int order=config.getParam("maxOrder").toLong();
00169 short int symb=-1;
00170 BNgram ngram(order,2);
00171 cin>>symb;
00172
00173 bool begin=true;
00174 while (symb!=-1){
00175 if (begin){
00176 ngram.beginSeq(symb);
00177 begin=false;
00178 }
00179 else ngram.addSymb(symb);
00180 cin>>symb;
00181 }
00182 ngram.endSeq();
00183 ngram.show();
00184 }
00185 #endif