Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 #if !defined(ALIZE_SequenceExtractor_h)
00056 #define ALIZE_SequenceExtractor_h
00057
00058 #include "alize.h"
00059 #include "LabelNGram.h"
00060 #include "BNgram.h"
00061
/// Table of boolean flags addressed by a short integer symbol.
///
/// The object owns the heap array `_symbTab` (it is released in the
/// destructor). The array itself is allocated by reserveMem(), whose
/// definition lives outside this header.
class SymbTab{
  short int _nbSymb;  ///< number of flags the table is sized for
  bool *_symbTab;     ///< owned flag array, NULL while nothing is allocated
public:
  /// Writes `val` into each of the `_nbSymb` flags.
  /// Does nothing when no array is attached, e.g. after setNbSymb() on a
  /// default-constructed table that has not been through reserveMem() yet.
  void init(bool val=false){
    if (_symbTab==NULL) return;
    for (int i=0;i<_nbSymb;++i) _symbTab[i]=val;
  }
  /// Allocates the flag array (defined elsewhere).
  void reserveMem();
  /// Records a new flag count; this call alone does not touch the array.
  void setNbSymb(short int nbSymb){_nbSymb=nbSymb;}
  /// Builds a table for `nbSymb` flags, all cleared.
  /// `_symbTab` is set to NULL first so reserveMem() never observes an
  /// indeterminate pointer.
  SymbTab(short int nbSymb):_nbSymb(nbSymb),_symbTab(NULL){reserveMem();init(false);}
  /// Builds an empty table with no storage.
  SymbTab():_nbSymb(0),_symbTab(NULL){}
  SymbTab(const SymbTab &);
  /// Releases the flag array; delete[] on NULL is a no-op, so no test is needed.
  ~SymbTab(){delete [] _symbTab;}
  SymbTab& operator =(const SymbTab & symbTab);
  bool operator==(const SymbTab &);
  bool operator!=(const SymbTab &);
  /// Defined elsewhere; presumably reports whether the flag for `symb` is set.
  bool isIn(short int symb);
  /// Defined elsewhere; presumably raises the flag for `symb`.
  void setSymb(short int symb);
  /// Returns the recorded flag count.
  short int getNbSymb() const {return _nbSymb;}
  void show();
};
00081
00082 class ReadMemory{
00083 ifstream _inputFile;
00084 unsigned long _idx;
00085 unsigned long _realIdx;
00086 short int *_buf;
00087 int _bufSize;
00088 unsigned long _begin;
00089 unsigned long _length;
00090 bool _segmental;
00091 bool readSymb(short int &, string &);
00092 public:
00093 ReadMemory(String,int,unsigned long,unsigned long);
00094 ~ReadMemory();
00095 bool notEof();
00096 bool eof();
00097 bool lecture(bool);
00098 unsigned long getIdx();
00099 short int getCurrentSymb();
00100 void setIdx(unsigned long idx);
00101 };
00102
00103 class Seq{
00104 short int _maxLength;
00105 short int _length;
00106 short int _nbInputSymb;
00107 SymbTab *_array;
00108 void _reserveMem();
00109 public:
00110 Seq(short int,short int);
00111 Seq(const Seq&);
00112 ~Seq();
00113 Seq & operator=(const Seq &);
00114 SymbTab &operator[](short int);
00115 void setLength(short int order);
00116 short int getLength();
00117 void init(short int);
00118 void show();
00119 void add(SymbTab *ptr);
00120
00121 };
00122
/// One node of a CommonPartTree: a symbol, two counters and two links.
struct CommonPartTreeNode {
    short int           symb;             ///< symbol carried by this node
    unsigned long       count;            ///< counter attached to the node
    unsigned long       totalChildCount;  ///< presumably the sum of the counts below -- TODO confirm
    CommonPartTreeNode *ch;               ///< presumably the first child
    CommonPartTreeNode *br;               ///< presumably the next sibling ("brother")
};
00130 class CommonPartTree{
00131 CommonPartTreeNode * _seed;
00132 unsigned long _totalCount;
00133 unsigned long _totalChildCount;
00134 void _freeTree(CommonPartTreeNode* seed);
00135 CommonPartTreeNode* _newNode(const short int symb,const unsigned long count,CommonPartTreeNode *ch,CommonPartTreeNode *br);
00136 CommonPartTreeNode* _findInsert(const short int symb,unsigned long count,CommonPartTreeNode * ptr);
00137 unsigned long _findMaxSeq(CommonPartTreeNode *ptr,Seq &seq, short int &);
00138 CommonPartTreeNode* _suppressSeq(CommonPartTreeNode * ptr,Seq &seq,
00139 short int order, unsigned long &childCountDelta);
00140 void _show(CommonPartTreeNode *ptr,unsigned long order);
00141 CommonPartTreeNode* _findPartSeq(Seq &,short int,CommonPartTreeNode *);
00142 public:
00143 CommonPartTree();
00144 CommonPartTree(const CommonPartTree &);
00145 CommonPartTree operator =(const CommonPartTree &);
00146 ~CommonPartTree();
00147 void addNGram(NGram& nGram);
00148 unsigned long findMaxSeq(Seq&);
00149 unsigned long findMaxEndSeq(Seq&);
00150 void suppressSeq(Seq &seq);
00151 unsigned long getTotalChildCount(){return _totalChildCount;}
00152 void setTotalCount(unsigned long total){_totalCount=total;}
00153 unsigned long getTotalCount(){return _totalCount;}
00154 void show();
00155 };
00156
00157
00158
00159 struct SequenceDecoderNode{
00160 SymbTab *symbTab;
00161 short int outputSymb;
00162 SequenceDecoderNode *ch;
00163 SequenceDecoderNode *br;
00164 };
00165
00166 class SequenceDecoder{
00167 short int _nbInputSymb;
00168 unsigned long _nbOutputSeqPart;
00169 unsigned long _nbOutputSeq;
00170 SequenceDecoderNode *_seed;
00171 void _freeTree(SequenceDecoderNode* seed);
00172 SequenceDecoderNode* _newNode(const SymbTab &symbTab,const short int outputSymb,SequenceDecoderNode *ch,SequenceDecoderNode *br);
00173 SequenceDecoderNode* _findInsert(const SymbTab &symbTab,SequenceDecoderNode *ptr);
00174 void _show(SequenceDecoderNode *,short int);
00175 void _toFile(SequenceDecoderNode *,short int,Seq &,ostream &);
00176 void _save(SequenceDecoderNode *,ostream & );
00177 SequenceDecoderNode *_load(istream &);
00178 bool _decode(SequenceDecoderNode *ptr,ReadMemory & inp,ostream & outputFile,unsigned long &begin,short int &);
00179 public:
00180 SequenceDecoder(short int);
00181 SequenceDecoder(const SequenceDecoder &);
00182 SequenceDecoder operator =(const SequenceDecoder &);
00183 ~SequenceDecoder();
00184 short int getNbInputSymb(){return _nbInputSymb;}
00185 short int getNbOutputSeq(){return (short)_nbOutputSeq;}
00186 void setNbOutputSeq(unsigned long nb){_nbOutputSeq=nb;}
00187 unsigned long getNbOutputSeqPart(){return _nbOutputSeqPart;}
00188 void addSequence(Seq &, const short int);
00189 void show();
00190 void toFile(ostream &);
00191 void save(ostream &);
00192 void load(istream &);
00193 void decode(String,ostream &,unsigned long,unsigned long,bool,bool,BNgram &);
00194 };
00195
00196
00197 int sequenceExtractor(alize::Config&);
00198 int sequenceDecoder(alize::Config &);
00199
00200 #endif //