00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008
00009 #include "libsvm.h"
00010
00011 int libsvm_version = LIBSVM_VERSION;
00012 typedef float Qfloat;
00013 typedef signed char schar;
00014 #ifndef min
00015 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00016 #endif
00017 #ifndef max
00018 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00019 #endif
00020 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00021 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00022 {
00023 dst = new T[n];
00024 memcpy((void *)dst,(void *)src,sizeof(T)*n);
00025 }
00026 static inline double powi(double base, int times)
00027 {
00028 double tmp = base, ret = 1.0;
00029
00030 for(int t=times; t>0; t/=2)
00031 {
00032 if(t%2==1) ret*=tmp;
00033 tmp = tmp * tmp;
00034 }
00035 return ret;
00036 }
00037 #define INF HUGE_VAL
00038 #define TAU 1e-12
00039 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00040
00041 static void print_string_stdout(const char *s)
00042 {
00043 fputs(s,stdout);
00044 fflush(stdout);
00045 }
00046 static void (*svm_print_string) (const char *) = &print_string_stdout;
00047 #if 1
00048 static void info(const char *fmt,...)
00049 {
00050 char buf[BUFSIZ];
00051 va_list ap;
00052 va_start(ap,fmt);
00053 vsprintf(buf,fmt,ap);
00054 va_end(ap);
00055 (*svm_print_string)(buf);
00056 }
00057 #else
00058 static void info(const char *fmt,...) {}
00059 #endif
00060
00061
00062
00063
00064
00065
00066
00067 class Cache
00068 {
00069 public:
00070 Cache(int l,long int size);
00071 ~Cache();
00072
00073
00074
00075
00076 int get_data(const int index, Qfloat **data, int len);
00077 void swap_index(int i, int j);
00078 private:
00079 int l;
00080 long int size;
00081 struct head_t
00082 {
00083 head_t *prev, *next;
00084 Qfloat *data;
00085 int len;
00086 };
00087
00088 head_t *head;
00089 head_t lru_head;
00090 void lru_delete(head_t *h);
00091 void lru_insert(head_t *h);
00092 };
00093
00094 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00095 {
00096 head = (head_t *)calloc(l,sizeof(head_t));
00097 size /= sizeof(Qfloat);
00098 size -= l * sizeof(head_t) / sizeof(Qfloat);
00099 size = max(size, 2 * (long int) l);
00100 lru_head.next = lru_head.prev = &lru_head;
00101 }
00102
00103 Cache::~Cache()
00104 {
00105 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00106 free(h->data);
00107 free(head);
00108 }
00109
00110 void Cache::lru_delete(head_t *h)
00111 {
00112
00113 h->prev->next = h->next;
00114 h->next->prev = h->prev;
00115 }
00116
00117 void Cache::lru_insert(head_t *h)
00118 {
00119
00120 h->next = &lru_head;
00121 h->prev = lru_head.prev;
00122 h->prev->next = h;
00123 h->next->prev = h;
00124 }
00125
00126 int Cache::get_data(const int index, Qfloat **data, int len)
00127 {
00128 head_t *h = &head[index];
00129 if(h->len) lru_delete(h);
00130 int more = len - h->len;
00131
00132 if(more > 0)
00133 {
00134
00135 while(size < more)
00136 {
00137 head_t *old = lru_head.next;
00138 lru_delete(old);
00139 free(old->data);
00140 size += old->len;
00141 old->data = 0;
00142 old->len = 0;
00143 }
00144
00145
00146 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00147 size -= more;
00148 swap(h->len,len);
00149 }
00150
00151 lru_insert(h);
00152 *data = h->data;
00153 return len;
00154 }
00155
00156 void Cache::swap_index(int i, int j)
00157 {
00158 if(i==j) return;
00159
00160 if(head[i].len) lru_delete(&head[i]);
00161 if(head[j].len) lru_delete(&head[j]);
00162 swap(head[i].data,head[j].data);
00163 swap(head[i].len,head[j].len);
00164 if(head[i].len) lru_insert(&head[i]);
00165 if(head[j].len) lru_insert(&head[j]);
00166
00167 if(i>j) swap(i,j);
00168 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00169 {
00170 if(h->len > i)
00171 {
00172 if(h->len > j)
00173 swap(h->data[i],h->data[j]);
00174 else
00175 {
00176
00177 lru_delete(h);
00178 free(h->data);
00179 size += h->len;
00180 h->data = 0;
00181 h->len = 0;
00182 }
00183 }
00184 }
00185 }
00186
00187
00188
00189
00190
00191
00192
00193
00194 class QMatrix {
00195 public:
00196 virtual Qfloat *get_Q(int column, int len) const = 0;
00197 virtual double *get_QD() const = 0;
00198 virtual void swap_index(int i, int j) const = 0;
00199 virtual ~QMatrix() {}
00200 };
00201
00202 class Kernel: public QMatrix {
00203 public:
00204 Kernel(int l, svm_node * const * x, const svm_parameter& param);
00205 virtual ~Kernel();
00206
00207 static double k_function(const svm_node *x, const svm_node *y,
00208 const svm_parameter& param);
00209 virtual Qfloat *get_Q(int column, int len) const = 0;
00210 virtual double *get_QD() const = 0;
00211 virtual void swap_index(int i, int j) const
00212 {
00213 swap(x[i],x[j]);
00214 if(x_square) swap(x_square[i],x_square[j]);
00215 }
00216 protected:
00217
00218 double (Kernel::*kernel_function)(int i, int j) const;
00219
00220 private:
00221 const svm_node **x;
00222 double *x_square;
00223
00224
00225 const int kernel_type;
00226 const int degree;
00227 const double gamma;
00228 const double coef0;
00229
00230 static double dot(const svm_node *px, const svm_node *py);
00231 double kernel_linear(int i, int j) const
00232 {
00233 return dot(x[i],x[j]);
00234 }
00235 double kernel_poly(int i, int j) const
00236 {
00237 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00238 }
00239 double kernel_rbf(int i, int j) const
00240 {
00241 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00242 }
00243 double kernel_sigmoid(int i, int j) const
00244 {
00245 return tanh(gamma*dot(x[i],x[j])+coef0);
00246 }
00247 double kernel_precomputed(int i, int j) const
00248 {
00249 return x[i][(int)(x[j][0].value)].value;
00250 }
00251 };
00252
00253 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00254 :kernel_type(param.kernel_type), degree(param.degree),
00255 gamma(param.gamma), coef0(param.coef0)
00256 {
00257 switch(kernel_type)
00258 {
00259 case LINEAR:
00260 kernel_function = &Kernel::kernel_linear;
00261 break;
00262 case POLY:
00263 kernel_function = &Kernel::kernel_poly;
00264 break;
00265 case RBF:
00266 kernel_function = &Kernel::kernel_rbf;
00267 break;
00268 case SIGMOID:
00269 kernel_function = &Kernel::kernel_sigmoid;
00270 break;
00271 case PRECOMPUTED:
00272 kernel_function = &Kernel::kernel_precomputed;
00273 break;
00274 }
00275
00276 clone(x,x_,l);
00277
00278 if(kernel_type == RBF)
00279 {
00280 x_square = new double[l];
00281 for(int i=0;i<l;i++)
00282 x_square[i] = dot(x[i],x[i]);
00283 }
00284 else
00285 x_square = 0;
00286 }
00287
00288 Kernel::~Kernel()
00289 {
00290 delete[] x;
00291 delete[] x_square;
00292 }
00293
00294 double Kernel::dot(const svm_node *px, const svm_node *py)
00295 {
00296 double sum = 0;
00297 while(px->index != -1 && py->index != -1)
00298 {
00299 if(px->index == py->index)
00300 {
00301 sum += px->value * py->value;
00302 ++px;
00303 ++py;
00304 }
00305 else
00306 {
00307 if(px->index > py->index)
00308 ++py;
00309 else
00310 ++px;
00311 }
00312 }
00313 return sum;
00314 }
00315
00316 double Kernel::k_function(const svm_node *x, const svm_node *y,
00317 const svm_parameter& param)
00318 {
00319 switch(param.kernel_type)
00320 {
00321 case LINEAR:
00322 return dot(x,y);
00323 case POLY:
00324 return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00325 case RBF:
00326 {
00327 double sum = 0;
00328 while(x->index != -1 && y->index !=-1)
00329 {
00330 if(x->index == y->index)
00331 {
00332 double d = x->value - y->value;
00333 sum += d*d;
00334 ++x;
00335 ++y;
00336 }
00337 else
00338 {
00339 if(x->index > y->index)
00340 {
00341 sum += y->value * y->value;
00342 ++y;
00343 }
00344 else
00345 {
00346 sum += x->value * x->value;
00347 ++x;
00348 }
00349 }
00350 }
00351
00352 while(x->index != -1)
00353 {
00354 sum += x->value * x->value;
00355 ++x;
00356 }
00357
00358 while(y->index != -1)
00359 {
00360 sum += y->value * y->value;
00361 ++y;
00362 }
00363
00364 return exp(-param.gamma*sum);
00365 }
00366 case SIGMOID:
00367 return tanh(param.gamma*dot(x,y)+param.coef0);
00368 case PRECOMPUTED:
00369 return x[(int)(y->value)].value;
00370 default:
00371 return 0;
00372 }
00373 }
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393 class Solver {
00394 public:
00395 Solver() {};
00396 virtual ~Solver() {};
00397
00398 struct SolutionInfo {
00399 double obj;
00400 double rho;
00401 double upper_bound_p;
00402 double upper_bound_n;
00403 double r;
00404 };
00405
00406 void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00407 double *alpha_, double Cp, double Cn, double eps,
00408 SolutionInfo* si, int shrinking);
00409 protected:
00410 int active_size;
00411 schar *y;
00412 double *G;
00413 enum { LOWER_BOUND, UPPER_BOUND, FREE };
00414 char *alpha_status;
00415 double *alpha;
00416 const QMatrix *Q;
00417 const double *QD;
00418 double eps;
00419 double Cp,Cn;
00420 double *p;
00421 int *active_set;
00422 double *G_bar;
00423 int l;
00424 bool unshrink;
00425
00426 double get_C(int i)
00427 {
00428 return (y[i] > 0)? Cp : Cn;
00429 }
00430 void update_alpha_status(int i)
00431 {
00432 if(alpha[i] >= get_C(i))
00433 alpha_status[i] = UPPER_BOUND;
00434 else if(alpha[i] <= 0)
00435 alpha_status[i] = LOWER_BOUND;
00436 else alpha_status[i] = FREE;
00437 }
00438 bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00439 bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00440 bool is_free(int i) { return alpha_status[i] == FREE; }
00441 void swap_index(int i, int j);
00442 void reconstruct_gradient();
00443 virtual int select_working_set(int &i, int &j);
00444 virtual double calculate_rho();
00445 virtual void do_shrinking();
00446 private:
00447 bool be_shrunk(int i, double Gmax1, double Gmax2);
00448 };
00449
00450 void Solver::swap_index(int i, int j)
00451 {
00452 Q->swap_index(i,j);
00453 swap(y[i],y[j]);
00454 swap(G[i],G[j]);
00455 swap(alpha_status[i],alpha_status[j]);
00456 swap(alpha[i],alpha[j]);
00457 swap(p[i],p[j]);
00458 swap(active_set[i],active_set[j]);
00459 swap(G_bar[i],G_bar[j]);
00460 }
00461
00462 void Solver::reconstruct_gradient()
00463 {
00464
00465
00466 if(active_size == l) return;
00467
00468 int i,j;
00469 int nr_free = 0;
00470
00471 for(j=active_size;j<l;j++)
00472 G[j] = G_bar[j] + p[j];
00473
00474 for(j=0;j<active_size;j++)
00475 if(is_free(j))
00476 nr_free++;
00477
00478 if(2*nr_free < active_size)
00479 info("\nWarning: using -h 0 may be faster\n");
00480
00481 if (nr_free*l > 2*active_size*(l-active_size))
00482 {
00483 for(i=active_size;i<l;i++)
00484 {
00485 const Qfloat *Q_i = Q->get_Q(i,active_size);
00486 for(j=0;j<active_size;j++)
00487 if(is_free(j))
00488 G[i] += alpha[j] * Q_i[j];
00489 }
00490 }
00491 else
00492 {
00493 for(i=0;i<active_size;i++)
00494 if(is_free(i))
00495 {
00496 const Qfloat *Q_i = Q->get_Q(i,l);
00497 double alpha_i = alpha[i];
00498 for(j=active_size;j<l;j++)
00499 G[j] += alpha_i * Q_i[j];
00500 }
00501 }
00502 }
00503
00504 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00505 double *alpha_, double Cp, double Cn, double eps,
00506 SolutionInfo* si, int shrinking)
00507 {
00508 this->l = l;
00509 this->Q = &Q;
00510 QD=Q.get_QD();
00511 clone(p, p_,l);
00512 clone(y, y_,l);
00513 clone(alpha,alpha_,l);
00514 this->Cp = Cp;
00515 this->Cn = Cn;
00516 this->eps = eps;
00517 unshrink = false;
00518
00519
00520 {
00521 alpha_status = new char[l];
00522 for(int i=0;i<l;i++)
00523 update_alpha_status(i);
00524 }
00525
00526
00527 {
00528 active_set = new int[l];
00529 for(int i=0;i<l;i++)
00530 active_set[i] = i;
00531 active_size = l;
00532 }
00533
00534
00535 {
00536 G = new double[l];
00537 G_bar = new double[l];
00538 int i;
00539 for(i=0;i<l;i++)
00540 {
00541 G[i] = p[i];
00542 G_bar[i] = 0;
00543 }
00544 for(i=0;i<l;i++)
00545 if(!is_lower_bound(i))
00546 {
00547 const Qfloat *Q_i = Q.get_Q(i,l);
00548 double alpha_i = alpha[i];
00549 int j;
00550 for(j=0;j<l;j++)
00551 G[j] += alpha_i*Q_i[j];
00552 if(is_upper_bound(i))
00553 for(j=0;j<l;j++)
00554 G_bar[j] += get_C(i) * Q_i[j];
00555 }
00556 }
00557
00558
00559
00560 int iter = 0;
00561 int counter = min(l,1000)+1;
00562
00563 while(1)
00564 {
00565
00566
00567 if(--counter == 0)
00568 {
00569 counter = min(l,1000);
00570 if(shrinking) do_shrinking();
00571 info(".");
00572 }
00573
00574 int i,j;
00575 if(select_working_set(i,j)!=0)
00576 {
00577
00578 reconstruct_gradient();
00579
00580 active_size = l;
00581 info("*");
00582 if(select_working_set(i,j)!=0)
00583 break;
00584 else
00585 counter = 1;
00586 }
00587
00588 ++iter;
00589
00590
00591
00592 const Qfloat *Q_i = Q.get_Q(i,active_size);
00593 const Qfloat *Q_j = Q.get_Q(j,active_size);
00594
00595 double C_i = get_C(i);
00596 double C_j = get_C(j);
00597
00598 double old_alpha_i = alpha[i];
00599 double old_alpha_j = alpha[j];
00600
00601 if(y[i]!=y[j])
00602 {
00603 double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00604 if (quad_coef <= 0)
00605 quad_coef = TAU;
00606 double delta = (-G[i]-G[j])/quad_coef;
00607 double diff = alpha[i] - alpha[j];
00608 alpha[i] += delta;
00609 alpha[j] += delta;
00610
00611 if(diff > 0)
00612 {
00613 if(alpha[j] < 0)
00614 {
00615 alpha[j] = 0;
00616 alpha[i] = diff;
00617 }
00618 }
00619 else
00620 {
00621 if(alpha[i] < 0)
00622 {
00623 alpha[i] = 0;
00624 alpha[j] = -diff;
00625 }
00626 }
00627 if(diff > C_i - C_j)
00628 {
00629 if(alpha[i] > C_i)
00630 {
00631 alpha[i] = C_i;
00632 alpha[j] = C_i - diff;
00633 }
00634 }
00635 else
00636 {
00637 if(alpha[j] > C_j)
00638 {
00639 alpha[j] = C_j;
00640 alpha[i] = C_j + diff;
00641 }
00642 }
00643 }
00644 else
00645 {
00646 double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00647 if (quad_coef <= 0)
00648 quad_coef = TAU;
00649 double delta = (G[i]-G[j])/quad_coef;
00650 double sum = alpha[i] + alpha[j];
00651 alpha[i] -= delta;
00652 alpha[j] += delta;
00653
00654 if(sum > C_i)
00655 {
00656 if(alpha[i] > C_i)
00657 {
00658 alpha[i] = C_i;
00659 alpha[j] = sum - C_i;
00660 }
00661 }
00662 else
00663 {
00664 if(alpha[j] < 0)
00665 {
00666 alpha[j] = 0;
00667 alpha[i] = sum;
00668 }
00669 }
00670 if(sum > C_j)
00671 {
00672 if(alpha[j] > C_j)
00673 {
00674 alpha[j] = C_j;
00675 alpha[i] = sum - C_j;
00676 }
00677 }
00678 else
00679 {
00680 if(alpha[i] < 0)
00681 {
00682 alpha[i] = 0;
00683 alpha[j] = sum;
00684 }
00685 }
00686 }
00687
00688
00689
00690 double delta_alpha_i = alpha[i] - old_alpha_i;
00691 double delta_alpha_j = alpha[j] - old_alpha_j;
00692
00693 for(int k=0;k<active_size;k++)
00694 {
00695 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00696 }
00697
00698
00699
00700 {
00701 bool ui = is_upper_bound(i);
00702 bool uj = is_upper_bound(j);
00703 update_alpha_status(i);
00704 update_alpha_status(j);
00705 int k;
00706 if(ui != is_upper_bound(i))
00707 {
00708 Q_i = Q.get_Q(i,l);
00709 if(ui)
00710 for(k=0;k<l;k++)
00711 G_bar[k] -= C_i * Q_i[k];
00712 else
00713 for(k=0;k<l;k++)
00714 G_bar[k] += C_i * Q_i[k];
00715 }
00716
00717 if(uj != is_upper_bound(j))
00718 {
00719 Q_j = Q.get_Q(j,l);
00720 if(uj)
00721 for(k=0;k<l;k++)
00722 G_bar[k] -= C_j * Q_j[k];
00723 else
00724 for(k=0;k<l;k++)
00725 G_bar[k] += C_j * Q_j[k];
00726 }
00727 }
00728 }
00729
00730
00731
00732 si->rho = calculate_rho();
00733
00734
00735 {
00736 double v = 0;
00737 int i;
00738 for(i=0;i<l;i++)
00739 v += alpha[i] * (G[i] + p[i]);
00740
00741 si->obj = v/2;
00742 }
00743
00744
00745 {
00746 for(int i=0;i<l;i++)
00747 alpha_[active_set[i]] = alpha[i];
00748 }
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758 si->upper_bound_p = Cp;
00759 si->upper_bound_n = Cn;
00760
00761 info("\noptimization finished, #iter = %d\n",iter);
00762
00763 delete[] p;
00764 delete[] y;
00765 delete[] alpha;
00766 delete[] alpha_status;
00767 delete[] active_set;
00768 delete[] G;
00769 delete[] G_bar;
00770 }
00771
00772
00773 int Solver::select_working_set(int &out_i, int &out_j)
00774 {
00775
00776
00777
00778
00779
00780
00781 double Gmax = -INF;
00782 double Gmax2 = -INF;
00783 int Gmax_idx = -1;
00784 int Gmin_idx = -1;
00785 double obj_diff_min = INF;
00786
00787 for(int t=0;t<active_size;t++)
00788 if(y[t]==+1)
00789 {
00790 if(!is_upper_bound(t))
00791 if(-G[t] >= Gmax)
00792 {
00793 Gmax = -G[t];
00794 Gmax_idx = t;
00795 }
00796 }
00797 else
00798 {
00799 if(!is_lower_bound(t))
00800 if(G[t] >= Gmax)
00801 {
00802 Gmax = G[t];
00803 Gmax_idx = t;
00804 }
00805 }
00806
00807 int i = Gmax_idx;
00808 const Qfloat *Q_i = NULL;
00809 if(i != -1)
00810 Q_i = Q->get_Q(i,active_size);
00811
00812 for(int j=0;j<active_size;j++)
00813 {
00814 if(y[j]==+1)
00815 {
00816 if (!is_lower_bound(j))
00817 {
00818 double grad_diff=Gmax+G[j];
00819 if (G[j] >= Gmax2)
00820 Gmax2 = G[j];
00821 if (grad_diff > 0)
00822 {
00823 double obj_diff;
00824 double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00825 if (quad_coef > 0)
00826 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00827 else
00828 obj_diff = -(grad_diff*grad_diff)/TAU;
00829
00830 if (obj_diff <= obj_diff_min)
00831 {
00832 Gmin_idx=j;
00833 obj_diff_min = obj_diff;
00834 }
00835 }
00836 }
00837 }
00838 else
00839 {
00840 if (!is_upper_bound(j))
00841 {
00842 double grad_diff= Gmax-G[j];
00843 if (-G[j] >= Gmax2)
00844 Gmax2 = -G[j];
00845 if (grad_diff > 0)
00846 {
00847 double obj_diff;
00848 double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00849 if (quad_coef > 0)
00850 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00851 else
00852 obj_diff = -(grad_diff*grad_diff)/TAU;
00853
00854 if (obj_diff <= obj_diff_min)
00855 {
00856 Gmin_idx=j;
00857 obj_diff_min = obj_diff;
00858 }
00859 }
00860 }
00861 }
00862 }
00863
00864 if(Gmax+Gmax2 < eps)
00865 return 1;
00866
00867 out_i = Gmax_idx;
00868 out_j = Gmin_idx;
00869 return 0;
00870 }
00871
00872 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00873 {
00874 if(is_upper_bound(i))
00875 {
00876 if(y[i]==+1)
00877 return(-G[i] > Gmax1);
00878 else
00879 return(-G[i] > Gmax2);
00880 }
00881 else if(is_lower_bound(i))
00882 {
00883 if(y[i]==+1)
00884 return(G[i] > Gmax2);
00885 else
00886 return(G[i] > Gmax1);
00887 }
00888 else
00889 return(false);
00890 }
00891
00892 void Solver::do_shrinking()
00893 {
00894 int i;
00895 double Gmax1 = -INF;
00896 double Gmax2 = -INF;
00897
00898
00899 for(i=0;i<active_size;i++)
00900 {
00901 if(y[i]==+1)
00902 {
00903 if(!is_upper_bound(i))
00904 {
00905 if(-G[i] >= Gmax1)
00906 Gmax1 = -G[i];
00907 }
00908 if(!is_lower_bound(i))
00909 {
00910 if(G[i] >= Gmax2)
00911 Gmax2 = G[i];
00912 }
00913 }
00914 else
00915 {
00916 if(!is_upper_bound(i))
00917 {
00918 if(-G[i] >= Gmax2)
00919 Gmax2 = -G[i];
00920 }
00921 if(!is_lower_bound(i))
00922 {
00923 if(G[i] >= Gmax1)
00924 Gmax1 = G[i];
00925 }
00926 }
00927 }
00928
00929 if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
00930 {
00931 unshrink = true;
00932 reconstruct_gradient();
00933 active_size = l;
00934 info("*");
00935 }
00936
00937 for(i=0;i<active_size;i++)
00938 if (be_shrunk(i, Gmax1, Gmax2))
00939 {
00940 active_size--;
00941 while (active_size > i)
00942 {
00943 if (!be_shrunk(active_size, Gmax1, Gmax2))
00944 {
00945 swap_index(i,active_size);
00946 break;
00947 }
00948 active_size--;
00949 }
00950 }
00951 }
00952
00953 double Solver::calculate_rho()
00954 {
00955 double r;
00956 int nr_free = 0;
00957 double ub = INF, lb = -INF, sum_free = 0;
00958 for(int i=0;i<active_size;i++)
00959 {
00960 double yG = y[i]*G[i];
00961
00962 if(is_upper_bound(i))
00963 {
00964 if(y[i]==-1)
00965 ub = min(ub,yG);
00966 else
00967 lb = max(lb,yG);
00968 }
00969 else if(is_lower_bound(i))
00970 {
00971 if(y[i]==+1)
00972 ub = min(ub,yG);
00973 else
00974 lb = max(lb,yG);
00975 }
00976 else
00977 {
00978 ++nr_free;
00979 sum_free += yG;
00980 }
00981 }
00982
00983 if(nr_free>0)
00984 r = sum_free/nr_free;
00985 else
00986 r = (ub+lb)/2;
00987
00988 return r;
00989 }
00990
00991
00992
00993
00994
00995
00996 class Solver_NU : public Solver
00997 {
00998 public:
00999 Solver_NU() {}
01000 void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01001 double *alpha, double Cp, double Cn, double eps,
01002 SolutionInfo* si, int shrinking)
01003 {
01004 this->si = si;
01005 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01006 }
01007 private:
01008 SolutionInfo *si;
01009 int select_working_set(int &i, int &j);
01010 double calculate_rho();
01011 bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01012 void do_shrinking();
01013 };
01014
01015
01016 int Solver_NU::select_working_set(int &out_i, int &out_j)
01017 {
01018
01019
01020
01021
01022
01023
01024 double Gmaxp = -INF;
01025 double Gmaxp2 = -INF;
01026 int Gmaxp_idx = -1;
01027
01028 double Gmaxn = -INF;
01029 double Gmaxn2 = -INF;
01030 int Gmaxn_idx = -1;
01031
01032 int Gmin_idx = -1;
01033 double obj_diff_min = INF;
01034
01035 for(int t=0;t<active_size;t++)
01036 if(y[t]==+1)
01037 {
01038 if(!is_upper_bound(t))
01039 if(-G[t] >= Gmaxp)
01040 {
01041 Gmaxp = -G[t];
01042 Gmaxp_idx = t;
01043 }
01044 }
01045 else
01046 {
01047 if(!is_lower_bound(t))
01048 if(G[t] >= Gmaxn)
01049 {
01050 Gmaxn = G[t];
01051 Gmaxn_idx = t;
01052 }
01053 }
01054
01055 int ip = Gmaxp_idx;
01056 int in = Gmaxn_idx;
01057 const Qfloat *Q_ip = NULL;
01058 const Qfloat *Q_in = NULL;
01059 if(ip != -1)
01060 Q_ip = Q->get_Q(ip,active_size);
01061 if(in != -1)
01062 Q_in = Q->get_Q(in,active_size);
01063
01064 for(int j=0;j<active_size;j++)
01065 {
01066 if(y[j]==+1)
01067 {
01068 if (!is_lower_bound(j))
01069 {
01070 double grad_diff=Gmaxp+G[j];
01071 if (G[j] >= Gmaxp2)
01072 Gmaxp2 = G[j];
01073 if (grad_diff > 0)
01074 {
01075 double obj_diff;
01076 double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01077 if (quad_coef > 0)
01078 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01079 else
01080 obj_diff = -(grad_diff*grad_diff)/TAU;
01081
01082 if (obj_diff <= obj_diff_min)
01083 {
01084 Gmin_idx=j;
01085 obj_diff_min = obj_diff;
01086 }
01087 }
01088 }
01089 }
01090 else
01091 {
01092 if (!is_upper_bound(j))
01093 {
01094 double grad_diff=Gmaxn-G[j];
01095 if (-G[j] >= Gmaxn2)
01096 Gmaxn2 = -G[j];
01097 if (grad_diff > 0)
01098 {
01099 double obj_diff;
01100 double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01101 if (quad_coef > 0)
01102 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01103 else
01104 obj_diff = -(grad_diff*grad_diff)/TAU;
01105
01106 if (obj_diff <= obj_diff_min)
01107 {
01108 Gmin_idx=j;
01109 obj_diff_min = obj_diff;
01110 }
01111 }
01112 }
01113 }
01114 }
01115
01116 if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01117 return 1;
01118
01119 if (y[Gmin_idx] == +1)
01120 out_i = Gmaxp_idx;
01121 else
01122 out_i = Gmaxn_idx;
01123 out_j = Gmin_idx;
01124
01125 return 0;
01126 }
01127
01128 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01129 {
01130 if(is_upper_bound(i))
01131 {
01132 if(y[i]==+1)
01133 return(-G[i] > Gmax1);
01134 else
01135 return(-G[i] > Gmax4);
01136 }
01137 else if(is_lower_bound(i))
01138 {
01139 if(y[i]==+1)
01140 return(G[i] > Gmax2);
01141 else
01142 return(G[i] > Gmax3);
01143 }
01144 else
01145 return(false);
01146 }
01147
01148 void Solver_NU::do_shrinking()
01149 {
01150 double Gmax1 = -INF;
01151 double Gmax2 = -INF;
01152 double Gmax3 = -INF;
01153 double Gmax4 = -INF;
01154
01155
01156 int i;
01157 for(i=0;i<active_size;i++)
01158 {
01159 if(!is_upper_bound(i))
01160 {
01161 if(y[i]==+1)
01162 {
01163 if(-G[i] > Gmax1) Gmax1 = -G[i];
01164 }
01165 else if(-G[i] > Gmax4) Gmax4 = -G[i];
01166 }
01167 if(!is_lower_bound(i))
01168 {
01169 if(y[i]==+1)
01170 {
01171 if(G[i] > Gmax2) Gmax2 = G[i];
01172 }
01173 else if(G[i] > Gmax3) Gmax3 = G[i];
01174 }
01175 }
01176
01177 if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
01178 {
01179 unshrink = true;
01180 reconstruct_gradient();
01181 active_size = l;
01182 }
01183
01184 for(i=0;i<active_size;i++)
01185 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01186 {
01187 active_size--;
01188 while (active_size > i)
01189 {
01190 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01191 {
01192 swap_index(i,active_size);
01193 break;
01194 }
01195 active_size--;
01196 }
01197 }
01198 }
01199
01200 double Solver_NU::calculate_rho()
01201 {
01202 int nr_free1 = 0,nr_free2 = 0;
01203 double ub1 = INF, ub2 = INF;
01204 double lb1 = -INF, lb2 = -INF;
01205 double sum_free1 = 0, sum_free2 = 0;
01206
01207 for(int i=0;i<active_size;i++)
01208 {
01209 if(y[i]==+1)
01210 {
01211 if(is_upper_bound(i))
01212 lb1 = max(lb1,G[i]);
01213 else if(is_lower_bound(i))
01214 ub1 = min(ub1,G[i]);
01215 else
01216 {
01217 ++nr_free1;
01218 sum_free1 += G[i];
01219 }
01220 }
01221 else
01222 {
01223 if(is_upper_bound(i))
01224 lb2 = max(lb2,G[i]);
01225 else if(is_lower_bound(i))
01226 ub2 = min(ub2,G[i]);
01227 else
01228 {
01229 ++nr_free2;
01230 sum_free2 += G[i];
01231 }
01232 }
01233 }
01234
01235 double r1,r2;
01236 if(nr_free1 > 0)
01237 r1 = sum_free1/nr_free1;
01238 else
01239 r1 = (ub1+lb1)/2;
01240
01241 if(nr_free2 > 0)
01242 r2 = sum_free2/nr_free2;
01243 else
01244 r2 = (ub2+lb2)/2;
01245
01246 si->r = (r1+r2)/2;
01247 return (r1-r2)/2;
01248 }
01249
01250
01251
01252
01253 class SVC_Q: public Kernel
01254 {
01255 public:
01256 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01257 :Kernel(prob.l, prob.x, param)
01258 {
01259 clone(y,y_,prob.l);
01260 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01261 QD = new double[prob.l];
01262 for(int i=0;i<prob.l;i++)
01263 QD[i] = (this->*kernel_function)(i,i);
01264 }
01265
01266 Qfloat *get_Q(int i, int len) const
01267 {
01268 Qfloat *data;
01269 int start, j;
01270 if((start = cache->get_data(i,&data,len)) < len)
01271 {
01272 for(j=start;j<len;j++)
01273 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01274 }
01275 return data;
01276 }
01277
01278 double *get_QD() const
01279 {
01280 return QD;
01281 }
01282
01283 void swap_index(int i, int j) const
01284 {
01285 cache->swap_index(i,j);
01286 Kernel::swap_index(i,j);
01287 swap(y[i],y[j]);
01288 swap(QD[i],QD[j]);
01289 }
01290
01291 ~SVC_Q()
01292 {
01293 delete[] y;
01294 delete cache;
01295 delete[] QD;
01296 }
01297 private:
01298 schar *y;
01299 Cache *cache;
01300 double *QD;
01301 };
01302
01303 class ONE_CLASS_Q: public Kernel
01304 {
01305 public:
01306 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01307 :Kernel(prob.l, prob.x, param)
01308 {
01309 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01310 QD = new double[prob.l];
01311 for(int i=0;i<prob.l;i++)
01312 QD[i] = (this->*kernel_function)(i,i);
01313 }
01314
01315 Qfloat *get_Q(int i, int len) const
01316 {
01317 Qfloat *data;
01318 int start, j;
01319 if((start = cache->get_data(i,&data,len)) < len)
01320 {
01321 for(j=start;j<len;j++)
01322 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01323 }
01324 return data;
01325 }
01326
01327 double *get_QD() const
01328 {
01329 return QD;
01330 }
01331
01332 void swap_index(int i, int j) const
01333 {
01334 cache->swap_index(i,j);
01335 Kernel::swap_index(i,j);
01336 swap(QD[i],QD[j]);
01337 }
01338
01339 ~ONE_CLASS_Q()
01340 {
01341 delete cache;
01342 delete[] QD;
01343 }
01344 private:
01345 Cache *cache;
01346 double *QD;
01347 };
01348
01349 class SVR_Q: public Kernel
01350 {
01351 public:
01352 SVR_Q(const svm_problem& prob, const svm_parameter& param)
01353 :Kernel(prob.l, prob.x, param)
01354 {
01355 l = prob.l;
01356 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01357 QD = new double[2*l];
01358 sign = new schar[2*l];
01359 index = new int[2*l];
01360 for(int k=0;k<l;k++)
01361 {
01362 sign[k] = 1;
01363 sign[k+l] = -1;
01364 index[k] = k;
01365 index[k+l] = k;
01366 QD[k] = (this->*kernel_function)(k,k);
01367 QD[k+l] = QD[k];
01368 }
01369 buffer[0] = new Qfloat[2*l];
01370 buffer[1] = new Qfloat[2*l];
01371 next_buffer = 0;
01372 }
01373
01374 void swap_index(int i, int j) const
01375 {
01376 swap(sign[i],sign[j]);
01377 swap(index[i],index[j]);
01378 swap(QD[i],QD[j]);
01379 }
01380
01381 Qfloat *get_Q(int i, int len) const
01382 {
01383 Qfloat *data;
01384 int j, real_i = index[i];
01385 if(cache->get_data(real_i,&data,l) < l)
01386 {
01387 for(j=0;j<l;j++)
01388 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01389 }
01390
01391
01392 Qfloat *buf = buffer[next_buffer];
01393 next_buffer = 1 - next_buffer;
01394 schar si = sign[i];
01395 for(j=0;j<len;j++)
01396 buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01397 return buf;
01398 }
01399
01400 double *get_QD() const
01401 {
01402 return QD;
01403 }
01404
01405 ~SVR_Q()
01406 {
01407 delete cache;
01408 delete[] sign;
01409 delete[] index;
01410 delete[] buffer[0];
01411 delete[] buffer[1];
01412 delete[] QD;
01413 }
01414 private:
01415 int l;
01416 Cache *cache;
01417 schar *sign;
01418 int *index;
01419 mutable int next_buffer;
01420 Qfloat *buffer[2];
01421 double *QD;
01422 };
01423
01424
01425
01426
01427 static void solve_c_svc(
01428 const svm_problem *prob, const svm_parameter* param,
01429 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01430 {
01431 int l = prob->l;
01432 double *minus_ones = new double[l];
01433 schar *y = new schar[l];
01434
01435 int i;
01436
01437 for(i=0;i<l;i++)
01438 {
01439 alpha[i] = 0;
01440 minus_ones[i] = -1;
01441 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01442 }
01443
01444 Solver s;
01445 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01446 alpha, Cp, Cn, param->eps, si, param->shrinking);
01447
01448 double sum_alpha=0;
01449 for(i=0;i<l;i++)
01450 sum_alpha += alpha[i];
01451
01452 if (Cp==Cn)
01453 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01454
01455 for(i=0;i<l;i++)
01456 alpha[i] *= y[i];
01457
01458 delete[] minus_ones;
01459 delete[] y;
01460 }
01461
01462 static void solve_nu_svc(
01463 const svm_problem *prob, const svm_parameter *param,
01464 double *alpha, Solver::SolutionInfo* si)
01465 {
01466 int i;
01467 int l = prob->l;
01468 double nu = param->nu;
01469
01470 schar *y = new schar[l];
01471
01472 for(i=0;i<l;i++)
01473 if(prob->y[i]>0)
01474 y[i] = +1;
01475 else
01476 y[i] = -1;
01477
01478 double sum_pos = nu*l/2;
01479 double sum_neg = nu*l/2;
01480
01481 for(i=0;i<l;i++)
01482 if(y[i] == +1)
01483 {
01484 alpha[i] = min(1.0,sum_pos);
01485 sum_pos -= alpha[i];
01486 }
01487 else
01488 {
01489 alpha[i] = min(1.0,sum_neg);
01490 sum_neg -= alpha[i];
01491 }
01492
01493 double *zeros = new double[l];
01494
01495 for(i=0;i<l;i++)
01496 zeros[i] = 0;
01497
01498 Solver_NU s;
01499 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01500 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01501 double r = si->r;
01502
01503 info("C = %f\n",1/r);
01504
01505 for(i=0;i<l;i++)
01506 alpha[i] *= y[i]/r;
01507
01508 si->rho /= r;
01509 si->obj /= (r*r);
01510 si->upper_bound_p = 1/r;
01511 si->upper_bound_n = 1/r;
01512
01513 delete[] y;
01514 delete[] zeros;
01515 }
01516
01517 static void solve_one_class(
01518 const svm_problem *prob, const svm_parameter *param,
01519 double *alpha, Solver::SolutionInfo* si)
01520 {
01521 int l = prob->l;
01522 double *zeros = new double[l];
01523 schar *ones = new schar[l];
01524 int i;
01525
01526 int n = (int)(param->nu*prob->l);
01527
01528 for(i=0;i<n;i++)
01529 alpha[i] = 1;
01530 if(n<prob->l)
01531 alpha[n] = param->nu * prob->l - n;
01532 for(i=n+1;i<l;i++)
01533 alpha[i] = 0;
01534
01535 for(i=0;i<l;i++)
01536 {
01537 zeros[i] = 0;
01538 ones[i] = 1;
01539 }
01540
01541 Solver s;
01542 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01543 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01544
01545 delete[] zeros;
01546 delete[] ones;
01547 }
01548
01549 static void solve_epsilon_svr(
01550 const svm_problem *prob, const svm_parameter *param,
01551 double *alpha, Solver::SolutionInfo* si)
01552 {
01553 int l = prob->l;
01554 double *alpha2 = new double[2*l];
01555 double *linear_term = new double[2*l];
01556 schar *y = new schar[2*l];
01557 int i;
01558
01559 for(i=0;i<l;i++)
01560 {
01561 alpha2[i] = 0;
01562 linear_term[i] = param->p - prob->y[i];
01563 y[i] = 1;
01564
01565 alpha2[i+l] = 0;
01566 linear_term[i+l] = param->p + prob->y[i];
01567 y[i+l] = -1;
01568 }
01569
01570 Solver s;
01571 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01572 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01573
01574 double sum_alpha = 0;
01575 for(i=0;i<l;i++)
01576 {
01577 alpha[i] = alpha2[i] - alpha2[i+l];
01578 sum_alpha += fabs(alpha[i]);
01579 }
01580 info("nu = %f\n",sum_alpha/(param->C*l));
01581
01582 delete[] alpha2;
01583 delete[] linear_term;
01584 delete[] y;
01585 }
01586
01587 static void solve_nu_svr(
01588 const svm_problem *prob, const svm_parameter *param,
01589 double *alpha, Solver::SolutionInfo* si)
01590 {
01591 int l = prob->l;
01592 double C = param->C;
01593 double *alpha2 = new double[2*l];
01594 double *linear_term = new double[2*l];
01595 schar *y = new schar[2*l];
01596 int i;
01597
01598 double sum = C * param->nu * l / 2;
01599 for(i=0;i<l;i++)
01600 {
01601 alpha2[i] = alpha2[i+l] = min(sum,C);
01602 sum -= alpha2[i];
01603
01604 linear_term[i] = - prob->y[i];
01605 y[i] = 1;
01606
01607 linear_term[i+l] = prob->y[i];
01608 y[i+l] = -1;
01609 }
01610
01611 Solver_NU s;
01612 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01613 alpha2, C, C, param->eps, si, param->shrinking);
01614
01615 info("epsilon = %f\n",-si->r);
01616
01617 for(i=0;i<l;i++)
01618 alpha[i] = alpha2[i] - alpha2[i+l];
01619
01620 delete[] alpha2;
01621 delete[] linear_term;
01622 delete[] y;
01623 }
01624
01625
01626
01627
01628 struct decision_function
01629 {
01630 double *alpha;
01631 double rho;
01632 };
01633
01634 static decision_function svm_train_one(
01635 const svm_problem *prob, const svm_parameter *param,
01636 double Cp, double Cn)
01637 {
01638 double *alpha = Malloc(double,prob->l);
01639 Solver::SolutionInfo si;
01640 switch(param->svm_type)
01641 {
01642 case C_SVC:
01643 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01644 break;
01645 case NU_SVC:
01646 solve_nu_svc(prob,param,alpha,&si);
01647 break;
01648 case ONE_CLASS:
01649 solve_one_class(prob,param,alpha,&si);
01650 break;
01651 case EPSILON_SVR:
01652 solve_epsilon_svr(prob,param,alpha,&si);
01653 break;
01654 case NU_SVR:
01655 solve_nu_svr(prob,param,alpha,&si);
01656 break;
01657 }
01658
01659 info("obj = %f, rho = %f\n",si.obj,si.rho);
01660
01661
01662
01663 int nSV = 0;
01664 int nBSV = 0;
01665 for(int i=0;i<prob->l;i++)
01666 {
01667 if(fabs(alpha[i]) > 0)
01668 {
01669 ++nSV;
01670 if(prob->y[i] > 0)
01671 {
01672 if(fabs(alpha[i]) >= si.upper_bound_p)
01673 ++nBSV;
01674 }
01675 else
01676 {
01677 if(fabs(alpha[i]) >= si.upper_bound_n)
01678 ++nBSV;
01679 }
01680 }
01681 }
01682
01683 info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01684
01685 decision_function f;
01686 f.alpha = alpha;
01687 f.rho = si.rho;
01688 return f;
01689 }
01690
01691
01692 static void sigmoid_train(
01693 int l, const double *dec_values, const double *labels,
01694 double& A, double& B)
01695 {
01696 double prior1=0, prior0 = 0;
01697 int i;
01698
01699 for (i=0;i<l;i++)
01700 if (labels[i] > 0) prior1+=1;
01701 else prior0+=1;
01702
01703 int max_iter=100;
01704 double min_step=1e-10;
01705 double sigma=1e-12;
01706 double eps=1e-5;
01707 double hiTarget=(prior1+1.0)/(prior1+2.0);
01708 double loTarget=1/(prior0+2.0);
01709 double *t=Malloc(double,l);
01710 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01711 double newA,newB,newf,d1,d2;
01712 int iter;
01713
01714
01715 A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01716 double fval = 0.0;
01717
01718 for (i=0;i<l;i++)
01719 {
01720 if (labels[i]>0) t[i]=hiTarget;
01721 else t[i]=loTarget;
01722 fApB = dec_values[i]*A+B;
01723 if (fApB>=0)
01724 fval += t[i]*fApB + log(1+exp(-fApB));
01725 else
01726 fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01727 }
01728 for (iter=0;iter<max_iter;iter++)
01729 {
01730
01731 h11=sigma;
01732 h22=sigma;
01733 h21=0.0;g1=0.0;g2=0.0;
01734 for (i=0;i<l;i++)
01735 {
01736 fApB = dec_values[i]*A+B;
01737 if (fApB >= 0)
01738 {
01739 p=exp(-fApB)/(1.0+exp(-fApB));
01740 q=1.0/(1.0+exp(-fApB));
01741 }
01742 else
01743 {
01744 p=1.0/(1.0+exp(fApB));
01745 q=exp(fApB)/(1.0+exp(fApB));
01746 }
01747 d2=p*q;
01748 h11+=dec_values[i]*dec_values[i]*d2;
01749 h22+=d2;
01750 h21+=dec_values[i]*d2;
01751 d1=t[i]-p;
01752 g1+=dec_values[i]*d1;
01753 g2+=d1;
01754 }
01755
01756
01757 if (fabs(g1)<eps && fabs(g2)<eps)
01758 break;
01759
01760
01761 det=h11*h22-h21*h21;
01762 dA=-(h22*g1 - h21 * g2) / det;
01763 dB=-(-h21*g1+ h11 * g2) / det;
01764 gd=g1*dA+g2*dB;
01765
01766
01767 stepsize = 1;
01768 while (stepsize >= min_step)
01769 {
01770 newA = A + stepsize * dA;
01771 newB = B + stepsize * dB;
01772
01773
01774 newf = 0.0;
01775 for (i=0;i<l;i++)
01776 {
01777 fApB = dec_values[i]*newA+newB;
01778 if (fApB >= 0)
01779 newf += t[i]*fApB + log(1+exp(-fApB));
01780 else
01781 newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01782 }
01783
01784 if (newf<fval+0.0001*stepsize*gd)
01785 {
01786 A=newA;B=newB;fval=newf;
01787 break;
01788 }
01789 else
01790 stepsize = stepsize / 2.0;
01791 }
01792
01793 if (stepsize < min_step)
01794 {
01795 info("Line search fails in two-class probability estimates\n");
01796 break;
01797 }
01798 }
01799
01800 if (iter>=max_iter)
01801 info("Reaching maximal iterations in two-class probability estimates\n");
01802 free(t);
01803 }
01804
01805 static double sigmoid_predict(double decision_value, double A, double B)
01806 {
01807 double fApB = decision_value*A+B;
01808 if (fApB >= 0)
01809 return exp(-fApB)/(1.0+exp(-fApB));
01810 else
01811 return 1.0/(1+exp(fApB)) ;
01812 }
01813
01814
01815 static void multiclass_probability(int k, double **r, double *p)
01816 {
01817 int t,j;
01818 int iter = 0, max_iter=max(100,k);
01819 double **Q=Malloc(double *,k);
01820 double *Qp=Malloc(double,k);
01821 double pQp, eps=0.005/k;
01822
01823 for (t=0;t<k;t++)
01824 {
01825 p[t]=1.0/k;
01826 Q[t]=Malloc(double,k);
01827 Q[t][t]=0;
01828 for (j=0;j<t;j++)
01829 {
01830 Q[t][t]+=r[j][t]*r[j][t];
01831 Q[t][j]=Q[j][t];
01832 }
01833 for (j=t+1;j<k;j++)
01834 {
01835 Q[t][t]+=r[j][t]*r[j][t];
01836 Q[t][j]=-r[j][t]*r[t][j];
01837 }
01838 }
01839 for (iter=0;iter<max_iter;iter++)
01840 {
01841
01842 pQp=0;
01843 for (t=0;t<k;t++)
01844 {
01845 Qp[t]=0;
01846 for (j=0;j<k;j++)
01847 Qp[t]+=Q[t][j]*p[j];
01848 pQp+=p[t]*Qp[t];
01849 }
01850 double max_error=0;
01851 for (t=0;t<k;t++)
01852 {
01853 double error=fabs(Qp[t]-pQp);
01854 if (error>max_error)
01855 max_error=error;
01856 }
01857 if (max_error<eps) break;
01858
01859 for (t=0;t<k;t++)
01860 {
01861 double diff=(-Qp[t]+pQp)/Q[t][t];
01862 p[t]+=diff;
01863 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01864 for (j=0;j<k;j++)
01865 {
01866 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01867 p[j]/=(1+diff);
01868 }
01869 }
01870 }
01871 if (iter>=max_iter)
01872 info("Exceeds max_iter in multiclass_prob\n");
01873 for(t=0;t<k;t++) free(Q[t]);
01874 free(Q);
01875 free(Qp);
01876 }
01877
01878
01879 static void svm_binary_svc_probability(
01880 const svm_problem *prob, const svm_parameter *param,
01881 double Cp, double Cn, double& probA, double& probB)
01882 {
01883 int i;
01884 int nr_fold = 5;
01885 int *perm = Malloc(int,prob->l);
01886 double *dec_values = Malloc(double,prob->l);
01887
01888
01889 for(i=0;i<prob->l;i++) perm[i]=i;
01890 for(i=0;i<prob->l;i++)
01891 {
01892 int j = i+rand()%(prob->l-i);
01893 swap(perm[i],perm[j]);
01894 }
01895 for(i=0;i<nr_fold;i++)
01896 {
01897 int begin = i*prob->l/nr_fold;
01898 int end = (i+1)*prob->l/nr_fold;
01899 int j,k;
01900 struct svm_problem subprob;
01901
01902 subprob.l = prob->l-(end-begin);
01903 subprob.x = Malloc(struct svm_node*,subprob.l);
01904 subprob.y = Malloc(double,subprob.l);
01905
01906 k=0;
01907 for(j=0;j<begin;j++)
01908 {
01909 subprob.x[k] = prob->x[perm[j]];
01910 subprob.y[k] = prob->y[perm[j]];
01911 ++k;
01912 }
01913 for(j=end;j<prob->l;j++)
01914 {
01915 subprob.x[k] = prob->x[perm[j]];
01916 subprob.y[k] = prob->y[perm[j]];
01917 ++k;
01918 }
01919 int p_count=0,n_count=0;
01920 for(j=0;j<k;j++)
01921 if(subprob.y[j]>0)
01922 p_count++;
01923 else
01924 n_count++;
01925
01926 if(p_count==0 && n_count==0)
01927 for(j=begin;j<end;j++)
01928 dec_values[perm[j]] = 0;
01929 else if(p_count > 0 && n_count == 0)
01930 for(j=begin;j<end;j++)
01931 dec_values[perm[j]] = 1;
01932 else if(p_count == 0 && n_count > 0)
01933 for(j=begin;j<end;j++)
01934 dec_values[perm[j]] = -1;
01935 else
01936 {
01937 svm_parameter subparam = *param;
01938 subparam.probability=0;
01939 subparam.C=1.0;
01940 subparam.nr_weight=2;
01941 subparam.weight_label = Malloc(int,2);
01942 subparam.weight = Malloc(double,2);
01943 subparam.weight_label[0]=+1;
01944 subparam.weight_label[1]=-1;
01945 subparam.weight[0]=Cp;
01946 subparam.weight[1]=Cn;
01947 struct svm_model *submodel = svm_train(&subprob,&subparam);
01948 for(j=begin;j<end;j++)
01949 {
01950 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
01951
01952 dec_values[perm[j]] *= submodel->label[0];
01953 }
01954 svm_free_and_destroy_model(&submodel);
01955 svm_destroy_param(&subparam);
01956 }
01957 free(subprob.x);
01958 free(subprob.y);
01959 }
01960 sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01961 free(dec_values);
01962 free(perm);
01963 }
01964
01965
01966 static double svm_svr_probability(
01967 const svm_problem *prob, const svm_parameter *param)
01968 {
01969 int i;
01970 int nr_fold = 5;
01971 double *ymv = Malloc(double,prob->l);
01972 double mae = 0;
01973
01974 svm_parameter newparam = *param;
01975 newparam.probability = 0;
01976 svm_cross_validation(prob,&newparam,nr_fold,ymv);
01977 for(i=0;i<prob->l;i++)
01978 {
01979 ymv[i]=prob->y[i]-ymv[i];
01980 mae += fabs(ymv[i]);
01981 }
01982 mae /= prob->l;
01983 double std=sqrt(2*mae*mae);
01984 int count=0;
01985 mae=0;
01986 for(i=0;i<prob->l;i++)
01987 if (fabs(ymv[i]) > 5*std)
01988 count=count+1;
01989 else
01990 mae+=fabs(ymv[i]);
01991 mae /= (prob->l-count);
01992 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
01993 free(ymv);
01994 return mae;
01995 }
01996
01997
01998
01999
02000 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02001 {
02002 int l = prob->l;
02003 int max_nr_class = 16;
02004 int nr_class = 0;
02005 int *label = Malloc(int,max_nr_class);
02006 int *count = Malloc(int,max_nr_class);
02007 int *data_label = Malloc(int,l);
02008 int i;
02009
02010 for(i=0;i<l;i++)
02011 {
02012 int this_label = (int)prob->y[i];
02013 int j;
02014 for(j=0;j<nr_class;j++)
02015 {
02016 if(this_label == label[j])
02017 {
02018 ++count[j];
02019 break;
02020 }
02021 }
02022 data_label[i] = j;
02023 if(j == nr_class)
02024 {
02025 if(nr_class == max_nr_class)
02026 {
02027 max_nr_class *= 2;
02028 label = (int *)realloc(label,max_nr_class*sizeof(int));
02029 count = (int *)realloc(count,max_nr_class*sizeof(int));
02030 }
02031 label[nr_class] = this_label;
02032 count[nr_class] = 1;
02033 ++nr_class;
02034 }
02035 }
02036
02037 int *start = Malloc(int,nr_class);
02038 start[0] = 0;
02039 for(i=1;i<nr_class;i++)
02040 start[i] = start[i-1]+count[i-1];
02041 for(i=0;i<l;i++)
02042 {
02043 perm[start[data_label[i]]] = i;
02044 ++start[data_label[i]];
02045 }
02046 start[0] = 0;
02047 for(i=1;i<nr_class;i++)
02048 start[i] = start[i-1]+count[i-1];
02049
02050 *nr_class_ret = nr_class;
02051 *label_ret = label;
02052 *start_ret = start;
02053 *count_ret = count;
02054 free(data_label);
02055 }
02056
02057
02058
02059
02060 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02061 {
02062 svm_model *model = Malloc(svm_model,1);
02063 model->param = *param;
02064 model->free_sv = 0;
02065
02066 if(param->svm_type == ONE_CLASS ||
02067 param->svm_type == EPSILON_SVR ||
02068 param->svm_type == NU_SVR)
02069 {
02070
02071 model->nr_class = 2;
02072 model->label = NULL;
02073 model->nSV = NULL;
02074 model->probA = NULL; model->probB = NULL;
02075 model->sv_coef = Malloc(double *,1);
02076
02077 if(param->probability &&
02078 (param->svm_type == EPSILON_SVR ||
02079 param->svm_type == NU_SVR))
02080 {
02081 model->probA = Malloc(double,1);
02082 model->probA[0] = svm_svr_probability(prob,param);
02083 }
02084
02085 decision_function f = svm_train_one(prob,param,0,0);
02086 model->rho = Malloc(double,1);
02087 model->rho[0] = f.rho;
02088
02089 int nSV = 0;
02090 int i;
02091 for(i=0;i<prob->l;i++)
02092 if(fabs(f.alpha[i]) > 0) ++nSV;
02093 model->l = nSV;
02094 model->SV = Malloc(svm_node *,nSV);
02095 model->sv_coef[0] = Malloc(double,nSV);
02096 int j = 0;
02097 for(i=0;i<prob->l;i++)
02098 if(fabs(f.alpha[i]) > 0)
02099 {
02100 model->SV[j] = prob->x[i];
02101 model->sv_coef[0][j] = f.alpha[i];
02102 ++j;
02103 }
02104
02105 free(f.alpha);
02106 }
02107 else
02108 {
02109
02110 int l = prob->l;
02111 int nr_class;
02112 int *label = NULL;
02113 int *start = NULL;
02114 int *count = NULL;
02115 int *perm = Malloc(int,l);
02116
02117
02118 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02119 svm_node **x = Malloc(svm_node *,l);
02120 int i;
02121 for(i=0;i<l;i++)
02122 x[i] = prob->x[perm[i]];
02123
02124
02125
02126 double *weighted_C = Malloc(double, nr_class);
02127 for(i=0;i<nr_class;i++)
02128 weighted_C[i] = param->C;
02129 for(i=0;i<param->nr_weight;i++)
02130 {
02131 int j;
02132 for(j=0;j<nr_class;j++)
02133 if(param->weight_label[i] == label[j])
02134 break;
02135 if(j == nr_class)
02136 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
02137 else
02138 weighted_C[j] *= param->weight[i];
02139 }
02140
02141
02142
02143 bool *nonzero = Malloc(bool,l);
02144 for(i=0;i<l;i++)
02145 nonzero[i] = false;
02146 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02147
02148 double *probA=NULL,*probB=NULL;
02149 if (param->probability)
02150 {
02151 probA=Malloc(double,nr_class*(nr_class-1)/2);
02152 probB=Malloc(double,nr_class*(nr_class-1)/2);
02153 }
02154
02155 int p = 0;
02156 for(i=0;i<nr_class;i++)
02157 for(int j=i+1;j<nr_class;j++)
02158 {
02159 svm_problem sub_prob;
02160 int si = start[i], sj = start[j];
02161 int ci = count[i], cj = count[j];
02162 sub_prob.l = ci+cj;
02163 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02164 sub_prob.y = Malloc(double,sub_prob.l);
02165 int k;
02166 for(k=0;k<ci;k++)
02167 {
02168 sub_prob.x[k] = x[si+k];
02169 sub_prob.y[k] = +1;
02170 }
02171 for(k=0;k<cj;k++)
02172 {
02173 sub_prob.x[ci+k] = x[sj+k];
02174 sub_prob.y[ci+k] = -1;
02175 }
02176
02177 if(param->probability)
02178 svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02179
02180 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02181 for(k=0;k<ci;k++)
02182 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02183 nonzero[si+k] = true;
02184 for(k=0;k<cj;k++)
02185 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02186 nonzero[sj+k] = true;
02187 free(sub_prob.x);
02188 free(sub_prob.y);
02189 ++p;
02190 }
02191
02192
02193
02194 model->nr_class = nr_class;
02195
02196 model->label = Malloc(int,nr_class);
02197 for(i=0;i<nr_class;i++)
02198 model->label[i] = label[i];
02199
02200 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02201 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02202 model->rho[i] = f[i].rho;
02203
02204 if(param->probability)
02205 {
02206 model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02207 model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02208 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02209 {
02210 model->probA[i] = probA[i];
02211 model->probB[i] = probB[i];
02212 }
02213 }
02214 else
02215 {
02216 model->probA=NULL;
02217 model->probB=NULL;
02218 }
02219
02220 int total_sv = 0;
02221 int *nz_count = Malloc(int,nr_class);
02222 model->nSV = Malloc(int,nr_class);
02223 for(i=0;i<nr_class;i++)
02224 {
02225 int nSV = 0;
02226 for(int j=0;j<count[i];j++)
02227 if(nonzero[start[i]+j])
02228 {
02229 ++nSV;
02230 ++total_sv;
02231 }
02232 model->nSV[i] = nSV;
02233 nz_count[i] = nSV;
02234 }
02235
02236 info("Total nSV = %d\n",total_sv);
02237
02238 model->l = total_sv;
02239 model->SV = Malloc(svm_node *,total_sv);
02240 p = 0;
02241 for(i=0;i<l;i++)
02242 if(nonzero[i]) model->SV[p++] = x[i];
02243
02244 int *nz_start = Malloc(int,nr_class);
02245 nz_start[0] = 0;
02246 for(i=1;i<nr_class;i++)
02247 nz_start[i] = nz_start[i-1]+nz_count[i-1];
02248
02249 model->sv_coef = Malloc(double *,nr_class-1);
02250 for(i=0;i<nr_class-1;i++)
02251 model->sv_coef[i] = Malloc(double,total_sv);
02252
02253 p = 0;
02254 for(i=0;i<nr_class;i++)
02255 for(int j=i+1;j<nr_class;j++)
02256 {
02257
02258
02259
02260
02261 int si = start[i];
02262 int sj = start[j];
02263 int ci = count[i];
02264 int cj = count[j];
02265
02266 int q = nz_start[i];
02267 int k;
02268 for(k=0;k<ci;k++)
02269 if(nonzero[si+k])
02270 model->sv_coef[j-1][q++] = f[p].alpha[k];
02271 q = nz_start[j];
02272 for(k=0;k<cj;k++)
02273 if(nonzero[sj+k])
02274 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02275 ++p;
02276 }
02277
02278 free(label);
02279 free(probA);
02280 free(probB);
02281 free(count);
02282 free(perm);
02283 free(start);
02284 free(x);
02285 free(weighted_C);
02286 free(nonzero);
02287 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02288 free(f[i].alpha);
02289 free(f);
02290 free(nz_count);
02291 free(nz_start);
02292 }
02293 return model;
02294 }
02295
02296
02297 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02298 {
02299 int i;
02300 int *fold_start = Malloc(int,nr_fold+1);
02301 int l = prob->l;
02302 int *perm = Malloc(int,l);
02303 int nr_class;
02304
02305
02306
02307 if((param->svm_type == C_SVC ||
02308 param->svm_type == NU_SVC) && nr_fold < l)
02309 {
02310 int *start = NULL;
02311 int *label = NULL;
02312 int *count = NULL;
02313 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02314
02315
02316 int *fold_count = Malloc(int,nr_fold);
02317 int c;
02318 int *index = Malloc(int,l);
02319 for(i=0;i<l;i++)
02320 index[i]=perm[i];
02321 for (c=0; c<nr_class; c++)
02322 for(i=0;i<count[c];i++)
02323 {
02324 int j = i+rand()%(count[c]-i);
02325 swap(index[start[c]+j],index[start[c]+i]);
02326 }
02327 for(i=0;i<nr_fold;i++)
02328 {
02329 fold_count[i] = 0;
02330 for (c=0; c<nr_class;c++)
02331 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02332 }
02333 fold_start[0]=0;
02334 for (i=1;i<=nr_fold;i++)
02335 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02336 for (c=0; c<nr_class;c++)
02337 for(i=0;i<nr_fold;i++)
02338 {
02339 int begin = start[c]+i*count[c]/nr_fold;
02340 int end = start[c]+(i+1)*count[c]/nr_fold;
02341 for(int j=begin;j<end;j++)
02342 {
02343 perm[fold_start[i]] = index[j];
02344 fold_start[i]++;
02345 }
02346 }
02347 fold_start[0]=0;
02348 for (i=1;i<=nr_fold;i++)
02349 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02350 free(start);
02351 free(label);
02352 free(count);
02353 free(index);
02354 free(fold_count);
02355 }
02356 else
02357 {
02358 for(i=0;i<l;i++) perm[i]=i;
02359 for(i=0;i<l;i++)
02360 {
02361 int j = i+rand()%(l-i);
02362 swap(perm[i],perm[j]);
02363 }
02364 for(i=0;i<=nr_fold;i++)
02365 fold_start[i]=i*l/nr_fold;
02366 }
02367
02368 for(i=0;i<nr_fold;i++)
02369 {
02370 int begin = fold_start[i];
02371 int end = fold_start[i+1];
02372 int j,k;
02373 struct svm_problem subprob;
02374
02375 subprob.l = l-(end-begin);
02376 subprob.x = Malloc(struct svm_node*,subprob.l);
02377 subprob.y = Malloc(double,subprob.l);
02378
02379 k=0;
02380 for(j=0;j<begin;j++)
02381 {
02382 subprob.x[k] = prob->x[perm[j]];
02383 subprob.y[k] = prob->y[perm[j]];
02384 ++k;
02385 }
02386 for(j=end;j<l;j++)
02387 {
02388 subprob.x[k] = prob->x[perm[j]];
02389 subprob.y[k] = prob->y[perm[j]];
02390 ++k;
02391 }
02392 struct svm_model *submodel = svm_train(&subprob,param);
02393 if(param->probability &&
02394 (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02395 {
02396 double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02397 for(j=begin;j<end;j++)
02398 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02399 free(prob_estimates);
02400 }
02401 else
02402 for(j=begin;j<end;j++)
02403 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02404 svm_free_and_destroy_model(&submodel);
02405 free(subprob.x);
02406 free(subprob.y);
02407 }
02408 free(fold_start);
02409 free(perm);
02410 }
02411
02412
02413 int svm_get_svm_type(const svm_model *model)
02414 {
02415 return model->param.svm_type;
02416 }
02417
02418 int svm_get_nr_class(const svm_model *model)
02419 {
02420 return model->nr_class;
02421 }
02422
02423 void svm_get_labels(const svm_model *model, int* label)
02424 {
02425 if (model->label != NULL)
02426 for(int i=0;i<model->nr_class;i++)
02427 label[i] = model->label[i];
02428 }
02429
02430 double svm_get_svr_probability(const svm_model *model)
02431 {
02432 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02433 model->probA!=NULL)
02434 return model->probA[0];
02435 else
02436 {
02437 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02438 return 0;
02439 }
02440 }
02441
02442 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02443 {
02444 if(model->param.svm_type == ONE_CLASS ||
02445 model->param.svm_type == EPSILON_SVR ||
02446 model->param.svm_type == NU_SVR)
02447 {
02448 double *sv_coef = model->sv_coef[0];
02449 double sum = 0;
02450 for(int i=0;i<model->l;i++)
02451 sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02452 sum -= model->rho[0];
02453 *dec_values = sum;
02454
02455 if(model->param.svm_type == ONE_CLASS)
02456 return (sum>0)?1:-1;
02457 else
02458 return sum;
02459 }
02460 else
02461 {
02462 int i;
02463 int nr_class = model->nr_class;
02464 int l = model->l;
02465
02466 double *kvalue = Malloc(double,l);
02467 for(i=0;i<l;i++)
02468 kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02469
02470 int *start = Malloc(int,nr_class);
02471 start[0] = 0;
02472 for(i=1;i<nr_class;i++)
02473 start[i] = start[i-1]+model->nSV[i-1];
02474
02475 int *vote = Malloc(int,nr_class);
02476 for(i=0;i<nr_class;i++)
02477 vote[i] = 0;
02478
02479 int p=0;
02480 for(i=0;i<nr_class;i++)
02481 for(int j=i+1;j<nr_class;j++)
02482 {
02483 double sum = 0;
02484 int si = start[i];
02485 int sj = start[j];
02486 int ci = model->nSV[i];
02487 int cj = model->nSV[j];
02488
02489 int k;
02490 double *coef1 = model->sv_coef[j-1];
02491 double *coef2 = model->sv_coef[i];
02492 for(k=0;k<ci;k++)
02493 sum += coef1[si+k] * kvalue[si+k];
02494 for(k=0;k<cj;k++)
02495 sum += coef2[sj+k] * kvalue[sj+k];
02496 sum -= model->rho[p];
02497 dec_values[p] = sum;
02498
02499 if(dec_values[p] > 0)
02500 ++vote[i];
02501 else
02502 ++vote[j];
02503 p++;
02504 }
02505
02506 int vote_max_idx = 0;
02507 for(i=1;i<nr_class;i++)
02508 if(vote[i] > vote[vote_max_idx])
02509 vote_max_idx = i;
02510
02511 free(kvalue);
02512 free(start);
02513 free(vote);
02514 return model->label[vote_max_idx];
02515 }
02516 }
02517
02518 double svm_predict(const svm_model *model, const svm_node *x)
02519 {
02520 int nr_class = model->nr_class;
02521 double *dec_values;
02522 if(model->param.svm_type == ONE_CLASS ||
02523 model->param.svm_type == EPSILON_SVR ||
02524 model->param.svm_type == NU_SVR)
02525 dec_values = Malloc(double, 1);
02526 else
02527 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02528 double pred_result = svm_predict_values(model, x, dec_values);
02529 free(dec_values);
02530 return pred_result;
02531 }
02532
02533 double svm_predict_probability(
02534 const svm_model *model, const svm_node *x, double *prob_estimates)
02535 {
02536 if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02537 model->probA!=NULL && model->probB!=NULL)
02538 {
02539 int i;
02540 int nr_class = model->nr_class;
02541 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02542 svm_predict_values(model, x, dec_values);
02543
02544 double min_prob=1e-7;
02545 double **pairwise_prob=Malloc(double *,nr_class);
02546 for(i=0;i<nr_class;i++)
02547 pairwise_prob[i]=Malloc(double,nr_class);
02548 int k=0;
02549 for(i=0;i<nr_class;i++)
02550 for(int j=i+1;j<nr_class;j++)
02551 {
02552 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02553 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02554 k++;
02555 }
02556 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02557
02558 int prob_max_idx = 0;
02559 for(i=1;i<nr_class;i++)
02560 if(prob_estimates[i] > prob_estimates[prob_max_idx])
02561 prob_max_idx = i;
02562 for(i=0;i<nr_class;i++)
02563 free(pairwise_prob[i]);
02564 free(dec_values);
02565 free(pairwise_prob);
02566 return model->label[prob_max_idx];
02567 }
02568 else
02569 return svm_predict(model, x);
02570 }
02571
02572 static const char *svm_type_table[] =
02573 {
02574 "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02575 };
02576
02577 static const char *kernel_type_table[]=
02578 {
02579 "linear","polynomial","rbf","sigmoid","precomputed",NULL
02580 };
02581
02582 int svm_save_model(const char *model_file_name, const svm_model *model)
02583 {
02584 FILE *fp = fopen(model_file_name,"w");
02585 if(fp==NULL) return -1;
02586
02587 const svm_parameter& param = model->param;
02588
02589 fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02590 fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02591
02592 if(param.kernel_type == POLY)
02593 fprintf(fp,"degree %d\n", param.degree);
02594
02595 if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02596 fprintf(fp,"gamma %g\n", param.gamma);
02597
02598 if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02599 fprintf(fp,"coef0 %g\n", param.coef0);
02600
02601 int nr_class = model->nr_class;
02602 int l = model->l;
02603 fprintf(fp, "nr_class %d\n", nr_class);
02604 fprintf(fp, "total_sv %d\n",l);
02605
02606 {
02607 fprintf(fp, "rho");
02608 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02609 fprintf(fp," %g",model->rho[i]);
02610 fprintf(fp, "\n");
02611 }
02612
02613 if(model->label)
02614 {
02615 fprintf(fp, "label");
02616 for(int i=0;i<nr_class;i++)
02617 fprintf(fp," %d",model->label[i]);
02618 fprintf(fp, "\n");
02619 }
02620
02621 if(model->probA)
02622 {
02623 fprintf(fp, "probA");
02624 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02625 fprintf(fp," %g",model->probA[i]);
02626 fprintf(fp, "\n");
02627 }
02628 if(model->probB)
02629 {
02630 fprintf(fp, "probB");
02631 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02632 fprintf(fp," %g",model->probB[i]);
02633 fprintf(fp, "\n");
02634 }
02635
02636 if(model->nSV)
02637 {
02638 fprintf(fp, "nr_sv");
02639 for(int i=0;i<nr_class;i++)
02640 fprintf(fp," %d",model->nSV[i]);
02641 fprintf(fp, "\n");
02642 }
02643
02644 fprintf(fp, "SV\n");
02645 const double * const *sv_coef = model->sv_coef;
02646 const svm_node * const *SV = model->SV;
02647
02648 for(int i=0;i<l;i++)
02649 {
02650 for(int j=0;j<nr_class-1;j++)
02651 fprintf(fp, "%.16g ",sv_coef[j][i]);
02652
02653 const svm_node *p = SV[i];
02654
02655 if(param.kernel_type == PRECOMPUTED)
02656 fprintf(fp,"0:%d ",(int)(p->value));
02657 else
02658 while(p->index != -1)
02659 {
02660 fprintf(fp,"%d:%.8g ",p->index,p->value);
02661 p++;
02662 }
02663 fprintf(fp, "\n");
02664 }
02665 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02666 else return 0;
02667 }
02668
02669 static char *line = NULL;
02670 static int max_line_len;
02671
02672 static char* readline(FILE *input)
02673 {
02674 int len;
02675
02676 if(fgets(line,max_line_len,input) == NULL)
02677 return NULL;
02678
02679 while(strrchr(line,'\n') == NULL)
02680 {
02681 max_line_len *= 2;
02682 line = (char *) realloc(line,max_line_len);
02683 len = (int) strlen(line);
02684 if(fgets(line+len,max_line_len-len,input) == NULL)
02685 break;
02686 }
02687 return line;
02688 }
02689
02690 svm_model *svm_load_model(const char *model_file_name)
02691 {
02692 FILE *fp = fopen(model_file_name,"rb");
02693 if(fp==NULL) return NULL;
02694
02695
02696
02697 svm_model *model = Malloc(svm_model,1);
02698 svm_parameter& param = model->param;
02699 model->rho = NULL;
02700 model->probA = NULL;
02701 model->probB = NULL;
02702 model->label = NULL;
02703 model->nSV = NULL;
02704
02705 char cmd[81];
02706 while(1)
02707 {
02708 fscanf(fp,"%80s",cmd);
02709
02710 if(strcmp(cmd,"svm_type")==0)
02711 {
02712 fscanf(fp,"%80s",cmd);
02713 int i;
02714 for(i=0;svm_type_table[i];i++)
02715 {
02716 if(strcmp(svm_type_table[i],cmd)==0)
02717 {
02718 param.svm_type=i;
02719 break;
02720 }
02721 }
02722 if(svm_type_table[i] == NULL)
02723 {
02724 fprintf(stderr,"unknown svm type.\n");
02725 free(model->rho);
02726 free(model->label);
02727 free(model->nSV);
02728 free(model);
02729 return NULL;
02730 }
02731 }
02732 else if(strcmp(cmd,"kernel_type")==0)
02733 {
02734 fscanf(fp,"%80s",cmd);
02735 int i;
02736 for(i=0;kernel_type_table[i];i++)
02737 {
02738 if(strcmp(kernel_type_table[i],cmd)==0)
02739 {
02740 param.kernel_type=i;
02741 break;
02742 }
02743 }
02744 if(kernel_type_table[i] == NULL)
02745 {
02746 fprintf(stderr,"unknown kernel function.\n");
02747 free(model->rho);
02748 free(model->label);
02749 free(model->nSV);
02750 free(model);
02751 return NULL;
02752 }
02753 }
02754 else if(strcmp(cmd,"degree")==0)
02755 fscanf(fp,"%d",¶m.degree);
02756 else if(strcmp(cmd,"gamma")==0)
02757 fscanf(fp,"%lf",¶m.gamma);
02758 else if(strcmp(cmd,"coef0")==0)
02759 fscanf(fp,"%lf",¶m.coef0);
02760 else if(strcmp(cmd,"nr_class")==0)
02761 fscanf(fp,"%d",&model->nr_class);
02762 else if(strcmp(cmd,"total_sv")==0)
02763 fscanf(fp,"%d",&model->l);
02764 else if(strcmp(cmd,"rho")==0)
02765 {
02766 int n = model->nr_class * (model->nr_class-1)/2;
02767 model->rho = Malloc(double,n);
02768 for(int i=0;i<n;i++)
02769 fscanf(fp,"%lf",&model->rho[i]);
02770 }
02771 else if(strcmp(cmd,"label")==0)
02772 {
02773 int n = model->nr_class;
02774 model->label = Malloc(int,n);
02775 for(int i=0;i<n;i++)
02776 fscanf(fp,"%d",&model->label[i]);
02777 }
02778 else if(strcmp(cmd,"probA")==0)
02779 {
02780 int n = model->nr_class * (model->nr_class-1)/2;
02781 model->probA = Malloc(double,n);
02782 for(int i=0;i<n;i++)
02783 fscanf(fp,"%lf",&model->probA[i]);
02784 }
02785 else if(strcmp(cmd,"probB")==0)
02786 {
02787 int n = model->nr_class * (model->nr_class-1)/2;
02788 model->probB = Malloc(double,n);
02789 for(int i=0;i<n;i++)
02790 fscanf(fp,"%lf",&model->probB[i]);
02791 }
02792 else if(strcmp(cmd,"nr_sv")==0)
02793 {
02794 int n = model->nr_class;
02795 model->nSV = Malloc(int,n);
02796 for(int i=0;i<n;i++)
02797 fscanf(fp,"%d",&model->nSV[i]);
02798 }
02799 else if(strcmp(cmd,"SV")==0)
02800 {
02801 while(1)
02802 {
02803 int c = getc(fp);
02804 if(c==EOF || c=='\n') break;
02805 }
02806 break;
02807 }
02808 else
02809 {
02810 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02811 free(model->rho);
02812 free(model->label);
02813 free(model->nSV);
02814 free(model);
02815 return NULL;
02816 }
02817 }
02818
02819
02820
02821 int elements = 0;
02822 long pos = ftell(fp);
02823
02824 max_line_len = 1024;
02825 line = Malloc(char,max_line_len);
02826 char *p,*endptr,*idx,*val;
02827
02828 while(readline(fp)!=NULL)
02829 {
02830 p = strtok(line,":");
02831 while(1)
02832 {
02833 p = strtok(NULL,":");
02834 if(p == NULL)
02835 break;
02836 ++elements;
02837 }
02838 }
02839 elements += model->l;
02840
02841 fseek(fp,pos,SEEK_SET);
02842
02843 int m = model->nr_class - 1;
02844 int l = model->l;
02845 model->sv_coef = Malloc(double *,m);
02846 int i;
02847 for(i=0;i<m;i++)
02848 model->sv_coef[i] = Malloc(double,l);
02849 model->SV = Malloc(svm_node*,l);
02850 svm_node *x_space = NULL;
02851 if(l>0) x_space = Malloc(svm_node,elements);
02852
02853 int j=0;
02854 for(i=0;i<l;i++)
02855 {
02856 readline(fp);
02857 model->SV[i] = &x_space[j];
02858
02859 p = strtok(line, " \t");
02860 model->sv_coef[0][i] = strtod(p,&endptr);
02861 for(int k=1;k<m;k++)
02862 {
02863 p = strtok(NULL, " \t");
02864 model->sv_coef[k][i] = strtod(p,&endptr);
02865 }
02866
02867 while(1)
02868 {
02869 idx = strtok(NULL, ":");
02870 val = strtok(NULL, " \t");
02871
02872 if(val == NULL)
02873 break;
02874 x_space[j].index = (int) strtol(idx,&endptr,10);
02875 x_space[j].value = strtod(val,&endptr);
02876
02877 ++j;
02878 }
02879 x_space[j++].index = -1;
02880 }
02881 free(line);
02882
02883 if (ferror(fp) != 0 || fclose(fp) != 0)
02884 return NULL;
02885
02886 model->free_sv = 1;
02887 return model;
02888 }
02889
02890 void svm_free_model_content(svm_model* model_ptr)
02891 {
02892 if(model_ptr->free_sv && model_ptr->l > 0)
02893 free((void *)(model_ptr->SV[0]));
02894 for(int i=0;i<model_ptr->nr_class-1;i++)
02895 free(model_ptr->sv_coef[i]);
02896 free(model_ptr->SV);
02897 free(model_ptr->sv_coef);
02898 free(model_ptr->rho);
02899 free(model_ptr->label);
02900 free(model_ptr->probA);
02901 free(model_ptr->probB);
02902 free(model_ptr->nSV);
02903 }
02904
02905 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
02906 {
02907 svm_model* model_ptr = *model_ptr_ptr;
02908 if(model_ptr != NULL)
02909 {
02910 svm_free_model_content(model_ptr);
02911 free(model_ptr);
02912 }
02913 }
02914
02915 void svm_destroy_model(svm_model* model_ptr)
02916 {
02917 fprintf(stderr,"warning: svm_destroy_model is deprecated and should not be used. Please use svm_free_and_destroy_model(svm_model **model_ptr_ptr)\n");
02918 svm_free_and_destroy_model(&model_ptr);
02919 }
02920
02921 void svm_destroy_param(svm_parameter* param)
02922 {
02923 free(param->weight_label);
02924 free(param->weight);
02925 }
02926
02927 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02928 {
02929
02930
02931 int svm_type = param->svm_type;
02932 if(svm_type != C_SVC &&
02933 svm_type != NU_SVC &&
02934 svm_type != ONE_CLASS &&
02935 svm_type != EPSILON_SVR &&
02936 svm_type != NU_SVR)
02937 return "unknown svm type";
02938
02939
02940
02941 int kernel_type = param->kernel_type;
02942 if(kernel_type != LINEAR &&
02943 kernel_type != POLY &&
02944 kernel_type != RBF &&
02945 kernel_type != SIGMOID &&
02946 kernel_type != PRECOMPUTED)
02947 return "unknown kernel type";
02948
02949 if(param->gamma < 0)
02950 return "gamma < 0";
02951
02952 if(param->degree < 0)
02953 return "degree of polynomial kernel < 0";
02954
02955
02956
02957 if(param->cache_size <= 0)
02958 return "cache_size <= 0";
02959
02960 if(param->eps <= 0)
02961 return "eps <= 0";
02962
02963 if(svm_type == C_SVC ||
02964 svm_type == EPSILON_SVR ||
02965 svm_type == NU_SVR)
02966 if(param->C <= 0)
02967 return "C <= 0";
02968
02969 if(svm_type == NU_SVC ||
02970 svm_type == ONE_CLASS ||
02971 svm_type == NU_SVR)
02972 if(param->nu <= 0 || param->nu > 1)
02973 return "nu <= 0 or nu > 1";
02974
02975 if(svm_type == EPSILON_SVR)
02976 if(param->p < 0)
02977 return "p < 0";
02978
02979 if(param->shrinking != 0 &&
02980 param->shrinking != 1)
02981 return "shrinking != 0 and shrinking != 1";
02982
02983 if(param->probability != 0 &&
02984 param->probability != 1)
02985 return "probability != 0 and probability != 1";
02986
02987 if(param->probability == 1 &&
02988 svm_type == ONE_CLASS)
02989 return "one-class SVM probability output not supported yet";
02990
02991
02992
02993
02994 if(svm_type == NU_SVC)
02995 {
02996 int l = prob->l;
02997 int max_nr_class = 16;
02998 int nr_class = 0;
02999 int *label = Malloc(int,max_nr_class);
03000 int *count = Malloc(int,max_nr_class);
03001
03002 int i;
03003 for(i=0;i<l;i++)
03004 {
03005 int this_label = (int)prob->y[i];
03006 int j;
03007 for(j=0;j<nr_class;j++)
03008 if(this_label == label[j])
03009 {
03010 ++count[j];
03011 break;
03012 }
03013 if(j == nr_class)
03014 {
03015 if(nr_class == max_nr_class)
03016 {
03017 max_nr_class *= 2;
03018 label = (int *)realloc(label,max_nr_class*sizeof(int));
03019 count = (int *)realloc(count,max_nr_class*sizeof(int));
03020 }
03021 label[nr_class] = this_label;
03022 count[nr_class] = 1;
03023 ++nr_class;
03024 }
03025 }
03026
03027 for(i=0;i<nr_class;i++)
03028 {
03029 int n1 = count[i];
03030 for(int j=i+1;j<nr_class;j++)
03031 {
03032 int n2 = count[j];
03033 if(param->nu*(n1+n2)/2 > min(n1,n2))
03034 {
03035 free(label);
03036 free(count);
03037 return "specified nu is infeasible";
03038 }
03039 }
03040 }
03041 free(label);
03042 free(count);
03043 }
03044
03045 return NULL;
03046 }
03047
03048 int svm_check_probability_model(const svm_model *model)
03049 {
03050 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03051 model->probA!=NULL && model->probB!=NULL) ||
03052 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03053 model->probA!=NULL);
03054 }
03055
03056 void svm_set_print_string_function(void (*print_func)(const char *))
03057 {
03058 if(print_func == NULL)
03059 svm_print_string = &print_string_stdout;
03060 else
03061 svm_print_string = print_func;
03062 }