libsvm.cpp

Go to the documentation of this file.
00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 
00009 #include "libsvm.h"
00010 
00011 int libsvm_version = LIBSVM_VERSION;
00012 typedef float Qfloat;
00013 typedef signed char schar;
00014 #ifndef min
00015 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00016 #endif
00017 #ifndef max
00018 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00019 #endif
00020 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00021 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00022 {
00023         dst = new T[n];
00024         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00025 }
00026 static inline double powi(double base, int times)
00027 {
00028         double tmp = base, ret = 1.0;
00029 
00030         for(int t=times; t>0; t/=2)
00031         {
00032                 if(t%2==1) ret*=tmp;
00033                 tmp = tmp * tmp;
00034         }
00035         return ret;
00036 }
00037 #define INF HUGE_VAL
00038 #define TAU 1e-12
00039 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00040 
00041 static void print_string_stdout(const char *s)
00042 {
00043         fputs(s,stdout);
00044         fflush(stdout);
00045 }
00046 static void (*svm_print_string) (const char *) = &print_string_stdout;
00047 #if 1
00048 static void info(const char *fmt,...)
00049 {
00050         char buf[BUFSIZ];
00051         va_list ap;
00052         va_start(ap,fmt);
00053         vsprintf(buf,fmt,ap);
00054         va_end(ap);
00055         (*svm_print_string)(buf);
00056 }
00057 #else
00058 static void info(const char *fmt,...) {}
00059 #endif
00060 
00061 //
00062 // Kernel Cache
00063 //
00064 // l is the number of total data items
00065 // size is the cache size limit in bytes
00066 //
00067 class Cache
00068 {
00069 public:
00070         Cache(int l,long int size);
00071         ~Cache();
00072 
00073         // request data [0,len)
00074         // return some position p where [p,len) need to be filled
00075         // (p >= len if nothing needs to be filled)
00076         int get_data(const int index, Qfloat **data, int len);
00077         void swap_index(int i, int j);  
00078 private:
00079         int l;
00080         long int size;
00081         struct head_t
00082         {
00083                 head_t *prev, *next;    // a circular list
00084                 Qfloat *data;
00085                 int len;                // data[0,len) is cached in this entry
00086         };
00087 
00088         head_t *head;
00089         head_t lru_head;
00090         void lru_delete(head_t *h);
00091         void lru_insert(head_t *h);
00092 };
00093 
00094 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00095 {
00096         head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
00097         size /= sizeof(Qfloat);
00098         size -= l * sizeof(head_t) / sizeof(Qfloat);
00099         size = max(size, 2 * (long int) l);     // cache must be large enough for two columns
00100         lru_head.next = lru_head.prev = &lru_head;
00101 }
00102 
00103 Cache::~Cache()
00104 {
00105         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00106                 free(h->data);
00107         free(head);
00108 }
00109 
00110 void Cache::lru_delete(head_t *h)
00111 {
00112         // delete from current location
00113         h->prev->next = h->next;
00114         h->next->prev = h->prev;
00115 }
00116 
00117 void Cache::lru_insert(head_t *h)
00118 {
00119         // insert to last position
00120         h->next = &lru_head;
00121         h->prev = lru_head.prev;
00122         h->prev->next = h;
00123         h->next->prev = h;
00124 }
00125 
00126 int Cache::get_data(const int index, Qfloat **data, int len)
00127 {
00128         head_t *h = &head[index];
00129         if(h->len) lru_delete(h);
00130         int more = len - h->len;
00131 
00132         if(more > 0)
00133         {
00134                 // free old space
00135                 while(size < more)
00136                 {
00137                         head_t *old = lru_head.next;
00138                         lru_delete(old);
00139                         free(old->data);
00140                         size += old->len;
00141                         old->data = 0;
00142                         old->len = 0;
00143                 }
00144 
00145                 // allocate new space
00146                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00147                 size -= more;
00148                 swap(h->len,len);
00149         }
00150 
00151         lru_insert(h);
00152         *data = h->data;
00153         return len;
00154 }
00155 
00156 void Cache::swap_index(int i, int j)
00157 {
00158         if(i==j) return;
00159 
00160         if(head[i].len) lru_delete(&head[i]);
00161         if(head[j].len) lru_delete(&head[j]);
00162         swap(head[i].data,head[j].data);
00163         swap(head[i].len,head[j].len);
00164         if(head[i].len) lru_insert(&head[i]);
00165         if(head[j].len) lru_insert(&head[j]);
00166 
00167         if(i>j) swap(i,j);
00168         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00169         {
00170                 if(h->len > i)
00171                 {
00172                         if(h->len > j)
00173                                 swap(h->data[i],h->data[j]);
00174                         else
00175                         {
00176                                 // give up
00177                                 lru_delete(h);
00178                                 free(h->data);
00179                                 size += h->len;
00180                                 h->data = 0;
00181                                 h->len = 0;
00182                         }
00183                 }
00184         }
00185 }
00186 
00187 //
00188 // Kernel evaluation
00189 //
00190 // the static method k_function is for doing single kernel evaluation
00191 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00192 // the member function get_Q is for getting one column from the Q Matrix
00193 //
00194 class QMatrix {
00195 public:
00196         virtual Qfloat *get_Q(int column, int len) const = 0;
00197         virtual double *get_QD() const = 0;
00198         virtual void swap_index(int i, int j) const = 0;
00199         virtual ~QMatrix() {}
00200 };
00201 
00202 class Kernel: public QMatrix {
00203 public:
00204         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00205         virtual ~Kernel();
00206 
00207         static double k_function(const svm_node *x, const svm_node *y,
00208                                  const svm_parameter& param);
00209         virtual Qfloat *get_Q(int column, int len) const = 0;
00210         virtual double *get_QD() const = 0;
00211         virtual void swap_index(int i, int j) const     // no so const...
00212         {
00213                 swap(x[i],x[j]);
00214                 if(x_square) swap(x_square[i],x_square[j]);
00215         }
00216 protected:
00217 
00218         double (Kernel::*kernel_function)(int i, int j) const;
00219 
00220 private:
00221         const svm_node **x;
00222         double *x_square;
00223 
00224         // svm_parameter
00225         const int kernel_type;
00226         const int degree;
00227         const double gamma;
00228         const double coef0;
00229 
00230         static double dot(const svm_node *px, const svm_node *py);
00231         double kernel_linear(int i, int j) const
00232         {
00233                 return dot(x[i],x[j]);
00234         }
00235         double kernel_poly(int i, int j) const
00236         {
00237                 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00238         }
00239         double kernel_rbf(int i, int j) const
00240         {
00241                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00242         }
00243         double kernel_sigmoid(int i, int j) const
00244         {
00245                 return tanh(gamma*dot(x[i],x[j])+coef0);
00246         }
00247         double kernel_precomputed(int i, int j) const
00248         {
00249                 return x[i][(int)(x[j][0].value)].value;
00250         }
00251 };
00252 
00253 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00254 :kernel_type(param.kernel_type), degree(param.degree),
00255  gamma(param.gamma), coef0(param.coef0)
00256 {
00257         switch(kernel_type)
00258         {
00259                 case LINEAR:
00260                         kernel_function = &Kernel::kernel_linear;
00261                         break;
00262                 case POLY:
00263                         kernel_function = &Kernel::kernel_poly;
00264                         break;
00265                 case RBF:
00266                         kernel_function = &Kernel::kernel_rbf;
00267                         break;
00268                 case SIGMOID:
00269                         kernel_function = &Kernel::kernel_sigmoid;
00270                         break;
00271                 case PRECOMPUTED:
00272                         kernel_function = &Kernel::kernel_precomputed;
00273                         break;
00274         }
00275 
00276         clone(x,x_,l);
00277 
00278         if(kernel_type == RBF)
00279         {
00280                 x_square = new double[l];
00281                 for(int i=0;i<l;i++)
00282                         x_square[i] = dot(x[i],x[i]);
00283         }
00284         else
00285                 x_square = 0;
00286 }
00287 
00288 Kernel::~Kernel()
00289 {
00290         delete[] x;
00291         delete[] x_square;
00292 }
00293 
00294 double Kernel::dot(const svm_node *px, const svm_node *py)
00295 {
00296         double sum = 0;
00297         while(px->index != -1 && py->index != -1)
00298         {
00299                 if(px->index == py->index)
00300                 {
00301                         sum += px->value * py->value;
00302                         ++px;
00303                         ++py;
00304                 }
00305                 else
00306                 {
00307                         if(px->index > py->index)
00308                                 ++py;
00309                         else
00310                                 ++px;
00311                 }                       
00312         }
00313         return sum;
00314 }
00315 
00316 double Kernel::k_function(const svm_node *x, const svm_node *y,
00317                           const svm_parameter& param)
00318 {
00319         switch(param.kernel_type)
00320         {
00321                 case LINEAR:
00322                         return dot(x,y);
00323                 case POLY:
00324                         return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00325                 case RBF:
00326                 {
00327                         double sum = 0;
00328                         while(x->index != -1 && y->index !=-1)
00329                         {
00330                                 if(x->index == y->index)
00331                                 {
00332                                         double d = x->value - y->value;
00333                                         sum += d*d;
00334                                         ++x;
00335                                         ++y;
00336                                 }
00337                                 else
00338                                 {
00339                                         if(x->index > y->index)
00340                                         {       
00341                                                 sum += y->value * y->value;
00342                                                 ++y;
00343                                         }
00344                                         else
00345                                         {
00346                                                 sum += x->value * x->value;
00347                                                 ++x;
00348                                         }
00349                                 }
00350                         }
00351 
00352                         while(x->index != -1)
00353                         {
00354                                 sum += x->value * x->value;
00355                                 ++x;
00356                         }
00357 
00358                         while(y->index != -1)
00359                         {
00360                                 sum += y->value * y->value;
00361                                 ++y;
00362                         }
00363                         
00364                         return exp(-param.gamma*sum);
00365                 }
00366                 case SIGMOID:
00367                         return tanh(param.gamma*dot(x,y)+param.coef0);
00368                 case PRECOMPUTED:  //x: test (validation), y: SV
00369                         return x[(int)(y->value)].value;
00370                 default:
00371                         return 0;  // Unreachable 
00372         }
00373 }
00374 
00375 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
00376 // Solves:
00377 //
00378 //      min 0.5(\alpha^T Q \alpha) + p^T \alpha
00379 //
00380 //              y^T \alpha = \delta
00381 //              y_i = +1 or -1
00382 //              0 <= alpha_i <= Cp for y_i = 1
00383 //              0 <= alpha_i <= Cn for y_i = -1
00384 //
00385 // Given:
00386 //
00387 //      Q, p, y, Cp, Cn, and an initial feasible point \alpha
00388 //      l is the size of vectors and matrices
00389 //      eps is the stopping tolerance
00390 //
00391 // solution will be put in \alpha, objective value will be put in obj
00392 //
00393 class Solver {
00394 public:
00395         Solver() {};
00396         virtual ~Solver() {};
00397 
00398         struct SolutionInfo {
00399                 double obj;
00400                 double rho;
00401                 double upper_bound_p;
00402                 double upper_bound_n;
00403                 double r;       // for Solver_NU
00404         };
00405 
00406         void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00407                    double *alpha_, double Cp, double Cn, double eps,
00408                    SolutionInfo* si, int shrinking);
00409 protected:
00410         int active_size;
00411         schar *y;
00412         double *G;              // gradient of objective function
00413         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00414         char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
00415         double *alpha;
00416         const QMatrix *Q;
00417         const double *QD;
00418         double eps;
00419         double Cp,Cn;
00420         double *p;
00421         int *active_set;
00422         double *G_bar;          // gradient, if we treat free variables as 0
00423         int l;
00424         bool unshrink;  // XXX
00425 
00426         double get_C(int i)
00427         {
00428                 return (y[i] > 0)? Cp : Cn;
00429         }
00430         void update_alpha_status(int i)
00431         {
00432                 if(alpha[i] >= get_C(i))
00433                         alpha_status[i] = UPPER_BOUND;
00434                 else if(alpha[i] <= 0)
00435                         alpha_status[i] = LOWER_BOUND;
00436                 else alpha_status[i] = FREE;
00437         }
00438         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00439         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00440         bool is_free(int i) { return alpha_status[i] == FREE; }
00441         void swap_index(int i, int j);
00442         void reconstruct_gradient();
00443         virtual int select_working_set(int &i, int &j);
00444         virtual double calculate_rho();
00445         virtual void do_shrinking();
00446 private:
00447         bool be_shrunk(int i, double Gmax1, double Gmax2);      
00448 };
00449 
00450 void Solver::swap_index(int i, int j)
00451 {
00452         Q->swap_index(i,j);
00453         swap(y[i],y[j]);
00454         swap(G[i],G[j]);
00455         swap(alpha_status[i],alpha_status[j]);
00456         swap(alpha[i],alpha[j]);
00457         swap(p[i],p[j]);
00458         swap(active_set[i],active_set[j]);
00459         swap(G_bar[i],G_bar[j]);
00460 }
00461 
00462 void Solver::reconstruct_gradient()
00463 {
00464         // reconstruct inactive elements of G from G_bar and free variables
00465 
00466         if(active_size == l) return;
00467 
00468         int i,j;
00469         int nr_free = 0;
00470 
00471         for(j=active_size;j<l;j++)
00472                 G[j] = G_bar[j] + p[j];
00473 
00474         for(j=0;j<active_size;j++)
00475                 if(is_free(j))
00476                         nr_free++;
00477 
00478         if(2*nr_free < active_size)
00479                 info("\nWarning: using -h 0 may be faster\n");
00480 
00481         if (nr_free*l > 2*active_size*(l-active_size))
00482         {
00483                 for(i=active_size;i<l;i++)
00484                 {
00485                         const Qfloat *Q_i = Q->get_Q(i,active_size);
00486                         for(j=0;j<active_size;j++)
00487                                 if(is_free(j))
00488                                         G[i] += alpha[j] * Q_i[j];
00489                 }
00490         }
00491         else
00492         {
00493                 for(i=0;i<active_size;i++)
00494                         if(is_free(i))
00495                         {
00496                                 const Qfloat *Q_i = Q->get_Q(i,l);
00497                                 double alpha_i = alpha[i];
00498                                 for(j=active_size;j<l;j++)
00499                                         G[j] += alpha_i * Q_i[j];
00500                         }
00501         }
00502 }
00503 
00504 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00505                    double *alpha_, double Cp, double Cn, double eps,
00506                    SolutionInfo* si, int shrinking)
00507 {
00508         this->l = l;
00509         this->Q = &Q;
00510         QD=Q.get_QD();
00511         clone(p, p_,l);
00512         clone(y, y_,l);
00513         clone(alpha,alpha_,l);
00514         this->Cp = Cp;
00515         this->Cn = Cn;
00516         this->eps = eps;
00517         unshrink = false;
00518 
00519         // initialize alpha_status
00520         {
00521                 alpha_status = new char[l];
00522                 for(int i=0;i<l;i++)
00523                         update_alpha_status(i);
00524         }
00525 
00526         // initialize active set (for shrinking)
00527         {
00528                 active_set = new int[l];
00529                 for(int i=0;i<l;i++)
00530                         active_set[i] = i;
00531                 active_size = l;
00532         }
00533 
00534         // initialize gradient
00535         {
00536                 G = new double[l];
00537                 G_bar = new double[l];
00538                 int i;
00539                 for(i=0;i<l;i++)
00540                 {
00541                         G[i] = p[i];
00542                         G_bar[i] = 0;
00543                 }
00544                 for(i=0;i<l;i++)
00545                         if(!is_lower_bound(i))
00546                         {
00547                                 const Qfloat *Q_i = Q.get_Q(i,l);
00548                                 double alpha_i = alpha[i];
00549                                 int j;
00550                                 for(j=0;j<l;j++)
00551                                         G[j] += alpha_i*Q_i[j];
00552                                 if(is_upper_bound(i))
00553                                         for(j=0;j<l;j++)
00554                                                 G_bar[j] += get_C(i) * Q_i[j];
00555                         }
00556         }
00557 
00558         // optimization step
00559 
00560         int iter = 0;
00561         int counter = min(l,1000)+1;
00562 
00563         while(1)
00564         {
00565                 // show progress and do shrinking
00566 
00567                 if(--counter == 0)
00568                 {
00569                         counter = min(l,1000);
00570                         if(shrinking) do_shrinking();
00571                         info(".");
00572                 }
00573 
00574                 int i,j;
00575                 if(select_working_set(i,j)!=0)
00576                 {
00577                         // reconstruct the whole gradient
00578                         reconstruct_gradient();
00579                         // reset active set size and check
00580                         active_size = l;
00581                         info("*");
00582                         if(select_working_set(i,j)!=0)
00583                                 break;
00584                         else
00585                                 counter = 1;    // do shrinking next iteration
00586                 }
00587                 
00588                 ++iter;
00589 
00590                 // update alpha[i] and alpha[j], handle bounds carefully
00591                 
00592                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00593                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00594 
00595                 double C_i = get_C(i);
00596                 double C_j = get_C(j);
00597 
00598                 double old_alpha_i = alpha[i];
00599                 double old_alpha_j = alpha[j];
00600 
00601                 if(y[i]!=y[j])
00602                 {
00603                         double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00604                         if (quad_coef <= 0)
00605                                 quad_coef = TAU;
00606                         double delta = (-G[i]-G[j])/quad_coef;
00607                         double diff = alpha[i] - alpha[j];
00608                         alpha[i] += delta;
00609                         alpha[j] += delta;
00610                         
00611                         if(diff > 0)
00612                         {
00613                                 if(alpha[j] < 0)
00614                                 {
00615                                         alpha[j] = 0;
00616                                         alpha[i] = diff;
00617                                 }
00618                         }
00619                         else
00620                         {
00621                                 if(alpha[i] < 0)
00622                                 {
00623                                         alpha[i] = 0;
00624                                         alpha[j] = -diff;
00625                                 }
00626                         }
00627                         if(diff > C_i - C_j)
00628                         {
00629                                 if(alpha[i] > C_i)
00630                                 {
00631                                         alpha[i] = C_i;
00632                                         alpha[j] = C_i - diff;
00633                                 }
00634                         }
00635                         else
00636                         {
00637                                 if(alpha[j] > C_j)
00638                                 {
00639                                         alpha[j] = C_j;
00640                                         alpha[i] = C_j + diff;
00641                                 }
00642                         }
00643                 }
00644                 else
00645                 {
00646                         double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00647                         if (quad_coef <= 0)
00648                                 quad_coef = TAU;
00649                         double delta = (G[i]-G[j])/quad_coef;
00650                         double sum = alpha[i] + alpha[j];
00651                         alpha[i] -= delta;
00652                         alpha[j] += delta;
00653 
00654                         if(sum > C_i)
00655                         {
00656                                 if(alpha[i] > C_i)
00657                                 {
00658                                         alpha[i] = C_i;
00659                                         alpha[j] = sum - C_i;
00660                                 }
00661                         }
00662                         else
00663                         {
00664                                 if(alpha[j] < 0)
00665                                 {
00666                                         alpha[j] = 0;
00667                                         alpha[i] = sum;
00668                                 }
00669                         }
00670                         if(sum > C_j)
00671                         {
00672                                 if(alpha[j] > C_j)
00673                                 {
00674                                         alpha[j] = C_j;
00675                                         alpha[i] = sum - C_j;
00676                                 }
00677                         }
00678                         else
00679                         {
00680                                 if(alpha[i] < 0)
00681                                 {
00682                                         alpha[i] = 0;
00683                                         alpha[j] = sum;
00684                                 }
00685                         }
00686                 }
00687 
00688                 // update G
00689 
00690                 double delta_alpha_i = alpha[i] - old_alpha_i;
00691                 double delta_alpha_j = alpha[j] - old_alpha_j;
00692                 
00693                 for(int k=0;k<active_size;k++)
00694                 {
00695                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00696                 }
00697 
00698                 // update alpha_status and G_bar
00699 
00700                 {
00701                         bool ui = is_upper_bound(i);
00702                         bool uj = is_upper_bound(j);
00703                         update_alpha_status(i);
00704                         update_alpha_status(j);
00705                         int k;
00706                         if(ui != is_upper_bound(i))
00707                         {
00708                                 Q_i = Q.get_Q(i,l);
00709                                 if(ui)
00710                                         for(k=0;k<l;k++)
00711                                                 G_bar[k] -= C_i * Q_i[k];
00712                                 else
00713                                         for(k=0;k<l;k++)
00714                                                 G_bar[k] += C_i * Q_i[k];
00715                         }
00716 
00717                         if(uj != is_upper_bound(j))
00718                         {
00719                                 Q_j = Q.get_Q(j,l);
00720                                 if(uj)
00721                                         for(k=0;k<l;k++)
00722                                                 G_bar[k] -= C_j * Q_j[k];
00723                                 else
00724                                         for(k=0;k<l;k++)
00725                                                 G_bar[k] += C_j * Q_j[k];
00726                         }
00727                 }
00728         }
00729 
00730         // calculate rho
00731 
00732         si->rho = calculate_rho();
00733 
00734         // calculate objective value
00735         {
00736                 double v = 0;
00737                 int i;
00738                 for(i=0;i<l;i++)
00739                         v += alpha[i] * (G[i] + p[i]);
00740 
00741                 si->obj = v/2;
00742         }
00743 
00744         // put back the solution
00745         {
00746                 for(int i=0;i<l;i++)
00747                         alpha_[active_set[i]] = alpha[i];
00748         }
00749 
00750         // juggle everything back
00751         /*{
00752                 for(int i=0;i<l;i++)
00753                         while(active_set[i] != i)
00754                                 swap_index(i,active_set[i]);
00755                                 // or Q.swap_index(i,active_set[i]);
00756         }*/
00757 
00758         si->upper_bound_p = Cp;
00759         si->upper_bound_n = Cn;
00760 
00761         info("\noptimization finished, #iter = %d\n",iter);
00762 
00763         delete[] p;
00764         delete[] y;
00765         delete[] alpha;
00766         delete[] alpha_status;
00767         delete[] active_set;
00768         delete[] G;
00769         delete[] G_bar;
00770 }
00771 
00772 // return 1 if already optimal, return 0 otherwise
00773 int Solver::select_working_set(int &out_i, int &out_j)
00774 {
00775         // return i,j such that
00776         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00777         // j: minimizes the decrease of obj value
00778         //    (if quadratic coefficeint <= 0, replace it with tau)
00779         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00780         
00781         double Gmax = -INF;
00782         double Gmax2 = -INF;
00783         int Gmax_idx = -1;
00784         int Gmin_idx = -1;
00785         double obj_diff_min = INF;
00786 
00787         for(int t=0;t<active_size;t++)
00788                 if(y[t]==+1)    
00789                 {
00790                         if(!is_upper_bound(t))
00791                                 if(-G[t] >= Gmax)
00792                                 {
00793                                         Gmax = -G[t];
00794                                         Gmax_idx = t;
00795                                 }
00796                 }
00797                 else
00798                 {
00799                         if(!is_lower_bound(t))
00800                                 if(G[t] >= Gmax)
00801                                 {
00802                                         Gmax = G[t];
00803                                         Gmax_idx = t;
00804                                 }
00805                 }
00806 
00807         int i = Gmax_idx;
00808         const Qfloat *Q_i = NULL;
00809         if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00810                 Q_i = Q->get_Q(i,active_size);
00811 
00812         for(int j=0;j<active_size;j++)
00813         {
00814                 if(y[j]==+1)
00815                 {
00816                         if (!is_lower_bound(j))
00817                         {
00818                                 double grad_diff=Gmax+G[j];
00819                                 if (G[j] >= Gmax2)
00820                                         Gmax2 = G[j];
00821                                 if (grad_diff > 0)
00822                                 {
00823                                         double obj_diff; 
00824                                         double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00825                                         if (quad_coef > 0)
00826                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00827                                         else
00828                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00829 
00830                                         if (obj_diff <= obj_diff_min)
00831                                         {
00832                                                 Gmin_idx=j;
00833                                                 obj_diff_min = obj_diff;
00834                                         }
00835                                 }
00836                         }
00837                 }
00838                 else
00839                 {
00840                         if (!is_upper_bound(j))
00841                         {
00842                                 double grad_diff= Gmax-G[j];
00843                                 if (-G[j] >= Gmax2)
00844                                         Gmax2 = -G[j];
00845                                 if (grad_diff > 0)
00846                                 {
00847                                         double obj_diff; 
00848                                         double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00849                                         if (quad_coef > 0)
00850                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00851                                         else
00852                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00853 
00854                                         if (obj_diff <= obj_diff_min)
00855                                         {
00856                                                 Gmin_idx=j;
00857                                                 obj_diff_min = obj_diff;
00858                                         }
00859                                 }
00860                         }
00861                 }
00862         }
00863 
00864         if(Gmax+Gmax2 < eps)
00865                 return 1;
00866 
00867         out_i = Gmax_idx;
00868         out_j = Gmin_idx;
00869         return 0;
00870 }
00871 
00872 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00873 {
00874         if(is_upper_bound(i))
00875         {
00876                 if(y[i]==+1)
00877                         return(-G[i] > Gmax1);
00878                 else
00879                         return(-G[i] > Gmax2);
00880         }
00881         else if(is_lower_bound(i))
00882         {
00883                 if(y[i]==+1)
00884                         return(G[i] > Gmax2);
00885                 else    
00886                         return(G[i] > Gmax1);
00887         }
00888         else
00889                 return(false);
00890 }
00891 
00892 void Solver::do_shrinking()
00893 {
00894         int i;
00895         double Gmax1 = -INF;            // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00896         double Gmax2 = -INF;            // max { y_i * grad(f)_i | i in I_low(\alpha) }
00897 
00898         // find maximal violating pair first
00899         for(i=0;i<active_size;i++)
00900         {
00901                 if(y[i]==+1)    
00902                 {
00903                         if(!is_upper_bound(i))  
00904                         {
00905                                 if(-G[i] >= Gmax1)
00906                                         Gmax1 = -G[i];
00907                         }
00908                         if(!is_lower_bound(i))  
00909                         {
00910                                 if(G[i] >= Gmax2)
00911                                         Gmax2 = G[i];
00912                         }
00913                 }
00914                 else    
00915                 {
00916                         if(!is_upper_bound(i))  
00917                         {
00918                                 if(-G[i] >= Gmax2)
00919                                         Gmax2 = -G[i];
00920                         }
00921                         if(!is_lower_bound(i))  
00922                         {
00923                                 if(G[i] >= Gmax1)
00924                                         Gmax1 = G[i];
00925                         }
00926                 }
00927         }
00928 
00929         if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 
00930         {
00931                 unshrink = true;
00932                 reconstruct_gradient();
00933                 active_size = l;
00934                 info("*");
00935         }
00936 
00937         for(i=0;i<active_size;i++)
00938                 if (be_shrunk(i, Gmax1, Gmax2))
00939                 {
00940                         active_size--;
00941                         while (active_size > i)
00942                         {
00943                                 if (!be_shrunk(active_size, Gmax1, Gmax2))
00944                                 {
00945                                         swap_index(i,active_size);
00946                                         break;
00947                                 }
00948                                 active_size--;
00949                         }
00950                 }
00951 }
00952 
00953 double Solver::calculate_rho()
00954 {
00955         double r;
00956         int nr_free = 0;
00957         double ub = INF, lb = -INF, sum_free = 0;
00958         for(int i=0;i<active_size;i++)
00959         {
00960                 double yG = y[i]*G[i];
00961 
00962                 if(is_upper_bound(i))
00963                 {
00964                         if(y[i]==-1)
00965                                 ub = min(ub,yG);
00966                         else
00967                                 lb = max(lb,yG);
00968                 }
00969                 else if(is_lower_bound(i))
00970                 {
00971                         if(y[i]==+1)
00972                                 ub = min(ub,yG);
00973                         else
00974                                 lb = max(lb,yG);
00975                 }
00976                 else
00977                 {
00978                         ++nr_free;
00979                         sum_free += yG;
00980                 }
00981         }
00982 
00983         if(nr_free>0)
00984                 r = sum_free/nr_free;
00985         else
00986                 r = (ub+lb)/2;
00987 
00988         return r;
00989 }
00990 
00991 //
00992 // Solver for nu-svm classification and regression
00993 //
00994 // additional constraint: e^T \alpha = constant
00995 //
00996 class Solver_NU : public Solver
00997 {
00998 public:
00999         Solver_NU() {}
01000         void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01001                    double *alpha, double Cp, double Cn, double eps,
01002                    SolutionInfo* si, int shrinking)
01003         {
01004                 this->si = si;
01005                 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01006         }
01007 private:
01008         SolutionInfo *si;
01009         int select_working_set(int &i, int &j);
01010         double calculate_rho();
01011         bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01012         void do_shrinking();
01013 };
01014 
01015 // return 1 if already optimal, return 0 otherwise
01016 int Solver_NU::select_working_set(int &out_i, int &out_j)
01017 {
01018         // return i,j such that y_i = y_j and
01019         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01020         // j: minimizes the decrease of obj value
01021         //    (if quadratic coefficeint <= 0, replace it with tau)
01022         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01023 
01024         double Gmaxp = -INF;
01025         double Gmaxp2 = -INF;
01026         int Gmaxp_idx = -1;
01027 
01028         double Gmaxn = -INF;
01029         double Gmaxn2 = -INF;
01030         int Gmaxn_idx = -1;
01031 
01032         int Gmin_idx = -1;
01033         double obj_diff_min = INF;
01034 
01035         for(int t=0;t<active_size;t++)
01036                 if(y[t]==+1)
01037                 {
01038                         if(!is_upper_bound(t))
01039                                 if(-G[t] >= Gmaxp)
01040                                 {
01041                                         Gmaxp = -G[t];
01042                                         Gmaxp_idx = t;
01043                                 }
01044                 }
01045                 else
01046                 {
01047                         if(!is_lower_bound(t))
01048                                 if(G[t] >= Gmaxn)
01049                                 {
01050                                         Gmaxn = G[t];
01051                                         Gmaxn_idx = t;
01052                                 }
01053                 }
01054 
01055         int ip = Gmaxp_idx;
01056         int in = Gmaxn_idx;
01057         const Qfloat *Q_ip = NULL;
01058         const Qfloat *Q_in = NULL;
01059         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01060                 Q_ip = Q->get_Q(ip,active_size);
01061         if(in != -1)
01062                 Q_in = Q->get_Q(in,active_size);
01063 
01064         for(int j=0;j<active_size;j++)
01065         {
01066                 if(y[j]==+1)
01067                 {
01068                         if (!is_lower_bound(j)) 
01069                         {
01070                                 double grad_diff=Gmaxp+G[j];
01071                                 if (G[j] >= Gmaxp2)
01072                                         Gmaxp2 = G[j];
01073                                 if (grad_diff > 0)
01074                                 {
01075                                         double obj_diff; 
01076                                         double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01077                                         if (quad_coef > 0)
01078                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01079                                         else
01080                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01081 
01082                                         if (obj_diff <= obj_diff_min)
01083                                         {
01084                                                 Gmin_idx=j;
01085                                                 obj_diff_min = obj_diff;
01086                                         }
01087                                 }
01088                         }
01089                 }
01090                 else
01091                 {
01092                         if (!is_upper_bound(j))
01093                         {
01094                                 double grad_diff=Gmaxn-G[j];
01095                                 if (-G[j] >= Gmaxn2)
01096                                         Gmaxn2 = -G[j];
01097                                 if (grad_diff > 0)
01098                                 {
01099                                         double obj_diff; 
01100                                         double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01101                                         if (quad_coef > 0)
01102                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01103                                         else
01104                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01105 
01106                                         if (obj_diff <= obj_diff_min)
01107                                         {
01108                                                 Gmin_idx=j;
01109                                                 obj_diff_min = obj_diff;
01110                                         }
01111                                 }
01112                         }
01113                 }
01114         }
01115 
01116         if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01117                 return 1;
01118 
01119         if (y[Gmin_idx] == +1)
01120                 out_i = Gmaxp_idx;
01121         else
01122                 out_i = Gmaxn_idx;
01123         out_j = Gmin_idx;
01124 
01125         return 0;
01126 }
01127 
01128 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01129 {
01130         if(is_upper_bound(i))
01131         {
01132                 if(y[i]==+1)
01133                         return(-G[i] > Gmax1);
01134                 else    
01135                         return(-G[i] > Gmax4);
01136         }
01137         else if(is_lower_bound(i))
01138         {
01139                 if(y[i]==+1)
01140                         return(G[i] > Gmax2);
01141                 else    
01142                         return(G[i] > Gmax3);
01143         }
01144         else
01145                 return(false);
01146 }
01147 
01148 void Solver_NU::do_shrinking()
01149 {
01150         double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01151         double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01152         double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01153         double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01154 
01155         // find maximal violating pair first
01156         int i;
01157         for(i=0;i<active_size;i++)
01158         {
01159                 if(!is_upper_bound(i))
01160                 {
01161                         if(y[i]==+1)
01162                         {
01163                                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01164                         }
01165                         else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01166                 }
01167                 if(!is_lower_bound(i))
01168                 {
01169                         if(y[i]==+1)
01170                         {       
01171                                 if(G[i] > Gmax2) Gmax2 = G[i];
01172                         }
01173                         else    if(G[i] > Gmax3) Gmax3 = G[i];
01174                 }
01175         }
01176 
01177         if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 
01178         {
01179                 unshrink = true;
01180                 reconstruct_gradient();
01181                 active_size = l;
01182         }
01183 
01184         for(i=0;i<active_size;i++)
01185                 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01186                 {
01187                         active_size--;
01188                         while (active_size > i)
01189                         {
01190                                 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01191                                 {
01192                                         swap_index(i,active_size);
01193                                         break;
01194                                 }
01195                                 active_size--;
01196                         }
01197                 }
01198 }
01199 
01200 double Solver_NU::calculate_rho()
01201 {
01202         int nr_free1 = 0,nr_free2 = 0;
01203         double ub1 = INF, ub2 = INF;
01204         double lb1 = -INF, lb2 = -INF;
01205         double sum_free1 = 0, sum_free2 = 0;
01206 
01207         for(int i=0;i<active_size;i++)
01208         {
01209                 if(y[i]==+1)
01210                 {
01211                         if(is_upper_bound(i))
01212                                 lb1 = max(lb1,G[i]);
01213                         else if(is_lower_bound(i))
01214                                 ub1 = min(ub1,G[i]);
01215                         else
01216                         {
01217                                 ++nr_free1;
01218                                 sum_free1 += G[i];
01219                         }
01220                 }
01221                 else
01222                 {
01223                         if(is_upper_bound(i))
01224                                 lb2 = max(lb2,G[i]);
01225                         else if(is_lower_bound(i))
01226                                 ub2 = min(ub2,G[i]);
01227                         else
01228                         {
01229                                 ++nr_free2;
01230                                 sum_free2 += G[i];
01231                         }
01232                 }
01233         }
01234 
01235         double r1,r2;
01236         if(nr_free1 > 0)
01237                 r1 = sum_free1/nr_free1;
01238         else
01239                 r1 = (ub1+lb1)/2;
01240         
01241         if(nr_free2 > 0)
01242                 r2 = sum_free2/nr_free2;
01243         else
01244                 r2 = (ub2+lb2)/2;
01245         
01246         si->r = (r1+r2)/2;
01247         return (r1-r2)/2;
01248 }
01249 
01250 //
01251 // Q matrices for various formulations
01252 //
01253 class SVC_Q: public Kernel
01254 { 
01255 public:
01256         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01257         :Kernel(prob.l, prob.x, param)
01258         {
01259                 clone(y,y_,prob.l);
01260                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01261                 QD = new double[prob.l];
01262                 for(int i=0;i<prob.l;i++)
01263                         QD[i] = (this->*kernel_function)(i,i);
01264         }
01265         
01266         Qfloat *get_Q(int i, int len) const
01267         {
01268                 Qfloat *data;
01269                 int start, j;
01270                 if((start = cache->get_data(i,&data,len)) < len)
01271                 {
01272                         for(j=start;j<len;j++)
01273                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01274                 }
01275                 return data;
01276         }
01277 
01278         double *get_QD() const
01279         {
01280                 return QD;
01281         }
01282 
01283         void swap_index(int i, int j) const
01284         {
01285                 cache->swap_index(i,j);
01286                 Kernel::swap_index(i,j);
01287                 swap(y[i],y[j]);
01288                 swap(QD[i],QD[j]);
01289         }
01290 
01291         ~SVC_Q()
01292         {
01293                 delete[] y;
01294                 delete cache;
01295                 delete[] QD;
01296         }
01297 private:
01298         schar *y;
01299         Cache *cache;
01300         double *QD;
01301 };
01302 
01303 class ONE_CLASS_Q: public Kernel
01304 {
01305 public:
01306         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01307         :Kernel(prob.l, prob.x, param)
01308         {
01309                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01310                 QD = new double[prob.l];
01311                 for(int i=0;i<prob.l;i++)
01312                         QD[i] = (this->*kernel_function)(i,i);
01313         }
01314         
01315         Qfloat *get_Q(int i, int len) const
01316         {
01317                 Qfloat *data;
01318                 int start, j;
01319                 if((start = cache->get_data(i,&data,len)) < len)
01320                 {
01321                         for(j=start;j<len;j++)
01322                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01323                 }
01324                 return data;
01325         }
01326 
01327         double *get_QD() const
01328         {
01329                 return QD;
01330         }
01331 
01332         void swap_index(int i, int j) const
01333         {
01334                 cache->swap_index(i,j);
01335                 Kernel::swap_index(i,j);
01336                 swap(QD[i],QD[j]);
01337         }
01338 
01339         ~ONE_CLASS_Q()
01340         {
01341                 delete cache;
01342                 delete[] QD;
01343         }
01344 private:
01345         Cache *cache;
01346         double *QD;
01347 };
01348 
01349 class SVR_Q: public Kernel
01350 { 
01351 public:
01352         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01353         :Kernel(prob.l, prob.x, param)
01354         {
01355                 l = prob.l;
01356                 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01357                 QD = new double[2*l];
01358                 sign = new schar[2*l];
01359                 index = new int[2*l];
01360                 for(int k=0;k<l;k++)
01361                 {
01362                         sign[k] = 1;
01363                         sign[k+l] = -1;
01364                         index[k] = k;
01365                         index[k+l] = k;
01366                         QD[k] = (this->*kernel_function)(k,k);
01367                         QD[k+l] = QD[k];
01368                 }
01369                 buffer[0] = new Qfloat[2*l];
01370                 buffer[1] = new Qfloat[2*l];
01371                 next_buffer = 0;
01372         }
01373 
01374         void swap_index(int i, int j) const
01375         {
01376                 swap(sign[i],sign[j]);
01377                 swap(index[i],index[j]);
01378                 swap(QD[i],QD[j]);
01379         }
01380         
01381         Qfloat *get_Q(int i, int len) const
01382         {
01383                 Qfloat *data;
01384                 int j, real_i = index[i];
01385                 if(cache->get_data(real_i,&data,l) < l)
01386                 {
01387                         for(j=0;j<l;j++)
01388                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01389                 }
01390 
01391                 // reorder and copy
01392                 Qfloat *buf = buffer[next_buffer];
01393                 next_buffer = 1 - next_buffer;
01394                 schar si = sign[i];
01395                 for(j=0;j<len;j++)
01396                         buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01397                 return buf;
01398         }
01399 
01400         double *get_QD() const
01401         {
01402                 return QD;
01403         }
01404 
01405         ~SVR_Q()
01406         {
01407                 delete cache;
01408                 delete[] sign;
01409                 delete[] index;
01410                 delete[] buffer[0];
01411                 delete[] buffer[1];
01412                 delete[] QD;
01413         }
01414 private:
01415         int l;
01416         Cache *cache;
01417         schar *sign;
01418         int *index;
01419         mutable int next_buffer;
01420         Qfloat *buffer[2];
01421         double *QD;
01422 };
01423 
01424 //
01425 // construct and solve various formulations
01426 //
01427 static void solve_c_svc(
01428         const svm_problem *prob, const svm_parameter* param,
01429         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01430 {
01431         int l = prob->l;
01432         double *minus_ones = new double[l];
01433         schar *y = new schar[l];
01434 
01435         int i;
01436 
01437         for(i=0;i<l;i++)
01438         {
01439                 alpha[i] = 0;
01440                 minus_ones[i] = -1;
01441                 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01442         }
01443 
01444         Solver s;
01445         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01446                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01447 
01448         double sum_alpha=0;
01449         for(i=0;i<l;i++)
01450                 sum_alpha += alpha[i];
01451 
01452         if (Cp==Cn)
01453                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01454 
01455         for(i=0;i<l;i++)
01456                 alpha[i] *= y[i];
01457 
01458         delete[] minus_ones;
01459         delete[] y;
01460 }
01461 
01462 static void solve_nu_svc(
01463         const svm_problem *prob, const svm_parameter *param,
01464         double *alpha, Solver::SolutionInfo* si)
01465 {
01466         int i;
01467         int l = prob->l;
01468         double nu = param->nu;
01469 
01470         schar *y = new schar[l];
01471 
01472         for(i=0;i<l;i++)
01473                 if(prob->y[i]>0)
01474                         y[i] = +1;
01475                 else
01476                         y[i] = -1;
01477 
01478         double sum_pos = nu*l/2;
01479         double sum_neg = nu*l/2;
01480 
01481         for(i=0;i<l;i++)
01482                 if(y[i] == +1)
01483                 {
01484                         alpha[i] = min(1.0,sum_pos);
01485                         sum_pos -= alpha[i];
01486                 }
01487                 else
01488                 {
01489                         alpha[i] = min(1.0,sum_neg);
01490                         sum_neg -= alpha[i];
01491                 }
01492 
01493         double *zeros = new double[l];
01494 
01495         for(i=0;i<l;i++)
01496                 zeros[i] = 0;
01497 
01498         Solver_NU s;
01499         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01500                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01501         double r = si->r;
01502 
01503         info("C = %f\n",1/r);
01504 
01505         for(i=0;i<l;i++)
01506                 alpha[i] *= y[i]/r;
01507 
01508         si->rho /= r;
01509         si->obj /= (r*r);
01510         si->upper_bound_p = 1/r;
01511         si->upper_bound_n = 1/r;
01512 
01513         delete[] y;
01514         delete[] zeros;
01515 }
01516 
01517 static void solve_one_class(
01518         const svm_problem *prob, const svm_parameter *param,
01519         double *alpha, Solver::SolutionInfo* si)
01520 {
01521         int l = prob->l;
01522         double *zeros = new double[l];
01523         schar *ones = new schar[l];
01524         int i;
01525 
01526         int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
01527 
01528         for(i=0;i<n;i++)
01529                 alpha[i] = 1;
01530         if(n<prob->l)
01531                 alpha[n] = param->nu * prob->l - n;
01532         for(i=n+1;i<l;i++)
01533                 alpha[i] = 0;
01534 
01535         for(i=0;i<l;i++)
01536         {
01537                 zeros[i] = 0;
01538                 ones[i] = 1;
01539         }
01540 
01541         Solver s;
01542         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01543                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01544 
01545         delete[] zeros;
01546         delete[] ones;
01547 }
01548 
01549 static void solve_epsilon_svr(
01550         const svm_problem *prob, const svm_parameter *param,
01551         double *alpha, Solver::SolutionInfo* si)
01552 {
01553         int l = prob->l;
01554         double *alpha2 = new double[2*l];
01555         double *linear_term = new double[2*l];
01556         schar *y = new schar[2*l];
01557         int i;
01558 
01559         for(i=0;i<l;i++)
01560         {
01561                 alpha2[i] = 0;
01562                 linear_term[i] = param->p - prob->y[i];
01563                 y[i] = 1;
01564 
01565                 alpha2[i+l] = 0;
01566                 linear_term[i+l] = param->p + prob->y[i];
01567                 y[i+l] = -1;
01568         }
01569 
01570         Solver s;
01571         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01572                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01573 
01574         double sum_alpha = 0;
01575         for(i=0;i<l;i++)
01576         {
01577                 alpha[i] = alpha2[i] - alpha2[i+l];
01578                 sum_alpha += fabs(alpha[i]);
01579         }
01580         info("nu = %f\n",sum_alpha/(param->C*l));
01581 
01582         delete[] alpha2;
01583         delete[] linear_term;
01584         delete[] y;
01585 }
01586 
01587 static void solve_nu_svr(
01588         const svm_problem *prob, const svm_parameter *param,
01589         double *alpha, Solver::SolutionInfo* si)
01590 {
01591         int l = prob->l;
01592         double C = param->C;
01593         double *alpha2 = new double[2*l];
01594         double *linear_term = new double[2*l];
01595         schar *y = new schar[2*l];
01596         int i;
01597 
01598         double sum = C * param->nu * l / 2;
01599         for(i=0;i<l;i++)
01600         {
01601                 alpha2[i] = alpha2[i+l] = min(sum,C);
01602                 sum -= alpha2[i];
01603 
01604                 linear_term[i] = - prob->y[i];
01605                 y[i] = 1;
01606 
01607                 linear_term[i+l] = prob->y[i];
01608                 y[i+l] = -1;
01609         }
01610 
01611         Solver_NU s;
01612         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01613                 alpha2, C, C, param->eps, si, param->shrinking);
01614 
01615         info("epsilon = %f\n",-si->r);
01616 
01617         for(i=0;i<l;i++)
01618                 alpha[i] = alpha2[i] - alpha2[i+l];
01619 
01620         delete[] alpha2;
01621         delete[] linear_term;
01622         delete[] y;
01623 }
01624 
01625 //
01626 // decision_function
01627 //
01628 struct decision_function
01629 {
01630         double *alpha;
01631         double rho;     
01632 };
01633 
01634 static decision_function svm_train_one(
01635         const svm_problem *prob, const svm_parameter *param,
01636         double Cp, double Cn)
01637 {
01638         double *alpha = Malloc(double,prob->l);
01639         Solver::SolutionInfo si;
01640         switch(param->svm_type)
01641         {
01642                 case C_SVC:
01643                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01644                         break;
01645                 case NU_SVC:
01646                         solve_nu_svc(prob,param,alpha,&si);
01647                         break;
01648                 case ONE_CLASS:
01649                         solve_one_class(prob,param,alpha,&si);
01650                         break;
01651                 case EPSILON_SVR:
01652                         solve_epsilon_svr(prob,param,alpha,&si);
01653                         break;
01654                 case NU_SVR:
01655                         solve_nu_svr(prob,param,alpha,&si);
01656                         break;
01657         }
01658 
01659         info("obj = %f, rho = %f\n",si.obj,si.rho);
01660 
01661         // output SVs
01662 
01663         int nSV = 0;
01664         int nBSV = 0;
01665         for(int i=0;i<prob->l;i++)
01666         {
01667                 if(fabs(alpha[i]) > 0)
01668                 {
01669                         ++nSV;
01670                         if(prob->y[i] > 0)
01671                         {
01672                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01673                                         ++nBSV;
01674                         }
01675                         else
01676                         {
01677                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01678                                         ++nBSV;
01679                         }
01680                 }
01681         }
01682 
01683         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01684 
01685         decision_function f;
01686         f.alpha = alpha;
01687         f.rho = si.rho;
01688         return f;
01689 }
01690 
01691 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
01692 static void sigmoid_train(
01693         int l, const double *dec_values, const double *labels, 
01694         double& A, double& B)
01695 {
01696         double prior1=0, prior0 = 0;
01697         int i;
01698 
01699         for (i=0;i<l;i++)
01700                 if (labels[i] > 0) prior1+=1;
01701                 else prior0+=1;
01702         
01703         int max_iter=100;       // Maximal number of iterations
01704         double min_step=1e-10;  // Minimal step taken in line search
01705         double sigma=1e-12;     // For numerically strict PD of Hessian
01706         double eps=1e-5;
01707         double hiTarget=(prior1+1.0)/(prior1+2.0);
01708         double loTarget=1/(prior0+2.0);
01709         double *t=Malloc(double,l);
01710         double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01711         double newA,newB,newf,d1,d2;
01712         int iter; 
01713         
01714         // Initial Point and Initial Fun Value
01715         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01716         double fval = 0.0;
01717 
01718         for (i=0;i<l;i++)
01719         {
01720                 if (labels[i]>0) t[i]=hiTarget;
01721                 else t[i]=loTarget;
01722                 fApB = dec_values[i]*A+B;
01723                 if (fApB>=0)
01724                         fval += t[i]*fApB + log(1+exp(-fApB));
01725                 else
01726                         fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01727         }
01728         for (iter=0;iter<max_iter;iter++)
01729         {
01730                 // Update Gradient and Hessian (use H' = H + sigma I)
01731                 h11=sigma; // numerically ensures strict PD
01732                 h22=sigma;
01733                 h21=0.0;g1=0.0;g2=0.0;
01734                 for (i=0;i<l;i++)
01735                 {
01736                         fApB = dec_values[i]*A+B;
01737                         if (fApB >= 0)
01738                         {
01739                                 p=exp(-fApB)/(1.0+exp(-fApB));
01740                                 q=1.0/(1.0+exp(-fApB));
01741                         }
01742                         else
01743                         {
01744                                 p=1.0/(1.0+exp(fApB));
01745                                 q=exp(fApB)/(1.0+exp(fApB));
01746                         }
01747                         d2=p*q;
01748                         h11+=dec_values[i]*dec_values[i]*d2;
01749                         h22+=d2;
01750                         h21+=dec_values[i]*d2;
01751                         d1=t[i]-p;
01752                         g1+=dec_values[i]*d1;
01753                         g2+=d1;
01754                 }
01755 
01756                 // Stopping Criteria
01757                 if (fabs(g1)<eps && fabs(g2)<eps)
01758                         break;
01759 
01760                 // Finding Newton direction: -inv(H') * g
01761                 det=h11*h22-h21*h21;
01762                 dA=-(h22*g1 - h21 * g2) / det;
01763                 dB=-(-h21*g1+ h11 * g2) / det;
01764                 gd=g1*dA+g2*dB;
01765 
01766 
01767                 stepsize = 1;           // Line Search
01768                 while (stepsize >= min_step)
01769                 {
01770                         newA = A + stepsize * dA;
01771                         newB = B + stepsize * dB;
01772 
01773                         // New function value
01774                         newf = 0.0;
01775                         for (i=0;i<l;i++)
01776                         {
01777                                 fApB = dec_values[i]*newA+newB;
01778                                 if (fApB >= 0)
01779                                         newf += t[i]*fApB + log(1+exp(-fApB));
01780                                 else
01781                                         newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01782                         }
01783                         // Check sufficient decrease
01784                         if (newf<fval+0.0001*stepsize*gd)
01785                         {
01786                                 A=newA;B=newB;fval=newf;
01787                                 break;
01788                         }
01789                         else
01790                                 stepsize = stepsize / 2.0;
01791                 }
01792 
01793                 if (stepsize < min_step)
01794                 {
01795                         info("Line search fails in two-class probability estimates\n");
01796                         break;
01797                 }
01798         }
01799 
01800         if (iter>=max_iter)
01801                 info("Reaching maximal iterations in two-class probability estimates\n");
01802         free(t);
01803 }
01804 
01805 static double sigmoid_predict(double decision_value, double A, double B)
01806 {
01807         double fApB = decision_value*A+B;
01808         if (fApB >= 0)
01809                 return exp(-fApB)/(1.0+exp(-fApB));
01810         else
01811                 return 1.0/(1+exp(fApB)) ;
01812 }
01813 
01814 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
01815 static void multiclass_probability(int k, double **r, double *p)
01816 {
01817         int t,j;
01818         int iter = 0, max_iter=max(100,k);
01819         double **Q=Malloc(double *,k);
01820         double *Qp=Malloc(double,k);
01821         double pQp, eps=0.005/k;
01822         
01823         for (t=0;t<k;t++)
01824         {
01825                 p[t]=1.0/k;  // Valid if k = 1
01826                 Q[t]=Malloc(double,k);
01827                 Q[t][t]=0;
01828                 for (j=0;j<t;j++)
01829                 {
01830                         Q[t][t]+=r[j][t]*r[j][t];
01831                         Q[t][j]=Q[j][t];
01832                 }
01833                 for (j=t+1;j<k;j++)
01834                 {
01835                         Q[t][t]+=r[j][t]*r[j][t];
01836                         Q[t][j]=-r[j][t]*r[t][j];
01837                 }
01838         }
01839         for (iter=0;iter<max_iter;iter++)
01840         {
01841                 // stopping condition, recalculate QP,pQP for numerical accuracy
01842                 pQp=0;
01843                 for (t=0;t<k;t++)
01844                 {
01845                         Qp[t]=0;
01846                         for (j=0;j<k;j++)
01847                                 Qp[t]+=Q[t][j]*p[j];
01848                         pQp+=p[t]*Qp[t];
01849                 }
01850                 double max_error=0;
01851                 for (t=0;t<k;t++)
01852                 {
01853                         double error=fabs(Qp[t]-pQp);
01854                         if (error>max_error)
01855                                 max_error=error;
01856                 }
01857                 if (max_error<eps) break;
01858                 
01859                 for (t=0;t<k;t++)
01860                 {
01861                         double diff=(-Qp[t]+pQp)/Q[t][t];
01862                         p[t]+=diff;
01863                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01864                         for (j=0;j<k;j++)
01865                         {
01866                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01867                                 p[j]/=(1+diff);
01868                         }
01869                 }
01870         }
01871         if (iter>=max_iter)
01872                 info("Exceeds max_iter in multiclass_prob\n");
01873         for(t=0;t<k;t++) free(Q[t]);
01874         free(Q);
01875         free(Qp);
01876 }
01877 
01878 // Cross-validation decision values for probability estimates
01879 static void svm_binary_svc_probability(
01880         const svm_problem *prob, const svm_parameter *param,
01881         double Cp, double Cn, double& probA, double& probB)
01882 {
01883         int i;
01884         int nr_fold = 5;
01885         int *perm = Malloc(int,prob->l);
01886         double *dec_values = Malloc(double,prob->l);
01887 
01888         // random shuffle
01889         for(i=0;i<prob->l;i++) perm[i]=i;
01890         for(i=0;i<prob->l;i++)
01891         {
01892                 int j = i+rand()%(prob->l-i);
01893                 swap(perm[i],perm[j]);
01894         }
01895         for(i=0;i<nr_fold;i++)
01896         {
01897                 int begin = i*prob->l/nr_fold;
01898                 int end = (i+1)*prob->l/nr_fold;
01899                 int j,k;
01900                 struct svm_problem subprob;
01901 
01902                 subprob.l = prob->l-(end-begin);
01903                 subprob.x = Malloc(struct svm_node*,subprob.l);
01904                 subprob.y = Malloc(double,subprob.l);
01905                         
01906                 k=0;
01907                 for(j=0;j<begin;j++)
01908                 {
01909                         subprob.x[k] = prob->x[perm[j]];
01910                         subprob.y[k] = prob->y[perm[j]];
01911                         ++k;
01912                 }
01913                 for(j=end;j<prob->l;j++)
01914                 {
01915                         subprob.x[k] = prob->x[perm[j]];
01916                         subprob.y[k] = prob->y[perm[j]];
01917                         ++k;
01918                 }
01919                 int p_count=0,n_count=0;
01920                 for(j=0;j<k;j++)
01921                         if(subprob.y[j]>0)
01922                                 p_count++;
01923                         else
01924                                 n_count++;
01925 
01926                 if(p_count==0 && n_count==0)
01927                         for(j=begin;j<end;j++)
01928                                 dec_values[perm[j]] = 0;
01929                 else if(p_count > 0 && n_count == 0)
01930                         for(j=begin;j<end;j++)
01931                                 dec_values[perm[j]] = 1;
01932                 else if(p_count == 0 && n_count > 0)
01933                         for(j=begin;j<end;j++)
01934                                 dec_values[perm[j]] = -1;
01935                 else
01936                 {
01937                         svm_parameter subparam = *param;
01938                         subparam.probability=0;
01939                         subparam.C=1.0;
01940                         subparam.nr_weight=2;
01941                         subparam.weight_label = Malloc(int,2);
01942                         subparam.weight = Malloc(double,2);
01943                         subparam.weight_label[0]=+1;
01944                         subparam.weight_label[1]=-1;
01945                         subparam.weight[0]=Cp;
01946                         subparam.weight[1]=Cn;
01947                         struct svm_model *submodel = svm_train(&subprob,&subparam);
01948                         for(j=begin;j<end;j++)
01949                         {
01950                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 
01951                                 // ensure +1 -1 order; reason not using CV subroutine
01952                                 dec_values[perm[j]] *= submodel->label[0];
01953                         }               
01954                         svm_free_and_destroy_model(&submodel);
01955                         svm_destroy_param(&subparam);
01956                 }
01957                 free(subprob.x);
01958                 free(subprob.y);
01959         }               
01960         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01961         free(dec_values);
01962         free(perm);
01963 }
01964 
01965 // Return parameter of a Laplace distribution 
01966 static double svm_svr_probability(
01967         const svm_problem *prob, const svm_parameter *param)
01968 {
01969         int i;
01970         int nr_fold = 5;
01971         double *ymv = Malloc(double,prob->l);
01972         double mae = 0;
01973 
01974         svm_parameter newparam = *param;
01975         newparam.probability = 0;
01976         svm_cross_validation(prob,&newparam,nr_fold,ymv);
01977         for(i=0;i<prob->l;i++)
01978         {
01979                 ymv[i]=prob->y[i]-ymv[i];
01980                 mae += fabs(ymv[i]);
01981         }               
01982         mae /= prob->l;
01983         double std=sqrt(2*mae*mae);
01984         int count=0;
01985         mae=0;
01986         for(i=0;i<prob->l;i++)
01987                 if (fabs(ymv[i]) > 5*std) 
01988                         count=count+1;
01989                 else 
01990                         mae+=fabs(ymv[i]);
01991         mae /= (prob->l-count);
01992         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
01993         free(ymv);
01994         return mae;
01995 }
01996 
01997 
01998 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
01999 // perm, length l, must be allocated before calling this subroutine
02000 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02001 {
02002         int l = prob->l;
02003         int max_nr_class = 16;
02004         int nr_class = 0;
02005         int *label = Malloc(int,max_nr_class);
02006         int *count = Malloc(int,max_nr_class);
02007         int *data_label = Malloc(int,l);        
02008         int i;
02009 
02010         for(i=0;i<l;i++)
02011         {
02012                 int this_label = (int)prob->y[i];
02013                 int j;
02014                 for(j=0;j<nr_class;j++)
02015                 {
02016                         if(this_label == label[j])
02017                         {
02018                                 ++count[j];
02019                                 break;
02020                         }
02021                 }
02022                 data_label[i] = j;
02023                 if(j == nr_class)
02024                 {
02025                         if(nr_class == max_nr_class)
02026                         {
02027                                 max_nr_class *= 2;
02028                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
02029                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
02030                         }
02031                         label[nr_class] = this_label;
02032                         count[nr_class] = 1;
02033                         ++nr_class;
02034                 }
02035         }
02036 
02037         int *start = Malloc(int,nr_class);
02038         start[0] = 0;
02039         for(i=1;i<nr_class;i++)
02040                 start[i] = start[i-1]+count[i-1];
02041         for(i=0;i<l;i++)
02042         {
02043                 perm[start[data_label[i]]] = i;
02044                 ++start[data_label[i]];
02045         }
02046         start[0] = 0;
02047         for(i=1;i<nr_class;i++)
02048                 start[i] = start[i-1]+count[i-1];
02049 
02050         *nr_class_ret = nr_class;
02051         *label_ret = label;
02052         *start_ret = start;
02053         *count_ret = count;
02054         free(data_label);
02055 }
02056 
02057 //
02058 // Interface functions
02059 //
02060 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02061 {
02062         svm_model *model = Malloc(svm_model,1);
02063         model->param = *param;
02064         model->free_sv = 0;     // XXX
02065 
02066         if(param->svm_type == ONE_CLASS ||
02067            param->svm_type == EPSILON_SVR ||
02068            param->svm_type == NU_SVR)
02069         {
02070                 // regression or one-class-svm
02071                 model->nr_class = 2;
02072                 model->label = NULL;
02073                 model->nSV = NULL;
02074                 model->probA = NULL; model->probB = NULL;
02075                 model->sv_coef = Malloc(double *,1);
02076 
02077                 if(param->probability && 
02078                    (param->svm_type == EPSILON_SVR ||
02079                     param->svm_type == NU_SVR))
02080                 {
02081                         model->probA = Malloc(double,1);
02082                         model->probA[0] = svm_svr_probability(prob,param);
02083                 }
02084 
02085                 decision_function f = svm_train_one(prob,param,0,0);
02086                 model->rho = Malloc(double,1);
02087                 model->rho[0] = f.rho;
02088 
02089                 int nSV = 0;
02090                 int i;
02091                 for(i=0;i<prob->l;i++)
02092                         if(fabs(f.alpha[i]) > 0) ++nSV;
02093                 model->l = nSV;
02094                 model->SV = Malloc(svm_node *,nSV);
02095                 model->sv_coef[0] = Malloc(double,nSV);
02096                 int j = 0;
02097                 for(i=0;i<prob->l;i++)
02098                         if(fabs(f.alpha[i]) > 0)
02099                         {
02100                                 model->SV[j] = prob->x[i];
02101                                 model->sv_coef[0][j] = f.alpha[i];
02102                                 ++j;
02103                         }               
02104 
02105                 free(f.alpha);
02106         }
02107         else
02108         {
02109                 // classification
02110                 int l = prob->l;
02111                 int nr_class;
02112                 int *label = NULL;
02113                 int *start = NULL;
02114                 int *count = NULL;
02115                 int *perm = Malloc(int,l);
02116 
02117                 // group training data of the same class
02118                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);            
02119                 svm_node **x = Malloc(svm_node *,l);
02120                 int i;
02121                 for(i=0;i<l;i++)
02122                         x[i] = prob->x[perm[i]];
02123 
02124                 // calculate weighted C
02125 
02126                 double *weighted_C = Malloc(double, nr_class);
02127                 for(i=0;i<nr_class;i++)
02128                         weighted_C[i] = param->C;
02129                 for(i=0;i<param->nr_weight;i++)
02130                 {       
02131                         int j;
02132                         for(j=0;j<nr_class;j++)
02133                                 if(param->weight_label[i] == label[j])
02134                                         break;
02135                         if(j == nr_class)
02136                                 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
02137                         else
02138                                 weighted_C[j] *= param->weight[i];
02139                 }
02140 
02141                 // train k*(k-1)/2 models
02142                 
02143                 bool *nonzero = Malloc(bool,l);
02144                 for(i=0;i<l;i++)
02145                         nonzero[i] = false;
02146                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02147 
02148                 double *probA=NULL,*probB=NULL;
02149                 if (param->probability)
02150                 {
02151                         probA=Malloc(double,nr_class*(nr_class-1)/2);
02152                         probB=Malloc(double,nr_class*(nr_class-1)/2);
02153                 }
02154 
02155                 int p = 0;
02156                 for(i=0;i<nr_class;i++)
02157                         for(int j=i+1;j<nr_class;j++)
02158                         {
02159                                 svm_problem sub_prob;
02160                                 int si = start[i], sj = start[j];
02161                                 int ci = count[i], cj = count[j];
02162                                 sub_prob.l = ci+cj;
02163                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02164                                 sub_prob.y = Malloc(double,sub_prob.l);
02165                                 int k;
02166                                 for(k=0;k<ci;k++)
02167                                 {
02168                                         sub_prob.x[k] = x[si+k];
02169                                         sub_prob.y[k] = +1;
02170                                 }
02171                                 for(k=0;k<cj;k++)
02172                                 {
02173                                         sub_prob.x[ci+k] = x[sj+k];
02174                                         sub_prob.y[ci+k] = -1;
02175                                 }
02176 
02177                                 if(param->probability)
02178                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02179 
02180                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02181                                 for(k=0;k<ci;k++)
02182                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02183                                                 nonzero[si+k] = true;
02184                                 for(k=0;k<cj;k++)
02185                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02186                                                 nonzero[sj+k] = true;
02187                                 free(sub_prob.x);
02188                                 free(sub_prob.y);
02189                                 ++p;
02190                         }
02191 
02192                 // build output
02193 
02194                 model->nr_class = nr_class;
02195                 
02196                 model->label = Malloc(int,nr_class);
02197                 for(i=0;i<nr_class;i++)
02198                         model->label[i] = label[i];
02199                 
02200                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02201                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02202                         model->rho[i] = f[i].rho;
02203 
02204                 if(param->probability)
02205                 {
02206                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02207                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02208                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02209                         {
02210                                 model->probA[i] = probA[i];
02211                                 model->probB[i] = probB[i];
02212                         }
02213                 }
02214                 else
02215                 {
02216                         model->probA=NULL;
02217                         model->probB=NULL;
02218                 }
02219 
02220                 int total_sv = 0;
02221                 int *nz_count = Malloc(int,nr_class);
02222                 model->nSV = Malloc(int,nr_class);
02223                 for(i=0;i<nr_class;i++)
02224                 {
02225                         int nSV = 0;
02226                         for(int j=0;j<count[i];j++)
02227                                 if(nonzero[start[i]+j])
02228                                 {       
02229                                         ++nSV;
02230                                         ++total_sv;
02231                                 }
02232                         model->nSV[i] = nSV;
02233                         nz_count[i] = nSV;
02234                 }
02235                 
02236                 info("Total nSV = %d\n",total_sv);
02237 
02238                 model->l = total_sv;
02239                 model->SV = Malloc(svm_node *,total_sv);
02240                 p = 0;
02241                 for(i=0;i<l;i++)
02242                         if(nonzero[i]) model->SV[p++] = x[i];
02243 
02244                 int *nz_start = Malloc(int,nr_class);
02245                 nz_start[0] = 0;
02246                 for(i=1;i<nr_class;i++)
02247                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02248 
02249                 model->sv_coef = Malloc(double *,nr_class-1);
02250                 for(i=0;i<nr_class-1;i++)
02251                         model->sv_coef[i] = Malloc(double,total_sv);
02252 
02253                 p = 0;
02254                 for(i=0;i<nr_class;i++)
02255                         for(int j=i+1;j<nr_class;j++)
02256                         {
02257                                 // classifier (i,j): coefficients with
02258                                 // i are in sv_coef[j-1][nz_start[i]...],
02259                                 // j are in sv_coef[i][nz_start[j]...]
02260 
02261                                 int si = start[i];
02262                                 int sj = start[j];
02263                                 int ci = count[i];
02264                                 int cj = count[j];
02265                                 
02266                                 int q = nz_start[i];
02267                                 int k;
02268                                 for(k=0;k<ci;k++)
02269                                         if(nonzero[si+k])
02270                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02271                                 q = nz_start[j];
02272                                 for(k=0;k<cj;k++)
02273                                         if(nonzero[sj+k])
02274                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02275                                 ++p;
02276                         }
02277                 
02278                 free(label);
02279                 free(probA);
02280                 free(probB);
02281                 free(count);
02282                 free(perm);
02283                 free(start);
02284                 free(x);
02285                 free(weighted_C);
02286                 free(nonzero);
02287                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02288                         free(f[i].alpha);
02289                 free(f);
02290                 free(nz_count);
02291                 free(nz_start);
02292         }
02293         return model;
02294 }
02295 
02296 // Stratified cross validation
02297 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02298 {
02299         int i;
02300         int *fold_start = Malloc(int,nr_fold+1);
02301         int l = prob->l;
02302         int *perm = Malloc(int,l);
02303         int nr_class;
02304 
02305         // stratified cv may not give leave-one-out rate
02306         // Each class to l folds -> some folds may have zero elements
02307         if((param->svm_type == C_SVC ||
02308             param->svm_type == NU_SVC) && nr_fold < l)
02309         {
02310                 int *start = NULL;
02311                 int *label = NULL;
02312                 int *count = NULL;
02313                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02314 
02315                 // random shuffle and then data grouped by fold using the array perm
02316                 int *fold_count = Malloc(int,nr_fold);
02317                 int c;
02318                 int *index = Malloc(int,l);
02319                 for(i=0;i<l;i++)
02320                         index[i]=perm[i];
02321                 for (c=0; c<nr_class; c++) 
02322                         for(i=0;i<count[c];i++)
02323                         {
02324                                 int j = i+rand()%(count[c]-i);
02325                                 swap(index[start[c]+j],index[start[c]+i]);
02326                         }
02327                 for(i=0;i<nr_fold;i++)
02328                 {
02329                         fold_count[i] = 0;
02330                         for (c=0; c<nr_class;c++)
02331                                 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02332                 }
02333                 fold_start[0]=0;
02334                 for (i=1;i<=nr_fold;i++)
02335                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02336                 for (c=0; c<nr_class;c++)
02337                         for(i=0;i<nr_fold;i++)
02338                         {
02339                                 int begin = start[c]+i*count[c]/nr_fold;
02340                                 int end = start[c]+(i+1)*count[c]/nr_fold;
02341                                 for(int j=begin;j<end;j++)
02342                                 {
02343                                         perm[fold_start[i]] = index[j];
02344                                         fold_start[i]++;
02345                                 }
02346                         }
02347                 fold_start[0]=0;
02348                 for (i=1;i<=nr_fold;i++)
02349                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02350                 free(start);    
02351                 free(label);
02352                 free(count);    
02353                 free(index);
02354                 free(fold_count);
02355         }
02356         else
02357         {
02358                 for(i=0;i<l;i++) perm[i]=i;
02359                 for(i=0;i<l;i++)
02360                 {
02361                         int j = i+rand()%(l-i);
02362                         swap(perm[i],perm[j]);
02363                 }
02364                 for(i=0;i<=nr_fold;i++)
02365                         fold_start[i]=i*l/nr_fold;
02366         }
02367 
02368         for(i=0;i<nr_fold;i++)
02369         {
02370                 int begin = fold_start[i];
02371                 int end = fold_start[i+1];
02372                 int j,k;
02373                 struct svm_problem subprob;
02374 
02375                 subprob.l = l-(end-begin);
02376                 subprob.x = Malloc(struct svm_node*,subprob.l);
02377                 subprob.y = Malloc(double,subprob.l);
02378                         
02379                 k=0;
02380                 for(j=0;j<begin;j++)
02381                 {
02382                         subprob.x[k] = prob->x[perm[j]];
02383                         subprob.y[k] = prob->y[perm[j]];
02384                         ++k;
02385                 }
02386                 for(j=end;j<l;j++)
02387                 {
02388                         subprob.x[k] = prob->x[perm[j]];
02389                         subprob.y[k] = prob->y[perm[j]];
02390                         ++k;
02391                 }
02392                 struct svm_model *submodel = svm_train(&subprob,param);
02393                 if(param->probability && 
02394                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02395                 {
02396                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02397                         for(j=begin;j<end;j++)
02398                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02399                         free(prob_estimates);                   
02400                 }
02401                 else
02402                         for(j=begin;j<end;j++)
02403                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02404                 svm_free_and_destroy_model(&submodel);
02405                 free(subprob.x);
02406                 free(subprob.y);
02407         }               
02408         free(fold_start);
02409         free(perm);     
02410 }
02411 
02412 
02413 int svm_get_svm_type(const svm_model *model)
02414 {
02415         return model->param.svm_type;
02416 }
02417 
02418 int svm_get_nr_class(const svm_model *model)
02419 {
02420         return model->nr_class;
02421 }
02422 
02423 void svm_get_labels(const svm_model *model, int* label)
02424 {
02425         if (model->label != NULL)
02426                 for(int i=0;i<model->nr_class;i++)
02427                         label[i] = model->label[i];
02428 }
02429 
02430 double svm_get_svr_probability(const svm_model *model)
02431 {
02432         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02433             model->probA!=NULL)
02434                 return model->probA[0];
02435         else
02436         {
02437                 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02438                 return 0;
02439         }
02440 }
02441 
02442 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02443 {
02444         if(model->param.svm_type == ONE_CLASS ||
02445            model->param.svm_type == EPSILON_SVR ||
02446            model->param.svm_type == NU_SVR)
02447         {
02448                 double *sv_coef = model->sv_coef[0];
02449                 double sum = 0;
02450                 for(int i=0;i<model->l;i++)
02451                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02452                 sum -= model->rho[0];
02453                 *dec_values = sum;
02454 
02455                 if(model->param.svm_type == ONE_CLASS)
02456                         return (sum>0)?1:-1;
02457                 else
02458                         return sum;
02459         }
02460         else
02461         {
02462                 int i;
02463                 int nr_class = model->nr_class;
02464                 int l = model->l;
02465                 
02466                 double *kvalue = Malloc(double,l);
02467                 for(i=0;i<l;i++)
02468                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02469 
02470                 int *start = Malloc(int,nr_class);
02471                 start[0] = 0;
02472                 for(i=1;i<nr_class;i++)
02473                         start[i] = start[i-1]+model->nSV[i-1];
02474 
02475                 int *vote = Malloc(int,nr_class);
02476                 for(i=0;i<nr_class;i++)
02477                         vote[i] = 0;
02478 
02479                 int p=0;
02480                 for(i=0;i<nr_class;i++)
02481                         for(int j=i+1;j<nr_class;j++)
02482                         {
02483                                 double sum = 0;
02484                                 int si = start[i];
02485                                 int sj = start[j];
02486                                 int ci = model->nSV[i];
02487                                 int cj = model->nSV[j];
02488                                 
02489                                 int k;
02490                                 double *coef1 = model->sv_coef[j-1];
02491                                 double *coef2 = model->sv_coef[i];
02492                                 for(k=0;k<ci;k++)
02493                                         sum += coef1[si+k] * kvalue[si+k];
02494                                 for(k=0;k<cj;k++)
02495                                         sum += coef2[sj+k] * kvalue[sj+k];
02496                                 sum -= model->rho[p];
02497                                 dec_values[p] = sum;
02498 
02499                                 if(dec_values[p] > 0)
02500                                         ++vote[i];
02501                                 else
02502                                         ++vote[j];
02503                                 p++;
02504                         }
02505 
02506                 int vote_max_idx = 0;
02507                 for(i=1;i<nr_class;i++)
02508                         if(vote[i] > vote[vote_max_idx])
02509                                 vote_max_idx = i;
02510 
02511                 free(kvalue);
02512                 free(start);
02513                 free(vote);
02514                 return model->label[vote_max_idx];
02515         }
02516 }
02517 
02518 double svm_predict(const svm_model *model, const svm_node *x)
02519 {
02520         int nr_class = model->nr_class;
02521         double *dec_values;
02522         if(model->param.svm_type == ONE_CLASS ||
02523            model->param.svm_type == EPSILON_SVR ||
02524            model->param.svm_type == NU_SVR)
02525                 dec_values = Malloc(double, 1);
02526         else 
02527                 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02528         double pred_result = svm_predict_values(model, x, dec_values);
02529         free(dec_values);
02530         return pred_result;
02531 }
02532 
02533 double svm_predict_probability(
02534         const svm_model *model, const svm_node *x, double *prob_estimates)
02535 {
02536         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02537             model->probA!=NULL && model->probB!=NULL)
02538         {
02539                 int i;
02540                 int nr_class = model->nr_class;
02541                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02542                 svm_predict_values(model, x, dec_values);
02543 
02544                 double min_prob=1e-7;
02545                 double **pairwise_prob=Malloc(double *,nr_class);
02546                 for(i=0;i<nr_class;i++)
02547                         pairwise_prob[i]=Malloc(double,nr_class);
02548                 int k=0;
02549                 for(i=0;i<nr_class;i++)
02550                         for(int j=i+1;j<nr_class;j++)
02551                         {
02552                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02553                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02554                                 k++;
02555                         }
02556                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02557 
02558                 int prob_max_idx = 0;
02559                 for(i=1;i<nr_class;i++)
02560                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02561                                 prob_max_idx = i;
02562                 for(i=0;i<nr_class;i++)
02563                         free(pairwise_prob[i]);
02564                 free(dec_values);
02565                 free(pairwise_prob);         
02566                 return model->label[prob_max_idx];
02567         }
02568         else 
02569                 return svm_predict(model, x);
02570 }
02571 
02572 static const char *svm_type_table[] =
02573 {
02574         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02575 };
02576 
02577 static const char *kernel_type_table[]=
02578 {
02579         "linear","polynomial","rbf","sigmoid","precomputed",NULL
02580 };
02581 
02582 int svm_save_model(const char *model_file_name, const svm_model *model)
02583 {
02584         FILE *fp = fopen(model_file_name,"w");
02585         if(fp==NULL) return -1;
02586 
02587         const svm_parameter& param = model->param;
02588 
02589         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02590         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02591 
02592         if(param.kernel_type == POLY)
02593                 fprintf(fp,"degree %d\n", param.degree);
02594 
02595         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02596                 fprintf(fp,"gamma %g\n", param.gamma);
02597 
02598         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02599                 fprintf(fp,"coef0 %g\n", param.coef0);
02600 
02601         int nr_class = model->nr_class;
02602         int l = model->l;
02603         fprintf(fp, "nr_class %d\n", nr_class);
02604         fprintf(fp, "total_sv %d\n",l);
02605         
02606         {
02607                 fprintf(fp, "rho");
02608                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02609                         fprintf(fp," %g",model->rho[i]);
02610                 fprintf(fp, "\n");
02611         }
02612         
02613         if(model->label)
02614         {
02615                 fprintf(fp, "label");
02616                 for(int i=0;i<nr_class;i++)
02617                         fprintf(fp," %d",model->label[i]);
02618                 fprintf(fp, "\n");
02619         }
02620 
02621         if(model->probA) // regression has probA only
02622         {
02623                 fprintf(fp, "probA");
02624                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02625                         fprintf(fp," %g",model->probA[i]);
02626                 fprintf(fp, "\n");
02627         }
02628         if(model->probB)
02629         {
02630                 fprintf(fp, "probB");
02631                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02632                         fprintf(fp," %g",model->probB[i]);
02633                 fprintf(fp, "\n");
02634         }
02635 
02636         if(model->nSV)
02637         {
02638                 fprintf(fp, "nr_sv");
02639                 for(int i=0;i<nr_class;i++)
02640                         fprintf(fp," %d",model->nSV[i]);
02641                 fprintf(fp, "\n");
02642         }
02643 
02644         fprintf(fp, "SV\n");
02645         const double * const *sv_coef = model->sv_coef;
02646         const svm_node * const *SV = model->SV;
02647 
02648         for(int i=0;i<l;i++)
02649         {
02650                 for(int j=0;j<nr_class-1;j++)
02651                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02652 
02653                 const svm_node *p = SV[i];
02654 
02655                 if(param.kernel_type == PRECOMPUTED)
02656                         fprintf(fp,"0:%d ",(int)(p->value));
02657                 else
02658                         while(p->index != -1)
02659                         {
02660                                 fprintf(fp,"%d:%.8g ",p->index,p->value);
02661                                 p++;
02662                         }
02663                 fprintf(fp, "\n");
02664         }
02665         if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02666         else return 0;
02667 }
02668 
02669 static char *line = NULL;
02670 static int max_line_len;
02671 
02672 static char* readline(FILE *input)
02673 {
02674         int len;
02675 
02676         if(fgets(line,max_line_len,input) == NULL)
02677                 return NULL;
02678 
02679         while(strrchr(line,'\n') == NULL)
02680         {
02681                 max_line_len *= 2;
02682                 line = (char *) realloc(line,max_line_len);
02683                 len = (int) strlen(line);
02684                 if(fgets(line+len,max_line_len-len,input) == NULL)
02685                         break;
02686         }
02687         return line;
02688 }
02689 
02690 svm_model *svm_load_model(const char *model_file_name)
02691 {
02692         FILE *fp = fopen(model_file_name,"rb");
02693         if(fp==NULL) return NULL;
02694         
02695         // read parameters
02696 
02697         svm_model *model = Malloc(svm_model,1);
02698         svm_parameter& param = model->param;
02699         model->rho = NULL;
02700         model->probA = NULL;
02701         model->probB = NULL;
02702         model->label = NULL;
02703         model->nSV = NULL;
02704 
02705         char cmd[81];
02706         while(1)
02707         {
02708                 fscanf(fp,"%80s",cmd);
02709 
02710                 if(strcmp(cmd,"svm_type")==0)
02711                 {
02712                         fscanf(fp,"%80s",cmd);
02713                         int i;
02714                         for(i=0;svm_type_table[i];i++)
02715                         {
02716                                 if(strcmp(svm_type_table[i],cmd)==0)
02717                                 {
02718                                         param.svm_type=i;
02719                                         break;
02720                                 }
02721                         }
02722                         if(svm_type_table[i] == NULL)
02723                         {
02724                                 fprintf(stderr,"unknown svm type.\n");
02725                                 free(model->rho);
02726                                 free(model->label);
02727                                 free(model->nSV);
02728                                 free(model);
02729                                 return NULL;
02730                         }
02731                 }
02732                 else if(strcmp(cmd,"kernel_type")==0)
02733                 {               
02734                         fscanf(fp,"%80s",cmd);
02735                         int i;
02736                         for(i=0;kernel_type_table[i];i++)
02737                         {
02738                                 if(strcmp(kernel_type_table[i],cmd)==0)
02739                                 {
02740                                         param.kernel_type=i;
02741                                         break;
02742                                 }
02743                         }
02744                         if(kernel_type_table[i] == NULL)
02745                         {
02746                                 fprintf(stderr,"unknown kernel function.\n");
02747                                 free(model->rho);
02748                                 free(model->label);
02749                                 free(model->nSV);
02750                                 free(model);
02751                                 return NULL;
02752                         }
02753                 }
02754                 else if(strcmp(cmd,"degree")==0)
02755                         fscanf(fp,"%d",&param.degree);
02756                 else if(strcmp(cmd,"gamma")==0)
02757                         fscanf(fp,"%lf",&param.gamma);
02758                 else if(strcmp(cmd,"coef0")==0)
02759                         fscanf(fp,"%lf",&param.coef0);
02760                 else if(strcmp(cmd,"nr_class")==0)
02761                         fscanf(fp,"%d",&model->nr_class);
02762                 else if(strcmp(cmd,"total_sv")==0)
02763                         fscanf(fp,"%d",&model->l);
02764                 else if(strcmp(cmd,"rho")==0)
02765                 {
02766                         int n = model->nr_class * (model->nr_class-1)/2;
02767                         model->rho = Malloc(double,n);
02768                         for(int i=0;i<n;i++)
02769                                 fscanf(fp,"%lf",&model->rho[i]);
02770                 }
02771                 else if(strcmp(cmd,"label")==0)
02772                 {
02773                         int n = model->nr_class;
02774                         model->label = Malloc(int,n);
02775                         for(int i=0;i<n;i++)
02776                                 fscanf(fp,"%d",&model->label[i]);
02777                 }
02778                 else if(strcmp(cmd,"probA")==0)
02779                 {
02780                         int n = model->nr_class * (model->nr_class-1)/2;
02781                         model->probA = Malloc(double,n);
02782                         for(int i=0;i<n;i++)
02783                                 fscanf(fp,"%lf",&model->probA[i]);
02784                 }
02785                 else if(strcmp(cmd,"probB")==0)
02786                 {
02787                         int n = model->nr_class * (model->nr_class-1)/2;
02788                         model->probB = Malloc(double,n);
02789                         for(int i=0;i<n;i++)
02790                                 fscanf(fp,"%lf",&model->probB[i]);
02791                 }
02792                 else if(strcmp(cmd,"nr_sv")==0)
02793                 {
02794                         int n = model->nr_class;
02795                         model->nSV = Malloc(int,n);
02796                         for(int i=0;i<n;i++)
02797                                 fscanf(fp,"%d",&model->nSV[i]);
02798                 }
02799                 else if(strcmp(cmd,"SV")==0)
02800                 {
02801                         while(1)
02802                         {
02803                                 int c = getc(fp);
02804                                 if(c==EOF || c=='\n') break;    
02805                         }
02806                         break;
02807                 }
02808                 else
02809                 {
02810                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02811                         free(model->rho);
02812                         free(model->label);
02813                         free(model->nSV);
02814                         free(model);
02815                         return NULL;
02816                 }
02817         }
02818 
02819         // read sv_coef and SV
02820 
02821         int elements = 0;
02822         long pos = ftell(fp);
02823 
02824         max_line_len = 1024;
02825         line = Malloc(char,max_line_len);
02826         char *p,*endptr,*idx,*val;
02827 
02828         while(readline(fp)!=NULL)
02829         {
02830                 p = strtok(line,":");
02831                 while(1)
02832                 {
02833                         p = strtok(NULL,":");
02834                         if(p == NULL)
02835                                 break;
02836                         ++elements;
02837                 }
02838         }
02839         elements += model->l;
02840 
02841         fseek(fp,pos,SEEK_SET);
02842 
02843         int m = model->nr_class - 1;
02844         int l = model->l;
02845         model->sv_coef = Malloc(double *,m);
02846         int i;
02847         for(i=0;i<m;i++)
02848                 model->sv_coef[i] = Malloc(double,l);
02849         model->SV = Malloc(svm_node*,l);
02850         svm_node *x_space = NULL;
02851         if(l>0) x_space = Malloc(svm_node,elements);
02852 
02853         int j=0;
02854         for(i=0;i<l;i++)
02855         {
02856                 readline(fp);
02857                 model->SV[i] = &x_space[j];
02858 
02859                 p = strtok(line, " \t");
02860                 model->sv_coef[0][i] = strtod(p,&endptr);
02861                 for(int k=1;k<m;k++)
02862                 {
02863                         p = strtok(NULL, " \t");
02864                         model->sv_coef[k][i] = strtod(p,&endptr);
02865                 }
02866 
02867                 while(1)
02868                 {
02869                         idx = strtok(NULL, ":");
02870                         val = strtok(NULL, " \t");
02871 
02872                         if(val == NULL)
02873                                 break;
02874                         x_space[j].index = (int) strtol(idx,&endptr,10);
02875                         x_space[j].value = strtod(val,&endptr);
02876 
02877                         ++j;
02878                 }
02879                 x_space[j++].index = -1;
02880         }
02881         free(line);
02882 
02883         if (ferror(fp) != 0 || fclose(fp) != 0)
02884                 return NULL;
02885 
02886         model->free_sv = 1;     // XXX
02887         return model;
02888 }
02889 
02890 void svm_free_model_content(svm_model* model_ptr)
02891 {
02892         if(model_ptr->free_sv && model_ptr->l > 0)
02893                 free((void *)(model_ptr->SV[0]));
02894         for(int i=0;i<model_ptr->nr_class-1;i++)
02895                 free(model_ptr->sv_coef[i]);
02896         free(model_ptr->SV);
02897         free(model_ptr->sv_coef);
02898         free(model_ptr->rho);
02899         free(model_ptr->label);
02900         free(model_ptr->probA);
02901         free(model_ptr->probB);
02902         free(model_ptr->nSV);
02903 }
02904 
02905 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
02906 {
02907         svm_model* model_ptr = *model_ptr_ptr;
02908         if(model_ptr != NULL)
02909         {
02910                 svm_free_model_content(model_ptr);
02911                 free(model_ptr);
02912         }
02913 }
02914 
02915 void svm_destroy_model(svm_model* model_ptr)
02916 {
02917         fprintf(stderr,"warning: svm_destroy_model is deprecated and should not be used. Please use svm_free_and_destroy_model(svm_model **model_ptr_ptr)\n");
02918         svm_free_and_destroy_model(&model_ptr);
02919 }
02920 
02921 void svm_destroy_param(svm_parameter* param)
02922 {
02923         free(param->weight_label);
02924         free(param->weight);
02925 }
02926 
02927 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02928 {
02929         // svm_type
02930 
02931         int svm_type = param->svm_type;
02932         if(svm_type != C_SVC &&
02933            svm_type != NU_SVC &&
02934            svm_type != ONE_CLASS &&
02935            svm_type != EPSILON_SVR &&
02936            svm_type != NU_SVR)
02937                 return "unknown svm type";
02938         
02939         // kernel_type, degree
02940         
02941         int kernel_type = param->kernel_type;
02942         if(kernel_type != LINEAR &&
02943            kernel_type != POLY &&
02944            kernel_type != RBF &&
02945            kernel_type != SIGMOID &&
02946            kernel_type != PRECOMPUTED)
02947                 return "unknown kernel type";
02948 
02949         if(param->gamma < 0)
02950                 return "gamma < 0";
02951 
02952         if(param->degree < 0)
02953                 return "degree of polynomial kernel < 0";
02954 
02955         // cache_size,eps,C,nu,p,shrinking
02956 
02957         if(param->cache_size <= 0)
02958                 return "cache_size <= 0";
02959 
02960         if(param->eps <= 0)
02961                 return "eps <= 0";
02962 
02963         if(svm_type == C_SVC ||
02964            svm_type == EPSILON_SVR ||
02965            svm_type == NU_SVR)
02966                 if(param->C <= 0)
02967                         return "C <= 0";
02968 
02969         if(svm_type == NU_SVC ||
02970            svm_type == ONE_CLASS ||
02971            svm_type == NU_SVR)
02972                 if(param->nu <= 0 || param->nu > 1)
02973                         return "nu <= 0 or nu > 1";
02974 
02975         if(svm_type == EPSILON_SVR)
02976                 if(param->p < 0)
02977                         return "p < 0";
02978 
02979         if(param->shrinking != 0 &&
02980            param->shrinking != 1)
02981                 return "shrinking != 0 and shrinking != 1";
02982 
02983         if(param->probability != 0 &&
02984            param->probability != 1)
02985                 return "probability != 0 and probability != 1";
02986 
02987         if(param->probability == 1 &&
02988            svm_type == ONE_CLASS)
02989                 return "one-class SVM probability output not supported yet";
02990 
02991 
02992         // check whether nu-svc is feasible
02993         
02994         if(svm_type == NU_SVC)
02995         {
02996                 int l = prob->l;
02997                 int max_nr_class = 16;
02998                 int nr_class = 0;
02999                 int *label = Malloc(int,max_nr_class);
03000                 int *count = Malloc(int,max_nr_class);
03001 
03002                 int i;
03003                 for(i=0;i<l;i++)
03004                 {
03005                         int this_label = (int)prob->y[i];
03006                         int j;
03007                         for(j=0;j<nr_class;j++)
03008                                 if(this_label == label[j])
03009                                 {
03010                                         ++count[j];
03011                                         break;
03012                                 }
03013                         if(j == nr_class)
03014                         {
03015                                 if(nr_class == max_nr_class)
03016                                 {
03017                                         max_nr_class *= 2;
03018                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
03019                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
03020                                 }
03021                                 label[nr_class] = this_label;
03022                                 count[nr_class] = 1;
03023                                 ++nr_class;
03024                         }
03025                 }
03026         
03027                 for(i=0;i<nr_class;i++)
03028                 {
03029                         int n1 = count[i];
03030                         for(int j=i+1;j<nr_class;j++)
03031                         {
03032                                 int n2 = count[j];
03033                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
03034                                 {
03035                                         free(label);
03036                                         free(count);
03037                                         return "specified nu is infeasible";
03038                                 }
03039                         }
03040                 }
03041                 free(label);
03042                 free(count);
03043         }
03044 
03045         return NULL;
03046 }
03047 
03048 int svm_check_probability_model(const svm_model *model)
03049 {
03050         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03051                 model->probA!=NULL && model->probB!=NULL) ||
03052                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03053                  model->probA!=NULL);
03054 }
03055 
03056 void svm_set_print_string_function(void (*print_func)(const char *))
03057 {
03058         if(print_func == NULL)
03059                 svm_print_string = &print_string_stdout;
03060         else
03061                 svm_print_string = print_func;
03062 }