#ifndef SHARK_ALGORITHMS_PEGASOS_H
#define SHARK_ALGORITHMS_PEGASOS_H

/// \brief Pegasos solver for linear (binary) support vector machines.
template <class VectorType>
class Pegasos
{
public:
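    // A minimal usage sketch (not part of the original header; the call
    // matches the signature as reconstructed below, and solve() itself
    // initializes w to zero):
    //
    //     LabeledData<RealVector, unsigned int> data;   // binary labels 0/1
    //     RealVector w(inputDimension(data));
    //     Pegasos<RealVector>::solve(data, 1.0 /*C*/, w);
    //     double score = inner_prod(w, x);              // classify by sign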
    /// \brief Solve the primal SVM training problem.
    template <class WeightType>
    static std::size_t solve(
            LabeledData<VectorType, unsigned int> const& data,  // training data
            double C,                                           // SVM regularization parameter
            WeightType& w,                                      // weight vector (output)
            std::size_t batchsize = 1,                          // examples per stochastic gradient estimate
            double varepsilon = 0.001)                          // target accuracy of the stopping rule
    {
        std::size_t ell = data.numberOfElements();
        double lambda = 1.0 / (ell * C);

        double initialPrimal = 1.0;                  // primal objective value of the zero solution
        double normbound2 = initialPrimal / lambda;  // squared norm bound for the optimum
        double target = initialPrimal * varepsilon;  // stopping threshold for the gradient norm
        double norm_w2 = 0.0;                        // squared norm of w, maintained incrementally
        double sigma = 1.0;                          // scaling factor; the solution is sigma * w

        w = RealZeroVector(w.size());
        WeightType gradient(w.size());
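        // Why the norm bound is valid (a sketch, not an original comment):
        // the primal objective P(w) = (lambda/2) * |w|^2 + mean hinge loss
        // satisfies P(0) = initialPrimal, and the loss is non-negative, so
        // the optimum w* obeys (lambda/2) * |w*|^2 <= P(w*) <= initialPrimal.
        // The Pegasos analysis tightens this via strong duality to
        // |w*|^2 <= initialPrimal / lambda, the bound used for the
        // projection step below.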
        // Pegasos main loop
        std::size_t start = 10;
        std::size_t checkinterval = (2 * ell) / batchsize;
        std::size_t nextcheck = start + ell / batchsize;
        std::size_t predictions = 0;
        for (std::size_t t=start; ; t++)
        {
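            // Scheduling rationale (a sketch): the step size below behaves
            // like 1/(lambda * t), so starting the counter at t = 10 caps
            // the size of the first updates; and since each stopping-rule
            // check costs a full sweep over all ell examples, spacing the
            // checks (2 * ell) / batchsize iterations apart bounds their
            // overhead by roughly half of the total work.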
            if (t >= nextcheck)
            {
                // stopping criterion: compute the norm of the full primal gradient
                gradient = (lambda * sigma * (double)ell) * w;
                for (std::size_t i=0; i<ell; i++)
                {
                    VectorType const& x = data(i).input;
                    unsigned int y = data(i).label;
                    double f = sigma * inner_prod(w, x);
                    lg(x, y, f, gradient);   // accumulate this example's loss (sub)gradient
                }
                predictions += ell;   // (assumed bookkeeping: one margin computation per example)

                double n2 = inner_prod(gradient, gradient);
                double n = std::sqrt(n2) / (double)ell;   // norm of the mean gradient
                if (n <= target) break;                   // accurate enough; stop
                nextcheck = t + checkinterval;
            }
            else
            {
                // stochastic step: estimate the gradient from batchsize random examples
                gradient.clear();
                for (unsigned int i=0; i<batchsize; i++)
                {
                    std::size_t active = Rng::discrete(0, ell - 1);   // random example index (exact RNG call is an assumption)
                    VectorType const& x = data(active).input;
                    unsigned int y = data(active).label;
                    double f = sigma * inner_prod(w, x);
                    lg(x, y, f, gradient);
                }
                predictions += batchsize;

                sigma *= (1.0 - 1.0 / (double)t);   // implicit scaling step: w <- (1 - 1/t) * w
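                // The sigma trick (a sketch): Pegasos' update
                //     w <- (1 - 1/t) * w - eta_t * g,    eta_t = 1/(lambda * t),
                // would touch every coordinate of w in each iteration.
                // Storing the solution as sigma * w lets the decay (1 - 1/t)
                // be folded into the scalar sigma, while only the (typically
                // sparse) gradient g is applied to w itself, with eta
                // rescaled by 1/sigma to compensate.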
                // gradient step in the sigma-scaled coordinate system
                double eta = 1.0 / (sigma * lambda * t * batchsize);
                gradient *= eta;
                // keep norm_w2 exact via |w - g|^2 = |w|^2 - 2<w,g> + |g|^2
                norm_w2 += inner_prod(gradient, gradient) - 2.0 * inner_prod(w, gradient);
                noalias(w) -= gradient;

                // projection onto the ball of radius sqrt(normbound2)
                double n2 = sigma * sigma * norm_w2;
                if (n2 > normbound2) sigma *= std::sqrt(normbound2 / n2);
            }
        }

        w *= sigma;   // fold the accumulated scaling into the solution
        return predictions;
    }
};
// Margin and loss variants dispatched below. (The enumerator names appear
// in the original; the enum type spellings here are assumptions.)
enum eMarginType { emRelative, emAbsolute };
enum eLossType { elNaiveHinge, elDiscriminativeMax, elDiscriminativeSum, elTotalMax, elTotalSum };

/// \brief Pegasos solver for linear multi-class support vector machines.
template <class VectorType>
class McPegasos
{
public:
    /// \brief Solve the primal multi-class SVM training problem.
    template <class WeightType>
    static std::size_t solve(
            LabeledData<VectorType, unsigned int> const& data,  // training data
            eMarginType margintype,                             // margin variant (relative or absolute)
            eLossType losstype,                                 // loss variant
            bool sumToZero,                                     // enforce sum_c w[c] = 0
            double C,                                           // SVM regularization parameter
            std::vector<WeightType>& w,                         // one weight vector per class (output)
            std::size_t batchsize = 1,                          // examples per stochastic gradient estimate
            double varepsilon = 0.001)                          // target accuracy of the stopping rule
    {
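        // One weight vector per class; the induced decision rule is
        //     argmax_c inner_prod(w[c], x).
        // The sum-to-zero option enforces sum_c w[c] = 0, the usual
        // identifiability constraint of multi-class SVM formulations.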
        unsigned int classes = w.size();
        std::size_t ell = data.numberOfElements();
        double lambda = 1.0 / (ell * C);

        // Select the loss gradient function and the primal objective value
        // of the zero solution for the requested margin/loss combination.
        double initialPrimal = -1.0;   // sentinel: combination not implemented
        LossGradientFunction lg = NULL;
        if (margintype == emRelative)
        {
            if (losstype == elDiscriminativeMax || losstype == elTotalMax)
            {
                initialPrimal = 1.0;
                lg = lossGradientRDM;
            }
            else if (losstype == elDiscriminativeSum || losstype == elTotalSum)
            {
                initialPrimal = classes - 1.0;
                lg = lossGradientRDS;
            }
        }
        else if (margintype == emAbsolute)
        {
            if (losstype == elNaiveHinge)
            {
                initialPrimal = 1.0;
                lg = lossGradientANH;
            }
            else if (losstype == elDiscriminativeMax)
            {
                initialPrimal = 1.0;
                lg = lossGradientADM;
            }
            else if (losstype == elDiscriminativeSum)
            {
                initialPrimal = classes - 1.0;
                lg = lossGradientADS;
            }
            else if (losstype == elTotalMax)
            {
                initialPrimal = 1.0;
                lg = lossGradientATM;
            }
            else if (losstype == elTotalSum)
            {
                initialPrimal = classes;
                lg = lossGradientATS;
            }
        }
        SHARK_RUNTIME_CHECK(initialPrimal > 0 && lg,
                "The combination of margin and loss is not implemented");
        double normbound2 = initialPrimal / lambda;  // squared norm bound for the optimum
        double norm_w2 = 0.0;                        // squared norm of the solution, maintained incrementally
        double sigma = 1.0;                          // scaling factor; the solution is sigma * w
        double target = initialPrimal * varepsilon;  // stopping threshold for the gradient norm
        std::vector<VectorType> gradient(classes);
        RealVector f(classes);                       // per-class scores of the current example
        for (unsigned int c=0; c<classes; c++)
        {
            gradient[c].resize(w[c].size());
            w[c] = RealZeroVector(w[c].size());
        }
        // Pegasos main loop
        std::size_t start = 10;
        std::size_t checkinterval = (2 * ell) / batchsize;
        std::size_t nextcheck = start + ell / batchsize;
        std::size_t predictions = 0;
        for (std::size_t t=start; ; t++)
        {
            if (t >= nextcheck)
            {
                // stopping criterion: compute the norm of the full primal gradient
                for (unsigned int c=0; c<classes; c++) gradient[c] = (lambda * sigma * (double)ell) * w[c];
                for (std::size_t i=0; i<ell; i++)
                {
                    VectorType const& x = data(i).input;
                    unsigned int y = data(i).label;
                    for (unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], x);
                    lg(x, y, f, gradient, sumToZero);
                }
                predictions += ell;   // (assumed bookkeeping: one margin computation per example)

                double n2 = 0.0;
                for (unsigned int c=0; c<classes; c++) n2 += inner_prod(gradient[c], gradient[c]);
                double n = std::sqrt(n2) / (double)ell;   // norm of the mean gradient
                if (n <= target) break;                   // accurate enough; stop
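                // Stopping rule (a sketch): n is the Euclidean norm of the
                // mean primal gradient, taken jointly over all class weight
                // vectors. Comparing it against initialPrimal * varepsilon
                // makes the accuracy requirement relative to the scale of
                // the selected loss, so varepsilon has the same meaning for
                // every margin/loss combination.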
                nextcheck = t + checkinterval;
            }
            else
            {
                // stochastic step: estimate the gradient from batchsize random examples
                for (unsigned int c=0; c<classes; c++) gradient[c].clear();
                for (unsigned int i=0; i<batchsize; i++)
                {
                    std::size_t active = Rng::discrete(0, ell - 1);   // random example index (exact RNG call is an assumption)
                    VectorType const& x = data(active).input;
                    unsigned int y = data(active).label;
                    for (unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], x);
                    lg(x, y, f, gradient, sumToZero);
                }
                predictions += batchsize;
                sigma *= (1.0 - 1.0 / (double)t);   // implicit scaling step: w <- (1 - 1/t) * w

                // gradient step in the sigma-scaled coordinate system
                double eta = 1.0 / (sigma * lambda * t * batchsize);
                for (unsigned int c=0; c<classes; c++)
                {
                    gradient[c] *= eta;
                    norm_w2 += inner_prod(gradient[c], gradient[c]) - 2.0 * inner_prod(w[c], gradient[c]);
                    noalias(w[c]) -= gradient[c];
                }

                // projection onto the ball of radius sqrt(normbound2)
                double n2 = sigma * sigma * norm_w2;
                if (n2 > normbound2) sigma *= std::sqrt(normbound2 / n2);
            }
        }
        // fold the accumulated scaling into the solution
        for (unsigned int c=0; c<classes; c++) w[c] *= sigma;
        return predictions;
    }
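    // A minimal usage sketch (not part of the original header; the exact
    // parameter order of solve() as reconstructed above is an assumption,
    // and solve() zeroes the weight vectors itself):
    //
    //     LabeledData<RealVector, unsigned int> data;   // labels in 0..classes-1
    //     std::vector<RealVector> w(classes, RealVector(inputDimension(data)));
    //     McPegasos<RealVector>::solve(data, emRelative, elDiscriminativeSum,
    //             true /*sumToZero*/, 1.0 /*C*/, w);
    //     // predict: argmax_c inner_prod(w[c], x)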
protected:
    typedef bool(*LossGradientFunction)(
            VectorType const&,
            unsigned int,
            RealVector const&,
            std::vector<VectorType>&,
            bool);
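    // Contract of a LossGradientFunction: given one example (input x, label
    // y) and its per-class scores f, accumulate the (sub)gradient of the
    // example's loss into `gradient`; the final bool requests that the
    // contribution keep the class weight vectors summing to zero. The
    // return value reports whether the example violated its margin, i.e.,
    // whether anything was added.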
    /// loss gradient for the absolute margin with naive hinge loss
    static bool lossGradientANH(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        if (f(y) >= 1.0) return false;   // margin satisfied: no contribution
        gradient[y] -= x;                // push the target score up
        if (sumToZero)
        {
            VectorType xx = (1.0 / (f.size() - 1.0)) * x;
            for (std::size_t c=0; c<f.size(); c++) if (c != y) gradient[c] += xx;
        }
        return true;
    }
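    // Worked bookkeeping (a sketch): the update above changes the class-wise
    // sum of the gradient by -x; adding x / (classes - 1) to each of the
    // remaining classes - 1 entries adds exactly +x back, so the constraint
    // sum_c w[c] = 0 is preserved by the resulting step.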
    /// loss gradient for the relative margin with discriminative max loss
    static bool lossGradientRDM(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        // find the strongest competing class
        unsigned int argmax = 0;
        double max = -1e100;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c != y && f(c) > max)
            {
                max = f(c);
                argmax = c;
            }
        }
        if (f(y) < 1.0 + max)
        {
            // relative margin violated: push the target up, the competitor down
            // (no sum-to-zero correction needed; -x and +x already cancel)
            gradient[y] -= x;
            gradient[argmax] += x;
            return true;
        }
        else return false;
    }
    /// loss gradient for the relative margin with discriminative sum loss
    static bool lossGradientRDS(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        bool nonzero = false;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c != y && f(y) < 1.0 + f(c))
            {
                // margin violated relative to class c
                gradient[y] -= x;
                gradient[c] += x;
                nonzero = true;
            }
        }
        return nonzero;
    }
    /// loss gradient for the absolute margin with discriminative sum loss
    static bool lossGradientADS(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        bool nonzero = false;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c != y && f(c) > -1.0)
            {
                gradient[c] += x;   // push the violating score down
                nonzero = true;
            }
        }
        if (sumToZero && nonzero)
        {
            // project the contribution onto the sum-to-zero subspace
            VectorType mean = gradient[0];
            for (std::size_t c=1; c<f.size(); c++) mean += gradient[c];
            mean /= (double)f.size();
            for (std::size_t c=0; c<f.size(); c++) gradient[c] -= mean;
        }
        return nonzero;
    }
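    // Note on subtracting the mean from the *accumulated* gradient: if every
    // earlier contribution in the batch already summed to zero across
    // classes, the mean of the accumulated vectors equals the mean of this
    // example's contribution alone, so the subtraction projects exactly the
    // new part onto the sum-to-zero subspace without disturbing the rest.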
    /// loss gradient for the absolute margin with discriminative max loss
    static bool lossGradientADM(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        // find the strongest non-target class
        double max = -1e100;
        std::size_t argmax = 0;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c == y) continue;
            if (f(c) > max)
            {
                max = f(c);
                argmax = c;
            }
        }
        if (max <= -1.0) return false;   // absolute margin satisfied
        gradient[argmax] += x;           // push the strongest violator down
        if (sumToZero)
        {
            VectorType xx = (1.0 / (f.size() - 1.0)) * x;
            for (std::size_t c=0; c<f.size(); c++) if (c != argmax) gradient[c] -= xx;
        }
        return true;
    }
    /// loss gradient for the absolute margin with total sum loss
    static bool lossGradientATS(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        bool nonzero = false;
        for (std::size_t c=0; c<f.size(); c++)
        {
            if (c == y)
            {
                if (f(c) < 1.0)   // target score below its margin
                {
                    gradient[c] -= x;
                    nonzero = true;
                }
            }
            else if (f(c) > -1.0)   // non-target score above its margin
            {
                gradient[c] += x;
                nonzero = true;
            }
        }
        if (sumToZero && nonzero)
        {
            // project the contribution onto the sum-to-zero subspace
            VectorType mean = gradient[0];
            for (std::size_t c=1; c<f.size(); c++) mean += gradient[c];
            mean /= (double)f.size();
            for (std::size_t c=0; c<f.size(); c++) gradient[c] -= mean;
        }
        return nonzero;
    }
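    // Sum- vs max-type losses (a sketch): the sum variants above penalize
    // every violated margin of an example and therefore update several
    // class vectors at once, while the max variants touch only the single
    // strongest violator, giving sparser updates at the price of a less
    // smooth objective.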
    /// loss gradient for the absolute margin with total max loss
    static bool lossGradientATM(
            VectorType const& x,
            unsigned int y,
            RealVector const& f,
            std::vector<VectorType>& gradient,
            bool sumToZero)
    {
        // find the strongest margin violation over all classes,
        // including the target class
        double max = -1e100;
        std::size_t argmax = 0;
        for (std::size_t c=0; c<f.size(); c++)
        {
            double violation = (c == y) ? 1.0 - f(c) : 1.0 + f(c);
            if (violation > max)
            {
                max = violation;
                argmax = c;
            }
        }
        if (max <= 0.0) return false;   // all margins satisfied
        if (argmax == y)
        {
            gradient[argmax] -= x;   // push the target score up
            if (sumToZero)
            {
                VectorType xx = (1.0 / (f.size() - 1.0)) * x;
                for (std::size_t c=0; c<f.size(); c++) if (c != argmax) gradient[c] += xx;
            }
        }
        else
        {
            gradient[argmax] += x;   // push the violating score down
            if (sumToZero)
            {
                VectorType xx = (1.0 / (f.size() - 1.0)) * x;
                for (std::size_t c=0; c<f.size(); c++) if (c != argmax) gradient[c] -= xx;
            }
        }
        return true;
    }
};

#endif