36 #ifndef SHARK_ALGORITHMS_QP_QPMCLINEAR_H 37 #define SHARK_ALGORITHMS_QP_QPMCLINEAR_H 53 template <
class InputT>
74 const DatasetType& dataset,
77 std::size_t strategy = ACF,
78 bool shrinking =
false)
88 for (std::size_t i=0; i<
m_data.size(); i++)
104 random::rng_type& rng,
108 bool verbose =
false)
118 std::size_t ell =
m_data.size();
119 RealMatrix alpha(ell,
m_classes + 1, 0.0);
123 RealVector pref(ell, 1.0);
124 double prefsum = (double)ell;
126 std::vector<std::size_t> schedule(ell);
129 for (std::size_t i=0; i<ell; i++) schedule[i] = i;
133 std::size_t active = ell;
136 std::size_t epoch = 0;
137 std::size_t steps = 0;
140 double objective = 0.0;
141 double max_violation = 0.0;
144 const double gain_learning_rate = 1.0 / ell;
145 double average_gain = 0.0;
155 double psum = prefsum;
158 for (std::size_t i=0; i<ell; i++)
161 double num = (psum < 1e-6) ? ell - pos : std::min((
double)(ell - pos), (ell - pos) * p / psum);
162 std::size_t n = (std::size_t)std::floor(num);
163 double prob = num - n;
166 for (std::size_t j=0; j<n; j++)
181 std::shuffle(schedule.begin(),schedule.begin()+active,rng);
192 size_t nPoints = ell;
196 for (std::size_t j=0; j<nPoints; j++)
200 const std::size_t i = schedule[j];
201 InputReferenceType x_i =
m_data[i].input;
202 const unsigned int y_i =
m_data[i].label;
204 blas::matrix_row<RealMatrix> a = row(alpha, i);
207 RealVector wx = prod(w,x_i);
213 max_violation = std::max(max_violation, kkt);
227 std::swap(schedule[j], schedule[active]);
234 if (epoch == 0) average_gain += gain / (double)ell;
238 constexpr
double CHANGE_RATE = 0.2;
239 constexpr
double PREF_MIN = 0.05;
240 constexpr
double PREF_MAX = 20.0;
242 double change = CHANGE_RATE * (gain / average_gain - 1.0);
243 double newpref = std::min(PREF_MAX, std::max(PREF_MIN, pref(i) * std::exp(change)));
244 prefsum += newpref - pref(i);
246 average_gain = (1.0 - gain_learning_rate) * average_gain + gain_learning_rate * gain;
262 if (prop != NULL) prop->type =
QpTimeout;
269 std::cout <<
"#" << std::flush;
281 for (std::size_t i=0; i<ell; i++) pref(i) = 1.0;
282 prefsum = (double)ell;
295 if (verbose) std::cout <<
"." << std::flush;
299 canstop = (active == ell);
308 for (std::size_t d=0; d<
m_dim; d++) objective -=
w(j, d) *
w(j, d);
311 for (std::size_t i=0; i<ell; i++)
313 for (std::size_t j=0; j<
m_classes; j++) objective += alpha(i, j);
319 prop->accuracy = max_violation;
320 prop->iterations = ell * epoch;
321 prop->value = objective;
322 prop->seconds = timer.
lastLap();
328 std::cout << std::endl;
329 std::cout <<
"training time (seconds): " << timer.
lastLap() << std::endl;
330 std::cout <<
"number of epochs: " << epoch << std::endl;
331 std::cout <<
"number of iterations: " << (ell * epoch) << std::endl;
332 std::cout <<
"number of non-zero steps: " << steps << std::endl;
333 std::cout <<
"dual accuracy: " << max_violation << std::endl;
334 std::cout <<
"dual objective value: " << objective << std::endl;
343 void add_scaled(RealMatrix&
w, RealVector
const& mu, InputReferenceType x)
345 for (std::size_t c=0; c<
m_classes; c++) noalias(row(w, c)) += mu(c) * x;
357 virtual double calcGradient(RealVector& gradient, RealVector wx, blas::matrix_row<RealMatrix>
const& alpha,
double C,
unsigned int y) = 0;
377 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, blas::matrix_row<RealMatrix>& alpha, RealVector& mu) = 0;
388 template <
class InputT>
396 const DatasetType& dataset,
404 virtual double calcGradient(RealVector& gradient, RealVector wx, blas::matrix_row<RealMatrix>
const& alpha,
double C,
unsigned int y)
406 double violation = 0.0;
407 for (std::size_t c=0; c<wx.size(); c++)
415 const double g = 1.0 - 0.5 * (wx(y) - wx(c));
417 if (g > violation && alpha(c) < C) violation = g;
418 else if (-g > violation && alpha(c) > 0.0) violation = -g;
428 for (std::size_t c=0; c<
m_classes; c++) sum_mu += mu(c);
429 unsigned int y =
m_data[index].label;
430 RealVector step(-0.5 * mu);
431 step(y) = 0.5 * sum_mu;
436 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, blas::matrix_row<RealMatrix>& alpha, RealVector& mu)
438 const double qq = 0.5 * q;
443 for (iter=0; iter<maxiter; iter++)
450 if (c == y)
continue;
452 const double g = gradient(c);
453 const double a = alpha(c);
454 if (g > kkt && a < C) { kkt = g; idx = c; }
455 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
459 if (kkt < epsilon)
break;
462 const double a = alpha(idx);
463 const double g = gradient(idx);
465 double a_new = a + m;
480 const double dg = 0.5 * m * qq;
481 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dg;
484 gain += m * (g - dg);
498 template <
class InputT>
506 const DatasetType& dataset,
514 virtual double calcGradient(RealVector& gradient, RealVector wx, blas::matrix_row<RealMatrix>
const& alpha,
double C,
unsigned int y)
516 double violation = 0.0;
525 const double g = 1.0 + wx(c);
527 if (g > violation && alpha(c) < C) violation = g;
528 else if (-g > violation && alpha(c) > 0.0) violation = -g;
537 double mean_mu = 0.0;
538 for (std::size_t c=0; c<
m_classes; c++) mean_mu += mu(c);
539 mean_mu /= (double)m_classes;
540 RealVector step(m_classes);
541 for (std::size_t c=0; c<
m_classes; c++) step(c) = mean_mu - mu(c);
546 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, blas::matrix_row<RealMatrix>& alpha, RealVector& mu)
549 const double qq = (1.0 - ood) * q;
554 for (iter=0; iter<maxiter; iter++)
561 if (c == y)
continue;
563 const double g = gradient(c);
564 const double a = alpha(c);
565 if (g > kkt && a < C) { kkt = g; idx = c; }
566 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
570 if (kkt < epsilon)
break;
573 const double a = alpha(idx);
574 const double g = gradient(idx);
576 double a_new = a + m;
591 const double dg = m * q;
593 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += dgc;
596 gain += m * (g - 0.5 * (dg - dgc));
610 template <
class InputT>
618 const DatasetType& dataset,
626 virtual double calcGradient(RealVector& gradient, RealVector wx, blas::matrix_row<RealMatrix>
const& alpha,
double C,
unsigned int y)
628 double violation = 0.0;
631 const double g = (c == y) ? 1.0 - wx(y) : 1.0 + wx(c);
633 if (g > violation && alpha(c) < C) violation = g;
634 else if (-g > violation && alpha(c) > 0.0) violation = -g;
642 unsigned int y =
m_data[index].label;
643 double mean = -2.0 * mu(y);
644 for (std::size_t c=0; c<
m_classes; c++) mean += mu(c);
645 mean /= (double)m_classes;
646 RealVector step(m_classes);
647 for (std::size_t c=0; c<
m_classes; c++) step(c) = ((c == y) ? (mu(c) +
mean) : (mean - mu(c)));
652 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, blas::matrix_row<RealMatrix>& alpha, RealVector& mu)
655 const double qq = (1.0 - ood) * q;
660 for (iter=0; iter<maxiter; iter++)
667 const double g = gradient(c);
668 const double a = alpha(c);
669 if (g > kkt && a < C) { kkt = g; idx = c; }
670 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
674 if (kkt < epsilon)
break;
677 const double a = alpha(idx);
678 const double g = gradient(idx);
680 double a_new = a + m;
695 const double dg = m * q;
699 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dgc;
700 gradient(idx) -= dg - 2.0 * dgc;
704 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += (c == y) ? -dgc : dgc;
708 gain += m * (g - 0.5 * (dg - dgc));
722 template <
class InputT>
730 const DatasetType& dataset,
738 virtual double calcGradient(RealVector& gradient, RealVector wx, blas::matrix_row<RealMatrix>
const& alpha,
double C,
unsigned int y)
740 for (std::size_t c=0; c<
m_classes; c++) gradient(c) = 0.0;
741 const double g = 1.0 - wx(y);
743 const double a = alpha(0);
746 if (a == C)
return 0.0;
751 if (a == 0.0)
return 0.0;
759 unsigned int y =
m_data[index].label;
764 for (
size_t c=0; c<
m_classes; c++) step(c) = (c == y) ? sy : sc;
769 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, blas::matrix_row<RealMatrix>& alpha, RealVector& mu)
772 const double qq = (1.0 - ood) * q;
775 const double g = gradient(y);
776 const double a = alpha(0);
777 if (g > kkt && a < C) kkt = g;
778 else if (-g > kkt && a > 0.0) kkt = -g;
781 if (kkt < epsilon)
return 0.0;
785 double a_new = a + m;
800 return m * (g - 0.5 * m * qq);
811 template <
class InputT>
819 const DatasetType& dataset,
827 virtual double calcGradient(RealVector& gradient, RealVector wx, blas::matrix_row<RealMatrix>
const& alpha,
double C,
unsigned int y)
831 double violation = 0.0;
832 for (std::size_t c=0; c<wx.size(); c++)
840 const double g = 1.0 - 0.5 * (wx(y) - wx(c));
842 if (g > violation) violation = g;
843 else if (-g > violation && alpha(c) > 0.0) violation = -g;
851 double kkt_up = 0.0, kkt_down = 1e100;
860 const double g = 1.0 - 0.5 * (wx(y) - wx(c));
862 if (g > kkt_up && alpha(c) < C) kkt_up = g;
863 if (g < kkt_down && alpha(c) > 0.0) kkt_down = g;
866 return std::max(0.0, kkt_up - kkt_down);
873 unsigned int y =
m_data[index].label;
875 for (std::size_t c=0; c<
m_classes; c++)
if (c != y) sum_mu += mu(c);
876 RealVector step(-0.5 * mu);
877 step(y) = 0.5 * sum_mu;
882 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, blas::matrix_row<RealMatrix>& alpha, RealVector& mu)
884 const double qq = 0.5 * q;
889 for (iter=0; iter<maxiter; iter++)
893 std::size_t idx_up = 0, idx_down = 0;
899 double kkt_up = -1e100, kkt_down = 1e100;
902 if (c == y)
continue;
904 const double g = gradient(c);
905 const double a = alpha(c);
906 if (g > kkt_up && a < C) { kkt_up = g; idx_up = c; }
907 if (g < kkt_down && a > 0.0) { kkt_down = g; idx_down = c; }
918 grad = kkt_up - kkt_down;
927 if (c == y)
continue;
929 const double g = gradient(c);
930 const double a = alpha(c);
931 if (g > kkt) { kkt = g; idx = c; }
932 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
934 grad = gradient(idx);
938 if (kkt < epsilon)
return gain;
943 const double a_up = alpha(idx_up);
944 const double a_down = alpha(idx_down);
945 double m = grad / qq;
946 double a_up_new = a_up + m;
947 double a_down_new = a_down - m;
948 if (a_down_new <= 0.0)
954 alpha(idx_up) = a_up_new;
955 alpha(idx_down) = a_down_new;
960 const double dg = 0.5 * m * qq;
961 gradient(idx_up) -= dg;
962 gradient(idx_down) += dg;
963 gain += m * (grad - 2.0 * dg);
968 const double a = alpha(idx);
970 double m = grad / qq;
971 double a_new = a + m;
972 double a_sum_new = a_sum + m;
977 a_sum_new = a_sum + m;
979 else if (a_sum_new >= C)
990 const double dg = 0.5 * m * qq;
991 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dg;
993 gain += m * (grad - dg);
1008 template <
class InputT>
1016 const DatasetType& dataset,
1018 std::size_t classes)
1024 virtual double calcGradient(RealVector& gradient, RealVector wx, blas::matrix_row<RealMatrix>
const& alpha,
double C,
unsigned int y)
1028 double violation = 0.0;
1037 const double g = 1.0 + wx(c);
1039 if (g > violation) violation = g;
1040 else if (-g > violation && alpha(c) > 0.0) violation = -g;
1047 double kkt_up = 0.0, kkt_down = 1e100;
1056 const double g = 1.0 + wx(c);
1058 if (g > kkt_up && alpha(c) < C) kkt_up = g;
1059 if (g < kkt_down && alpha(c) > 0.0) kkt_down = g;
1062 return std::max(0.0, kkt_up - kkt_down);
1069 double mean_mu = 0.0;
1070 for (std::size_t c=0; c<
m_classes; c++) mean_mu += mu(c);
1071 mean_mu /= (double)m_classes;
1072 RealVector step(m_classes);
1073 for (
size_t c=0; c<
m_classes; c++) step(c) = mean_mu - mu(c);
1078 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, blas::matrix_row<RealMatrix>& alpha, RealVector& mu)
1081 const double qq = (1.0 - ood) * q;
1086 for (iter=0; iter<maxiter; iter++)
1089 std::size_t idx = 0;
1090 std::size_t idx_up = 0, idx_down = 0;
1096 double kkt_up = -1e100, kkt_down = 1e100;
1099 if (c == y)
continue;
1101 const double g = gradient(c);
1102 const double a = alpha(c);
1103 if (g > kkt_up && a < C) { kkt_up = g; idx_up = c; }
1104 if (g < kkt_down && a > 0.0) { kkt_down = g; idx_down = c; }
1115 grad = kkt_up - kkt_down;
1124 if (c == y)
continue;
1126 const double g = gradient(c);
1127 const double a = alpha(c);
1128 if (g > kkt) { kkt = g; idx = c; }
1129 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
1131 grad = gradient(idx);
1135 if (kkt < epsilon)
return gain;
1140 const double a_up = alpha(idx_up);
1141 const double a_down = alpha(idx_down);
1142 double m = grad / (2.0 * q);
1143 double a_up_new = a_up + m;
1144 double a_down_new = a_down - m;
1145 if (a_down_new <= 0.0)
1148 a_up_new = a_up + m;
1151 alpha(idx_up) = a_up_new;
1152 alpha(idx_down) = a_down_new;
1157 const double dg = m * q;
1159 gradient(idx_up) -= dg;
1160 gradient(idx_down) += dg;
1161 gain += m * (grad - (dg - dgc));
1166 const double a = alpha(idx);
1168 double m = grad / qq;
1169 double a_new = a + m;
1170 double a_sum_new = a_sum + m;
1175 a_sum_new = a_sum + m;
1177 else if (a_sum_new >= C)
1188 const double dg = m * q;
1190 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += dgc;
1191 gradient(idx) -= dg;
1192 gain += m * (grad - 0.5 * (dg - dgc));
1207 template <
class InputT>
1215 const DatasetType& dataset,
1217 std::size_t classes)
1223 virtual double calcGradient(RealVector& gradient, RealVector wx, blas::matrix_row<RealMatrix>
const& alpha,
double C,
unsigned int y)
1227 double violation = 0.0;
1230 const double g = (c == y) ? 1.0 - wx(y) : 1.0 + wx(c);
1232 if (g > violation) violation = g;
1233 else if (-g > violation && alpha(c) > 0.0) violation = -g;
1239 double kkt_up = 0.0, kkt_down = 1e100;
1242 const double g = (c == y) ? 1.0 - wx(y) : 1.0 + wx(c);
1244 if (g > kkt_up && alpha(c) < C) kkt_up = g;
1245 if (g < kkt_down && alpha(c) > 0.0) kkt_down = g;
1247 return std::max(0.0, kkt_up - kkt_down);
1254 unsigned int y =
m_data[index].label;
1255 double mean = -2.0 * mu(y);
1256 for (std::size_t c=0; c<
m_classes; c++) mean += mu(c);
1257 mean /= (double)m_classes;
1258 RealVector step(m_classes);
1259 for (
size_t c=0; c<
m_classes; c++) step(c) = (c == y) ? (mu(c) +
mean) : (mean - mu(c));
1264 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, blas::matrix_row<RealMatrix>& alpha, RealVector& mu)
1267 const double qq = (1.0 - ood) * q;
1272 for (iter=0; iter<maxiter; iter++)
1275 std::size_t idx = 0;
1276 std::size_t idx_up = 0, idx_down = 0;
1282 double kkt_up = -1e100, kkt_down = 1e100;
1285 const double g = gradient(c);
1286 const double a = alpha(c);
1287 if (g > kkt_up && a < C) { kkt_up = g; idx_up = c; }
1288 if (g < kkt_down && a > 0.0) { kkt_down = g; idx_down = c; }
1299 grad = kkt_up - kkt_down;
1308 const double g = gradient(c);
1309 const double a = alpha(c);
1310 if (g > kkt) { kkt = g; idx = c; }
1311 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
1313 grad = gradient(idx);
1317 if (kkt < epsilon)
return gain;
1322 const double a_up = alpha(idx_up);
1323 const double a_down = alpha(idx_down);
1324 double m = grad / (2.0 * q);
1325 double a_up_new = a_up + m;
1326 double a_down_new = a_down - m;
1327 if (a_down_new <= 0.0)
1330 a_up_new = a_up + m;
1333 alpha(idx_up) = a_up_new;
1334 alpha(idx_down) = a_down_new;
1339 const double dg = m * q;
1343 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dgc;
1344 gradient(idx_up) -= dg - 2.0 * dgc;
1345 gradient(idx_down) += dg;
1347 else if (idx_down == y)
1349 gradient(idx_up) -= dg;
1350 gradient(idx_down) += dg - 2.0 * dgc;
1354 gradient(idx_up) -= dg;
1355 gradient(idx_down) += dg;
1357 gain += m * (grad - (dg - dgc));
1362 const double a = alpha(idx);
1364 double m = grad / qq;
1365 double a_new = a + m;
1366 double a_sum_new = a_sum + m;
1371 a_sum_new = a_sum + m;
1373 else if (a_sum_new >= C)
1384 const double dg = m * q;
1388 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dgc;
1389 gradient(idx) -= dg - 2.0 * dgc;
1393 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += (c == y) ? -dgc : dgc;
1394 gradient(idx) -= dg;
1396 gain += m * (grad - 0.5 * (dg - dgc));
1411 template <
class InputT>
1419 const DatasetType& dataset,
1421 std::size_t classes)
1427 virtual double calcGradient(RealVector& gradient, RealVector wx, blas::matrix_row<RealMatrix>
const& alpha,
double C,
unsigned int y)
1429 double violation = 0.0;
1432 const double g = (c == y) ? (m_classes - 1.0) - wx(y) : 1.0 + wx(c);
1434 if (g > violation && alpha(c) < C) violation = g;
1435 else if (-g > violation && alpha(c) > 0.0) violation = -g;
1443 unsigned int y =
m_data[index].label;
1444 double mean = -2.0 * mu(y);
1445 for (std::size_t c=0; c<
m_classes; c++) mean += mu(c);
1446 mean /= (double)m_classes;
1447 RealVector step(m_classes);
1448 for (std::size_t c=0; c<
m_classes; c++) step(c) = ((c == y) ? (mu(c) +
mean) : (mean - mu(c)));
1453 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, blas::matrix_row<RealMatrix>& alpha, RealVector& mu)
1456 const double qq = (1.0 - ood) * q;
1461 for (iter=0; iter<maxiter; iter++)
1464 std::size_t idx = 0;
1468 const double g = gradient(c);
1469 const double a = alpha(c);
1470 if (g > kkt && a < C) { kkt = g; idx = c; }
1471 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
1475 if (kkt < epsilon)
break;
1478 const double a = alpha(idx);
1479 const double g = gradient(idx);
1481 double a_new = a + m;
1487 else if (a_new >= C)
1496 const double dg = m * q;
1500 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dgc;
1501 gradient(idx) -= dg - 2.0 * dgc;
1505 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += (c == y) ? -dgc : dgc;
1506 gradient(idx) -= dg;
1509 gain += m * (g - 0.5 * (dg - dgc));