1 #ifndef SHARK_ALGORITHMS_CSVMTRAINER_H 2 #define SHARK_ALGORITHMS_CSVMTRAINER_H 73 template <
class InputType,
class CacheType =
float>
75 InputType, unsigned int,
76 KernelClassifier<InputType>,
77 AbstractWeightedTrainer<KernelClassifier<InputType> >
// Constructor taking a single regularization value C.
// Forwards kernel, C, offset and unconstrained to base_type, switches the
// derivative computation off (m_computeDerivative = false) and selects
// McSvm::WW as m_McSvmType. `unconstrained` defaults to false.
// NOTE(review): the constructor body is not part of this excerpt.
105 CSvmTrainer(KernelType* kernel,
double C,
bool offset,
bool unconstrained =
false)
106 : base_type(kernel, C, offset, unconstrained), m_computeDerivative(false), m_McSvmType(
McSvm::
WW)
// Constructor taking two regularization values, negativeC and positiveC.
// Forwards kernel, negativeC, positiveC, offset and unconstrained to base_type,
// switches the derivative computation off and selects McSvm::WW as m_McSvmType.
// `unconstrained` defaults to false.
// NOTE(review): the constructor body is not part of this excerpt.
115 CSvmTrainer(KernelType* kernel,
double negativeC,
double positiveC,
bool offset,
bool unconstrained =
false)
116 : base_type(kernel,negativeC, positiveC, offset, unconstrained), m_computeDerivative(false), m_McSvmType(
McSvm::
WW)
// Returns the fixed identifier string "CSvmTrainer".
// NOTE(review): the enclosing function header is not part of this excerpt.
121 {
return "CSvmTrainer"; }
// Stores the caller-supplied flag `compute` in m_computeDerivative.
// NOTE(review): the enclosing setter's signature is not part of this excerpt;
// the block name used for this edit is an assumption.
124 m_computeDerivative = compute;
// Fragment of a training routine. The signature, braces, case labels and several
// statements are missing from this excerpt, so the comments below describe only
// the visible statements.
//
// Binary path: check whether the decision function `f` already has the expected
// structure (basis equal to the dataset inputs, same kernel, alpha of size ell x 1).
142 if (f.basis() == dataset.
inputs() && f.kernel() == base_type::m_kernel && f.alpha().size1() == ell && f.alpha().size2() == 1) {
// When an offset is trained, f gets a one-element offset vector.
144 if (this->m_trainOffset) f.offset() = RealVector(1);
// (Re)build f from the kernel, the dataset inputs and the offset flag
// (presumably the alternative branch of the structure check above — TODO confirm).
147 f.setStructure(base_type::m_kernel, dataset.
inputs(), this->m_trainOffset);
// Delegate the binary problem to trainBinary.
151 trainBinary(f,dataset);
// Optional sparsification step; the guarded statement is not visible here.
153 if (base_type::sparsify())
// One-versus-all path: delegate to trainOVA.
160 trainOVA(svm,dataset);
// Flags handed to the multi-class solvers; both start out false. The places where
// they may be set to true are not visible in this excerpt.
165 bool sumToZero =
false;
166 bool simplex =
false;
// Select the nu / M parameter setup according to the configured multi-class SVM type.
// The case labels are not visible, so which type maps to which setup call cannot
// be read off this excerpt.
170 switch (m_McSvmType){
174 setupMcParametersWWCS(nu,M, classes);
179 setupMcParametersWWCS(nu,M, classes);
184 setupMcParametersADMLLW(nu,M, classes);
189 setupMcParametersATMATS(nu,M, classes);
194 setupMcParametersATMATS(nu,M, classes);
199 setupMcParametersADMLLW(nu,M, classes);
204 setupMcParametersATMATS(nu,M, classes);
209 setupMcParametersMMR(nu,M, classes);
// Matrix with ell rows and M.width() columns, constructed with the value 1.0.
216 RealMatrix linear(ell,M.
width(),1.0);
218 auto const& labels = dataset.
labels();
// For each label y, the entry (i, y) is overwritten with classes - 1.0.
// NOTE(review): the declaration/increment of `i` is not visible in this excerpt.
220 for(
unsigned int y: labels.elements()){
221 linear(i, y) = classes - 1.0;
// Solution containers: alpha (ell x M.width()) and bias (one entry per class),
// both constructed with the value 0.0.
227 RealMatrix alpha(ell,M.
width(),0.0);
228 RealVector bias(classes,0.0);
// One of the two solvers is invoked; the selecting condition is not visible
// (presumably the `simplex` flag — TODO confirm).
230 solveMcSimplex(sumToZero,nu,M,linear,alpha,bias,dataset);
232 solveMcBox(sumToZero,nu,M,linear,alpha,bias,dataset);
// For every training point i and class c, accumulate sum over p of
// nu(r, c) * alpha(i, p), where r starts at alpha.size2() * y (y = label of point i)
// and advances together with p. Where `sum` is stored is not visible here.
238 for (std::size_t i=0; i<ell; i++)
240 unsigned int y = dataset.
element(i).label;
241 for (std::size_t c=0; c<classes; c++)
244 std::size_t r = alpha.size2() * y;
245 for (std::size_t p=0; p != alpha.size2(); p++, r++)
246 sum += nu(r, c) * alpha(i, p);
// Offset handling and optional sparsification; the guarded statements are not visible.
251 if (this->m_trainOffset)
254 if (this->sparsify())
// Fragment of a second training routine (signature not visible; the call at the
// end passes `dataset` to trainBinary).
// n is the number of elements in the dataset.
262 std::size_t n = dataset.numberOfElements();
// Check whether `f` already has the expected structure: basis equal to the dataset
// inputs, same kernel, alpha of size n x 1.
264 if (f.basis() == dataset.
inputs() && f.kernel() == base_type::m_kernel && f.alpha().size1() == n && f.alpha().size2() == 1) {
// With a trained offset, f gets a one-element offset vector; otherwise an empty one.
266 if (this->m_trainOffset) f.offset() = RealVector(1);
267 else f.offset() = RealVector();
// (Re)build f from kernel, inputs and offset flag (presumably the alternative
// branch of the structure check above — TODO confirm).
270 f.setStructure(base_type::m_kernel, dataset.
inputs(), this->m_trainOffset);
// Solve the binary problem.
274 trainBinary(f, dataset);
// Sparsify the resulting function if sparsification is enabled.
276 if (base_type::sparsify()) f.sparsify();
// Fragment of one of the two multi-class solve helpers (solveMcSimplex / solveMcBox).
// NOTE(review): which of the two this is cannot be determined from the visible
// lines; the declarations of problem, solver, biasSolver and prop are missing.
// alpha and bias are passed by non-const reference.
287 RealMatrix& alpha, RealVector& bias,
// Kernel matrix over the dataset inputs.
294 KernelMatrixType km(*base_type::m_kernel, dataset.
inputs());
// First variant: wrap the kernel matrix in a PrecomputedMatrixType when
// precomputeKernel() is set.
296 if (base_type::precomputeKernel())
298 PrecomputedMatrixType matrix(&km);
301 problem.setShrinking(base_type::m_shrinking);
// With a trained offset, biasSolver.solve is called with the bias vector;
// the plain solver.solve call below is presumably the alternative branch.
302 if(this->m_trainOffset){
304 biasSolver.
solve(bias,base_type::m_stoppingcondition,nu,sumToZero, &prop);
308 solver.
solve( base_type::m_stoppingcondition, &prop);
// Copy the problem's solution into the caller's alpha.
310 alpha = problem.solution();
// Second variant: wrap the kernel matrix in a CachedMatrixType constructed with
// m_cacheSize (presumably the else branch of the precompute check — TODO confirm).
314 CachedMatrixType matrix(&km, base_type::m_cacheSize);
317 problem.setShrinking(base_type::m_shrinking);
318 if(this->m_trainOffset){
320 biasSolver.
solve(bias,base_type::m_stoppingcondition,nu,sumToZero, &prop);
324 solver.
solve( base_type::m_stoppingcondition, &prop);
326 alpha = problem.solution();
// Record the access count reported by the kernel matrix.
328 base_type::m_accessCount = km.getAccessCount();
// Fragment of the second multi-class solve helper (solveMcSimplex / solveMcBox).
// NOTE(review): which of the two this is cannot be determined from the visible
// lines; the declarations of problem, solver, biasSolver and prop are missing.
// alpha and bias are passed by non-const reference.
333 RealMatrix& alpha, RealVector& bias,
// Kernel matrix over the dataset inputs.
340 KernelMatrixType km(*base_type::m_kernel, dataset.
inputs());
// First variant: wrap the kernel matrix in a PrecomputedMatrixType when
// precomputeKernel() is set.
342 if (base_type::precomputeKernel())
344 PrecomputedMatrixType matrix(&km);
347 problem.setShrinking(base_type::m_shrinking);
// With a trained offset, biasSolver.solve is called with the bias vector;
// the plain solver.solve call below is presumably the alternative branch.
348 if(this->m_trainOffset){
350 biasSolver.
solve(bias,base_type::m_stoppingcondition,nu, sumToZero, &prop);
354 solver.
solve( base_type::m_stoppingcondition, &prop);
// Copy the problem's solution into the caller's alpha.
356 alpha = problem.solution();
// Second variant: wrap the kernel matrix in a CachedMatrixType constructed with
// m_cacheSize (presumably the else branch of the precompute check — TODO confirm).
360 CachedMatrixType matrix(&km, base_type::m_cacheSize);
363 problem.setShrinking(base_type::m_shrinking);
364 if(this->m_trainOffset){
366 biasSolver.
solve(bias,base_type::m_stoppingcondition,nu, sumToZero, &prop);
370 solver.
solve( base_type::m_stoppingcondition, &prop);
372 alpha = problem.solution();
// Record the access count reported by the kernel matrix.
374 base_type::m_accessCount = km.getAccessCount();
// Helper templated on a Trainer type: trains `svm` on `dataset` by delegating to a
// freshly constructed Trainer that mirrors this object's configuration.
// NOTE(review): the function signature is not part of this excerpt; the block
// name used for this edit is an assumption.
377 template<
class Trainer>
// Construct the delegate from this trainer's kernel, C and offset flag.
379 Trainer trainer(base_type::m_kernel,this->C(),this->m_trainOffset);
// Copy the solver settings onto the delegate.
380 trainer.stoppingCondition() = this->stoppingCondition();
381 trainer.precomputeKernel() = this->precomputeKernel();
382 trainer.sparsify() = this->sparsify();
383 trainer.shrinking() = this->shrinking();
384 trainer.s2do() = this->s2do();
385 trainer.verbosity() = this->verbosity();
386 trainer.setCacheSize(this->cacheSize());
// Run the delegate, then copy its solution properties and access count back.
387 trainer.train(svm,dataset);
388 this->solutionProperties() = trainer.solutionProperties();
389 base_type::m_accessCount = trainer.accessCount();
// Fragment that fills the sparse parameter objects `nu` and `M` as a function of
// the number of classes.
// NOTE(review): the signature, the conditions selecting between the M.add variants
// and the nu entries are not visible; the block name used for this edit is an
// assumption. The meaning of the third resize argument is not shown here
// (presumably a capacity hint — TODO confirm).
// nu: classes*(classes-1) rows, classes columns.
393 nu.
resize(classes * (classes-1), classes, 2*classes*(classes-1));
// r is a running row index over all (y, p) pairs; the loop body is not visible.
394 for (
unsigned int r=0, y=0; y<classes; y++)
396 for (
unsigned int p=0, pp=0; p<classes-1; p++, pp++, r++)
// M: classes*(classes-1)*classes rows, classes-1 columns.
412 M.
resize(classes * (classes-1) * classes, classes-1, 2 * classes * (classes-1) * (classes-1));
// r is a running row index over all (yv, pv, yw) triples.
413 for (
unsigned int r=0, yv=0; yv<classes; yv++)
415 for (
unsigned int pv=0, ppv=0; pv<classes-1; pv++, ppv++)
// ppv steps over the value yv, so it enumerates the class indices other than yv.
417 if (ppv == yv) ppv++;
418 for (
unsigned int yw=0; yw<classes; yw++, r++)
// baseM is +0.25 when yv == yw, minus 0.25 when ppv == yw (0 when neither holds).
420 QpFloatType baseM = (yv == yw ? (QpFloatType)0.25 : (QpFloatType)0.0) - (ppv == yw ? (QpFloatType)0.25 : (QpFloatType)0.0);
// Column index: the class index, reduced by one when it is >= yw.
424 M.
add(r, ppv - (ppv >= yw ? 1 : 0), baseM + (QpFloatType)0.25);
428 M.
add(r, yv - (yv >= yw ? 1 : 0), baseM - (QpFloatType)0.25);
// Same two column indices, precomputed.
432 unsigned int pw = ppv - (ppv >= yw ? 1 : 0);
433 unsigned int pw2 = yv - (yv >= yw ? 1 : 0);
// The same two entries are added in either order; the condition choosing the
// order is not visible in this excerpt.
436 M.
add(r, pw, baseM + (QpFloatType)0.25);
437 M.
add(r, pw2, baseM - (QpFloatType)0.25);
441 M.
add(r, pw2, baseM - (QpFloatType)0.25);
442 M.
add(r, pw, baseM + (QpFloatType)0.25);
// Fragment that fills the sparse parameter objects `nu` and `M` as a function of
// the number of classes.
// NOTE(review): the signature and the conditions selecting between the M.add calls
// are not visible; the block name used for this edit is an assumption.
// nu: classes*classes rows, classes columns.
451 nu.
resize(classes*classes, classes, classes*classes);
// r is a running row index over all (y, p) pairs.
452 for (
unsigned int r=0, y=0; y<classes; y++)
454 for (
unsigned int p=0; p<classes; p++, r++)
// Entry (r, p) is +1.0 when p == y and -1.0 otherwise.
456 nu.
add(r, p, (QpFloatType)((p == y) ? 1.0 : -1.0));
// M: classes^3 rows, classes columns.
460 M.
resize(classes * classes * classes, classes, 2 * classes * classes * classes);
// c_ne = -1/classes, c_eq = 1 + c_ne.
461 QpFloatType c_ne = (QpFloatType)(-1.0 / (
double)classes);
462 QpFloatType c_eq = (QpFloatType)1.0 + c_ne;
// r is a running row index over all (yv, pv, yw) triples.
463 for (
unsigned int r=0, yv=0; yv<classes; yv++)
465 for (
unsigned int pv=0; pv<classes; pv++)
// sign is -1 when yv == pv and +1 otherwise.
467 QpFloatType sign = QpFloatType((yv == pv) ? -1 : 1);
468 for (
unsigned int yw=0; yw<classes; yw++, r++)
// The branch conditions for the following additions are not visible here.
473 M.
add(r, pv, -sign * c_eq);
477 M.
add(r, pv, sign * c_eq);
478 M.
add(r, yw, -sign * c_ne);
// Fragment that fills the sparse parameter objects `nu` and `M` as a function of
// the number of classes.
// NOTE(review): the signature and the statements that use mood, val and pw are not
// visible; the block name used for this edit is an assumption.
// nu: classes*(classes-1) rows, classes columns.
486 nu.
resize(classes * (classes-1), classes, classes*(classes-1));
// r is a running row index over all (y, p) pairs.
487 for (
unsigned int r=0, y=0; y<classes; y++)
489 for (
unsigned int p=0, pp=0; p<classes-1; p++, pp++, r++)
// Entry (r, pp) is set to -1.0. How pp is adjusted inside the loop is not visible.
492 nu.
add(r, pp, (QpFloatType)-1.0);
// M: classes*(classes-1)*classes rows, classes-1 columns.
496 M.
resize(classes * (classes-1) * classes, classes-1, classes * (classes-1) * (classes-1));
// mood = -1/classes, val = 1 + mood.
497 QpFloatType mood = (QpFloatType)(-1.0 / (
double)classes);
498 QpFloatType val = (QpFloatType)1.0 + mood;
// r is a running row index over all (yv, pv, yw) triples.
499 for (
unsigned int r=0, yv=0; yv<classes; yv++)
501 for (
unsigned int pv=0, ppv=0; pv<classes-1; pv++, ppv++)
// ppv steps over the value yv, so it enumerates the class indices other than yv.
503 if (ppv == yv) ppv++;
504 for (
unsigned int yw=0; yw<classes; yw++, r++)
// pw is ppv, reduced by one when ppv is strictly greater than yw.
509 unsigned int pw = ppv - (ppv > yw ? 1 : 0);
// Fragment that fills the sparse parameter objects `nu` and `M` as a function of
// the number of classes.
// NOTE(review): the signature, the nu entries and the alternative to the visible
// M.add call are not part of this excerpt; the block name used for this edit is an
// assumption.
// nu: classes rows, classes columns.
518 nu.
resize(classes, classes, classes);
// Loop over the classes; the body is not visible.
519 for (
unsigned int y=0; y<classes; y++)
// M: classes*classes rows, a single column.
522 M.
resize(classes * classes, 1, classes);
// mood = -1/classes, val = 1 + mood.
523 QpFloatType mood = (QpFloatType)(-1.0 / (
double)classes);
524 QpFloatType val = (QpFloatType)1.0 + mood;
// r is a running row index over all (yv, yw) pairs.
525 for (
unsigned int r=0, yv=0; yv<classes; yv++)
527 for (
unsigned int yw=0; yw<classes; yw++, r++)
// For yv == yw the entry (r, 0) is val; the yv != yw case is not visible here.
530 if (yv == yw) M.
add(r, 0, val);
// Fragment that trains one binary machine per class and aggregates the solver
// statistics.
// NOTE(review): the signature, the construction of bindata/bintrainer and the code
// that copies each binary solution into the result are not visible; the block name
// used for this edit is an assumption.
// Reset the aggregated solution properties before the per-class loop.
539 base_type::m_solutionproperties.type =
QpNone;
540 base_type::m_solutionproperties.accuracy = 0.0;
541 base_type::m_solutionproperties.iterations = 0;
542 base_type::m_solutionproperties.value = 0.0;
543 base_type::m_solutionproperties.seconds = 0.0;
// One iteration per class c.
544 for (
unsigned int c=0; c<classes; c++)
// Binary classifier that receives the solution for the current class.
547 KernelClassifier<InputType> binsvm;
// Copy shrinking, s2do and verbosity settings onto the binary trainer.
557 bintrainer.
shrinking() = base_type::shrinking();
558 bintrainer.
s2do() = base_type::s2do();
559 bintrainer.
verbosity() = base_type::verbosity();
560 bintrainer.
train(binsvm, bindata);
// Iterations are summed over the binary problems.
561 base_type::m_solutionproperties.iterations += bintrainer.
solutionProperties().iterations;
// Accuracy is aggregated as the maximum over the binary problems.
563 base_type::m_solutionproperties.accuracy = std::max(base_type::solutionProperties().accuracy, bintrainer.
solutionProperties().accuracy);
// Offset handling; the guarded statement is not visible.
565 if (this->m_trainOffset)
// Access counts are summed over the binary problems.
567 base_type::m_accessCount += bintrainer.
accessCount();
// Optional sparsification; the guarded statement is not visible.
570 if (base_type::sparsify())
// Template (parameters T and DatasetTypeT) that forwards to the three-argument
// trainBinary overload with a kernel matrix `km`.
// NOTE(review): the signature and the construction of km are not part of this excerpt.
575 template<
class T,
class DatasetTypeT>
578 trainBinary(km,svm,dataset);
// Template (parameters T and DatasetTypeT) that probes, via dynamic_cast, whether
// the configured kernel is a Gaussian kernel and then forwards to the
// three-argument trainBinary overload.
// NOTE(review): the signature, the branch conditions and the two constructions of
// km are not visible, so how the cast result affects km cannot be stated here.
582 template<
class T,
class DatasetTypeT>
// kernel is a null pointer when m_kernel is not a Gaussian (dynamic_cast on a pointer).
586 Gaussian
const* kernel =
dynamic_cast<Gaussian const*
> (base_type::m_kernel);
589 trainBinary(km,svm,dataset);
593 trainBinary(km,svm,dataset);
// Template (parameters Matrix and T) that calls optimize on an SVM problem in two
// alternative places and finally records the kernel-matrix access count.
// NOTE(review): the signature, the condition separating the two optimize calls and
// the construction of svmProblem are not part of this excerpt.
598 template<
class Matrix,
class T>
604 optimize(svm,svmProblem,dataset);
610 optimize(svm,svmProblem,dataset);
// Record the access count reported by the kernel matrix.
612 base_type::m_accessCount = km.getAccessCount();
// Template (parameters Matrix and T) for a dataset that exposes weights() and
// data(): the problem is built from the matrix, the labels, the weights and
// m_regularizers, and optimize is called on dataset.data().
// NOTE(review): the signature, the condition separating the two variants and the
// full svmProblem constructor calls are not part of this excerpt.
616 template<
class Matrix,
class T>
622 matrix,dataset.
labels(),dataset.weights(),base_type::m_regularizers
624 optimize(svm,svmProblem,dataset.data());
630 matrix,dataset.
labels(),dataset.weights(),base_type::m_regularizers
632 optimize(svm,svmProblem,dataset.data());
// Record the access count reported by the kernel matrix.
634 base_type::m_accessCount = km.getAccessCount();
// Template (parameter SVMProblemType) that solves a binary SVM problem, warm-started
// from the alpha values currently stored in `svm`, and writes the solution back.
// NOTE(review): the signature, the solver declarations and the loops that define
// `i` and `label` are not visible. The second half is presumably the branch
// without a trained offset — TODO confirm.
637 template<
class SVMProblemType>
639 if (this->m_trainOffset)
// Wrap the problem; the second argument is the shrinking flag.
642 ProblemType problem(svmProblem,base_type::m_shrinking);
// C_minus is the first regularization parameter; C_plus equals it when only one
// parameter is set, otherwise it is the second one.
645 RealVector
const& reg = this->regularizationParameters();
646 double C_minus = reg(0);
647 double C_plus = (reg.size() == 1) ? reg(0) : reg(1);
// Clamp the stored alpha into the feasible box: [-C_minus, 0] for label 0,
// [0, C_plus] otherwise.
650 double a = svm.
alpha()(i, 0);
651 if (label == 0) a = std::max(std::min(a, 0.0), -C_minus);
652 else a = std::min(std::max(a, 0.0), C_plus);
653 svm.
alpha()(i, 0) = a;
// Use the clamped first alpha column as the initial solution, solve, then write
// the unpermuted solution back into that column.
656 problem.setInitialSolution(blas::column(svm.
alpha(), 0));
657 solver.
solve(base_type::stoppingCondition(), &base_type::solutionProperties());
658 column(svm.
alpha(),0)= problem.getUnpermutedAlpha();
// The offset is taken from computeBias.
659 svm.
offset(0) = computeBias(problem,dataset);
// Second variant: same warm start and solve, but no offset is written.
664 ProblemType problem(svmProblem,base_type::m_shrinking);
667 RealVector
const& reg = this->regularizationParameters();
668 double C_minus = reg(0);
669 double C_plus = (reg.size() == 1) ? reg(0) : reg(1);
// Same clamping as above: [-C_minus, 0] for label 0, [0, C_plus] otherwise.
672 double a = svm.
alpha()(i, 0);
673 if (label == 0) a = std::max(std::min(a, 0.0), -C_minus);
674 else a = std::min(std::max(a, 0.0), C_plus);
675 svm.
alpha()(i, 0) = a;
678 problem.setInitialSolution(blas::column(svm.
alpha(), 0));
679 solver.
solve(base_type::stoppingCondition(), &base_type::solutionProperties());
680 column(svm.
alpha(),0) = problem.getUnpermutedAlpha();
// Vector that the bias computation resizes to (number of kernel parameters + 1):
// entries 0..nkp-1 are written once per kernel parameter, and entry nkp is written
// from the dlower_dC / dupper_dC sums.
684 RealVector m_db_dParams;
// Flag checked by the bias computation: when false, it returns before the
// derivative part is evaluated.
686 bool m_computeDerivative;
// Template (parameter Problem) that returns a bias value for a solved problem and,
// when m_computeDerivative is set, also fills m_db_dParams.
// NOTE(review): the signature, several loop bodies, the declarations of sum,
// lowerInput, upperInput and singleInput, and a number of braces are not part of
// this excerpt; the comments describe only the visible statements.
689 template<
class Problem>
// m_db_dParams gets one entry per kernel parameter plus one extra entry, and is cleared.
691 std::size_t nkp = base_type::m_kernel->numberOfParameters();
692 m_db_dParams.resize(nkp+1);
693 m_db_dParams.clear();
// An empty problem yields a bias of 0.0.
695 std::size_t ell = problem.dimensions();
696 if (ell == 0)
return 0.0;
// Running bounds, initialised to -1e100 / +1e100.
699 double lowerBound = -1e100;
700 double upperBound = 1e100;
// Counter of free variables and the indices associated with the two bounds.
702 std::size_t freeVars = 0;
703 std::size_t lower_i = 0;
704 std::size_t upper_i = 0;
705 for (std::size_t i=0; i<ell; i++)
707 double value = problem.gradient(i);
// Variable at its lower box bound: its gradient is compared against lowerBound.
// The update inside the branch is not visible.
708 if (problem.alpha(i) == problem.boxMin(i))
710 if (value > lowerBound) {
// Variable at its upper box bound: its gradient is compared against upperBound.
715 else if (problem.alpha(i) == problem.boxMax(i))
717 if (value < upperBound) {
// Average over the free variables; the guarding condition is not visible
// (presumably freeVars > 0 — TODO confirm).
729 return sum / freeVars;
// Without derivative computation, the midpoint of the two bounds is returned.
731 if(!m_computeDerivative)
732 return 0.5 * (lowerBound + upperBound);
// Map both indices through the problem's permutation.
734 lower_i = problem.permutation(lower_i);
735 upper_i = problem.permutation(upper_i);
// The derivative part requires exactly one regularization parameter.
737 SHARK_RUNTIME_CHECK(base_type::m_regularizers.size() == 1,
"derivative only implemented for SVM with one C" );
// Accumulators for the C-related sums at the lower / upper input.
741 double dlower_dC = 0.0;
742 double dupper_dC = 0.0;
// Accumulators for the kernel-parameter derivative sums, one entry per parameter.
745 RealVector dupper_dkernel( nkp,0 );
746 RealVector dlower_dkernel( nkp,0 );
// Kernel state and scratch vector for the parameter derivatives.
748 boost::shared_ptr<State> kernelState = base_type::m_kernel->createState();
749 RealVector der(nkp );
// 1x1 matrix constructed with value 1, passed as the coefficient argument of
// weightedParameterDerivative, and a 1x1 matrix receiving kernel evaluations.
759 RealMatrix one(1,1,1);
760 RealMatrix result(1,1);
762 for (std::size_t i=0; i<ell; i++) {
// Only variables with a nonzero alpha contribute.
763 double cur_alpha = problem.alpha(problem.permutation(i));
764 if ( cur_alpha != 0 ) {
// +1 for a positive alpha, -1 for a negative alpha.
765 int cur_label = ( cur_alpha>0.0 ? 1 : -1 );
// Contribution of this point relative to lowerInput.
768 base_type::m_kernel->eval( lowerInput, singleInput, result, *kernelState );
769 dlower_dC += cur_label * result(0,0);
770 base_type::m_kernel->weightedParameterDerivative( lowerInput, singleInput,one, *kernelState, der );
771 for ( std::size_t k=0; k<nkp; k++ ) {
772 dlower_dkernel(k) += cur_label * der(k);
// Contribution of this point relative to upperInput.
775 base_type::m_kernel->eval( upperInput, singleInput,result, *kernelState );
776 dupper_dC += cur_label * result(0,0);
777 base_type::m_kernel->weightedParameterDerivative( upperInput, singleInput, one, *kernelState, der );
778 for ( std::size_t k=0; k<nkp; k++ ) {
779 dupper_dkernel(k) += cur_label * der(k);
// Last entry: -0.5 times the sum of the two C-related accumulators.
784 m_db_dParams( nkp ) = -0.5 * ( dlower_dC + dupper_dC );
// Kernel-parameter entries: -0.5 * C times the sum of the two kernel accumulators.
785 for ( std::size_t k=0; k<nkp; k++ ) {
786 m_db_dParams(k) = -0.5 * this->C() * ( dlower_dkernel(k) + dupper_dkernel(k) );
// With m_unconstrained set, the last entry is additionally multiplied by C.
788 if ( base_type::m_unconstrained ) {
789 m_db_dParams( nkp ) *= this->C();
// The returned bias is the midpoint of the two bounds.
792 return 0.5 * (lowerBound + upperBound);
// Start of a class template parameterised on InputType; the class header and most
// of its members are not part of this excerpt.
797 template <
class InputType>
// Returns the fixed identifier string "LinearCSvmTrainer".
808 {
return "LinearCSvmTrainer"; }
// Fragment of a training routine for a linear model (signature, condition and
// case labels not visible).
// Binary problems are delegated to trainBinary.
819 trainBinary(model,dataset);
// Multi-class problems dispatch on m_McSvmType to a trainMc instantiation with the
// matching QpMcLinear* solver type, or to trainOVA. Which case label selects
// which call is not visible in this excerpt.
822 switch (m_McSvmType){
824 trainMc<QpMcLinearWW<InputType> >(model,dataset,classes);
827 trainMc<QpMcLinearCS<InputType> >(model,dataset,classes);
830 trainMc<QpMcLinearLLW<InputType> >(model,dataset,classes);
833 trainMc<QpMcLinearATM<InputType> >(model,dataset,classes);
836 trainMc<QpMcLinearATS<InputType> >(model,dataset,classes);
839 trainMc<QpMcLinearADM<InputType> >(model,dataset,classes);
842 trainMc<QpMcLinearMMR<InputType> >(model,dataset,classes);
845 trainMc<QpMcLinearReinforced<InputType> >(model,dataset,classes);
848 trainOVA(model,dataset,classes);
// Fragment of a binary training routine for the linear model.
// NOTE(review): the signature, the solver calls and the loop around the offset
// update are not visible; the block name used for this edit is an assumption.
// Variant without a trained offset: weight matrix with one row and dim columns,
// constructed with the value 0.0.
866 if(!this->trainOffset()){
867 RealMatrix
w(1, dim, 0.0);
// Variant with an offset: the offset is moved by a fixed step of 0.1 per update,
// downwards when grad is negative and upwards otherwise.
874 double stepSize = 0.1;
877 offset+= (grad < 0? -stepSize:stepSize);
// Weight matrix with one row and dim columns, constructed with the value 0.0.
895 RealMatrix
w(1, dim, 0.0);
// Template (parameter Solver) that builds a Solver from the dataset, the input
// dimension and the number of classes, and obtains the weight matrix from its
// solve() call.
// solve() receives the global RNG, C, the stopping condition, a pointer to the
// solution properties and a flag that is true when verbosity() > 0.
// NOTE(review): the signature and the code that stores w in the model are not part
// of this excerpt; the block name used for this edit is an assumption.
900 template<
class Solver>
904 Solver solver(dataset, dim, classes);
905 RealMatrix
w = solver.solve(
random::globalRng, this->C(), this->stoppingCondition(), &this->solutionProperties(), this->verbosity() > 0);
// Fragment that solves one problem per class for the linear model and aggregates
// the solver statistics.
// NOTE(review): the signature, the construction of the per-class solver and `prop`,
// and the code that fills w are not visible; the block name used for this edit is
// an assumption.
// Reset the aggregated solution properties before the per-class loop.
911 base_type::m_solutionproperties.type =
QpNone;
912 base_type::m_solutionproperties.accuracy = 0.0;
913 base_type::m_solutionproperties.iterations = 0;
914 base_type::m_solutionproperties.value = 0.0;
915 base_type::m_solutionproperties.seconds = 0.0;
// Weight matrix with one row per class and dim columns.
918 RealMatrix
w(classes, dim);
919 for (
unsigned int c=0; c<classes; c++)
// Solve the problem for class c; the last argument is true when m_verbosity > 0.
924 solver.
solve(this->C(), 0.0, base_type::m_stoppingcondition, &prop, base_type::m_verbosity > 0);
// Iterations and seconds are summed; accuracy is aggregated as the maximum.
926 base_type::m_solutionproperties.iterations += prop.
iterations;
927 base_type::m_solutionproperties.seconds += prop.
seconds;
928 base_type::m_solutionproperties.accuracy = std::max(base_type::solutionProperties().accuracy, prop.
accuracy);
// Start of a class template with parameters InputType and CacheType (default
// float); the class header and most members are not part of this excerpt.
935 template <
class InputType,
class CacheType =
float>
// Initializer list of a constructor taking one C: forwards kernel, C and
// unconstrained to base_type.
954 : base_type(kernel, C, unconstrained)
// Initializer list of a constructor taking negativeC and positiveC.
963 : base_type(kernel,negativeC, positiveC, unconstrained)
// Returns the fixed identifier string "SquaredHingeCSvmTrainer".
968 {
return "SquaredHingeCSvmTrainer"; }
// Fragment of a training routine that builds a kernel matrix with a per-example
// diagonal modifier.
// NOTE(review): the signature, the branch condition between the two matrix
// wrappers and the optimize calls are not part of this excerpt.
// One entry per dataset element, initialised to 0.5 / (first regularizer).
975 RealVector diagonalModifier(dataset.
numberOfElements(),0.5/base_type::m_regularizers(0));
// With more than one regularizer, each entry instead uses the regularizer indexed
// by the label of that element.
976 if(base_type::m_regularizers.size() != 1){
977 for(std::size_t i = 0; i != diagonalModifier.size();++i){
978 diagonalModifier(i) = 0.5/base_type::m_regularizers(dataset.
element(i).label);
// Kernel matrix over the dataset inputs, constructed with the diagonal modifier.
982 KernelMatrixType km(*base_type::m_kernel, dataset.
inputs(),diagonalModifier);
// Two alternative wrappers around km: a precomputed matrix or a cached matrix.
985 PrecomputedMatrixType matrix(&km);
990 CachedMatrixType matrix(&km);
// Record the access count reported by the kernel matrix.
993 base_type::m_accessCount = km.getAccessCount();
// Template (parameter Matrix) that solves the problem built from the given matrix
// and the dataset labels, then writes alpha (and, in the first variant, the offset)
// into `svm`.
// NOTE(review): the signature, the solver declarations, the declaration of `sum`
// and any guard around the final division are not part of this excerpt.
1000 template<
class Matrix>
// The problem is constructed with 1e100 as third argument (presumably an
// effectively unbounded box constraint — TODO confirm).
1003 SVMProblemType svmProblem(matrix,dataset.
labels(),1e100);
1004 if (this->m_trainOffset)
// Wrap the problem; the second argument is the shrinking flag.
1007 ProblemType problem(svmProblem,base_type::m_shrinking);
// Solve, then write the unpermuted solution into the first alpha column.
1009 solver.
solve(base_type::stoppingCondition(), &base_type::solutionProperties());
1010 column(svm.
alpha(),0)= problem.getUnpermutedAlpha();
// Accumulate, over all variables strictly inside their box, the gradient minus
// 2 * alpha(i) * diagonalModifier(i).
1013 std::size_t freeVars = 0;
1014 for (std::size_t i=0; i < problem.dimensions(); i++)
1016 if(problem.alpha(i) > problem.boxMin(i) && problem.alpha(i) < problem.boxMax(i)){
1017 sum += problem.gradient(i) - problem.alpha(i)*2*diagonalModifier(i);
// The offset is the accumulated sum divided by freeVars.
// NOTE(review): whether a freeVars > 0 guard precedes this line is not visible.
1022 svm.
offset(0) = sum / freeVars;
// Second variant: solve and write alpha back; no offset is written.
1029 ProblemType problem(svmProblem,base_type::m_shrinking);
1031 solver.
solve(base_type::stoppingCondition(), &base_type::solutionProperties());
1032 column(svm.
alpha(),0) = problem.getUnpermutedAlpha();
1039 template <
class InputType>
1050 {
return "SquaredHingeLinearCSvmTrainer"; }
1056 RealMatrix
w(1, dim, 0.0);
1059 1.0 / base_type::C(),