#ifndef SHARK_ALGORITHMS_LinearSAGTrainer_H
#define SHARK_ALGORITHMS_LinearSAGTrainer_H

// Headers for the Shark types used below (AbstractWeightedTrainer,
// LinearModel, AbstractLoss, DataView, the RNG and the sampling distribution).
#include <shark/Algorithms/Trainers/AbstractWeightedTrainer.h>
#include <shark/Models/LinearModel.h>
#include <shark/ObjectiveFunctions/Loss/AbstractLoss.h>
#include <shark/Data/DataView.h>
#include <shark/Data/WeightedDataset.h>
#include <shark/Core/Random.h>
#include <shark/Statistics/Distributions/MultiNomialDistribution.h>

// Trait selecting the trainer interface from the label type: generic labels
// train a LinearModel, unsigned int labels (classification) a LinearClassifier.
template<class InputType, class LabelType>
struct LinearSAGTrainerBase{
	typedef AbstractWeightedTrainer< LinearModel<InputType>, LabelType > type;
	typedef AbstractLoss<LabelType,RealVector> LossType;
};

// Specialization for classification: the trained model is a LinearClassifier.
template<class InputType>
struct LinearSAGTrainerBase<InputType, unsigned int>{
	typedef AbstractWeightedTrainer< LinearClassifier<InputType>, unsigned int > type;
	typedef AbstractLoss<unsigned int,RealVector> LossType;
};
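
// For example, LinearSAGTrainer<RealVector, unsigned int> therefore implements
// the AbstractWeightedTrainer interface for a LinearClassifier<RealVector>,
// while LinearSAGTrainer<RealVector, RealVector> trains a plain
// LinearModel<RealVector>; the trait above performs this selection.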

/// \brief Stochastic Average Gradient (SAG) method for training linear models.
template <class InputType, class LabelType>
class LinearSAGTrainer : public LinearSAGTrainerBase<InputType,LabelType>::type
{
public:
	typedef typename LinearSAGTrainerBase<InputType,LabelType>::type base_type;
	typedef typename LinearSAGTrainerBase<InputType,LabelType>::LossType LossType;
	typedef typename base_type::ModelType ModelType;
	typedef typename base_type::WeightedDatasetType WeightedDatasetType;

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "LinearSAGTrainer"; }

	/// \brief Train the model on a weighted dataset.
	void train(ModelType& model, WeightedDatasetType const& dataset){
		trainImpl(random::globalRng, model, dataset, *mep_loss);
	}
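
	// Note: train() dispatches on the label type via the trainImpl overloads
	// below; classification labels initialize a LinearClassifier, all other
	// label types a plain LinearModel, before both enter the same SAG loop.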

	/// \brief Return the number of training epochs; 0 selects the default
	/// used in iterate(), which amounts to max(10, dim) passes over the data.
	std::size_t epochs() const
	{ return m_maxEpochs; }

	/// \brief Set the number of training epochs; 0 restores the default.
	void setEpochs(std::size_t value)
	{ m_maxEpochs = value; }

	/// \brief Enable or disable training of the offset (bias) term.
	void setTrainOffset(bool offset)
	{ m_offset = offset; }

	/// \brief From IParameterizable: the single parameter is the regularization strength.
	RealVector parameterVector() const{
		return RealVector(1,m_lambda);
	}
	void setParameterVector(RealVector const& newParameters){
		SIZE_CHECK(newParameters.size() == 1);
		m_lambda = newParameters(0);
	}
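
	// The parameter lambda is the weight of the 2-norm penalty: the SAG loop
	// below minimizes  sum_b p_b * loss(y_b, W x_b + w0) + lambda/2 * ||W||^2,
	// where p_b are the normalized dataset weights; the penalty appears in the
	// loop as the shrinkage factor 1 - eta*lambda applied to the weight matrix.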

private:
	// Classification: determine the number of classes, set up the decision
	// function of the classifier, then run the generic SAG loop.
	void trainImpl(
		random::rng_type& rng,
		LinearClassifier<InputType>& classifier,
		WeightedLabeledData<InputType, unsigned int> const& dataset,
		AbstractLoss<unsigned int, RealVector> const& loss
	){
		std::size_t dim = inputDimension(dataset);
		std::size_t classes = numberOfClasses(dataset);
		// binary classification is encoded by the sign of a single output
		if(classes == 2) classes = 1;
		auto& model = classifier.decisionFunction();
		model.setStructure(dim, classes, m_offset);
		iterate(rng, model, dataset, loss);
	}

	// Regression and other label types: one output per label dimension.
	template<class LabelT>
	void trainImpl(
		random::rng_type& rng,
		LinearModel<InputType>& model,
		WeightedLabeledData<InputType, LabelT> const& dataset,
		AbstractLoss<LabelT, RealVector> const& loss
	){
		std::size_t dim = inputDimension(dataset);
		std::size_t labelDim = labelDimension(dataset);
		model.setStructure(dim, labelDim, m_offset);
		iterate(rng, model, dataset, loss);
	}

	// Dense inputs: gradient steps update the weight matrix directly.
	template<class T>
	void iterate(
		random::rng_type& rng,
		LinearModel<blas::vector<T> >& model,
		WeightedLabeledData<blas::vector<T>, LabelType> const& dataset,
		AbstractLoss<LabelType, RealVector> const& loss
	){
		// dataset statistics
		DataView<LabeledData<blas::vector<T>, LabelType> const> data(dataset.data());
		std::size_t ell = data.size();
		std::size_t labelDim = model.outputShape().numElements();
		std::size_t dim = model.inputShape().numElements();

		// number of SAG iterations; every epoch visits ell sampled points
		std::size_t iterations = m_maxEpochs * ell;
		if(m_maxEpochs == 0)
			iterations = std::max(10 * ell, std::size_t(std::ceil(dim * ell)));
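
		// With the default (m_maxEpochs == 0) this is max(10, dim) epochs:
		// e.g. 128-dimensional inputs yield 128 * ell iterations.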

		// sampling distribution: points are drawn proportionally to their weight
		RealVector probabilities = createBatch(dataset.weights().elements());
		probabilities /= sum(probabilities);
		MultiNomialDistribution dist(probabilities);

		// state of the average-gradient estimate
		RealMatrix gradD(labelDim,ell,0);  // last seen loss derivative of every point
		RealMatrix grad(labelDim,dim);     // running weighted average gradient of the weight matrix
		RealVector gradOffset(labelDim,0); // running average gradient of the offset
		RealVector pointNorms(ell);        // squared two-norm of every input
		for(std::size_t i = 0; i != ell; ++i){
			pointNorms(i) = norm_sqr(data[i].input);
		}
		// preallocate loop variables to avoid allocations inside the loop
		RealVector f_b(labelDim, 0.0);        // prediction for the current point
		RealVector derivative(labelDim, 0.0); // loss derivative at that prediction
		double L = 1; // running estimate of the Lipschitz constant of the loss

		// SAG loop
		for(std::size_t iter = 0; iter < iterations; iter++)
		{
			// draw a data point according to the weighting
			std::size_t b = dist(rng);

			// compute the model response on point b
			noalias(f_b) = prod(model.matrix(), data[b].input);
			if(m_offset) noalias(f_b) += model.offset();

			// loss value and derivative at the current prediction
			double currentValue = loss.evalDerivative(data[b].label, f_b, derivative);

			noalias(grad) += probabilities(b) * outer_prod(derivative - column(gradD,b), data[b].input);
			if(m_offset) noalias(gradOffset) += probabilities(b) * (derivative - column(gradD,b));
			noalias(column(gradD,b)) = derivative;
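
			// This is the defining SAG update: grad always equals
			// sum_b p_b * g_b, where g_b is the derivative stored when point b
			// was last visited; only the term of the current b is refreshed.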

			// gradient step on the average gradient; scaling the matrix by
			// 1 - eta*lambda is the gradient step on the 2-norm penalty
			double eta = 1.0/(L + m_lambda);
			noalias(model.matrix()) *= 1 - eta * m_lambda;
			for(std::size_t i = 0; i != labelDim; ++i){
				for(std::size_t j = 0; j != dim; ++j){
					model.matrix()(i,j) -= eta * grad(i,j);
				}
			}
			if(m_offset) noalias(model.offset()) -= eta * gradOffset;
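
			// The step size eta = 1/(L + lambda) matches the smoothness of the
			// regularized objective: the sum of an L-smooth loss and the
			// penalty lambda/2*||W||^2 is (L + lambda)-smooth.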

			// line search for the Lipschitz estimate: test the decrease of a
			// step of length 1/L on point b alone
			noalias(f_b) -= derivative/L*pointNorms(b);
			double newValue = loss.eval(data[b].label, f_b);
			if(norm_sqr(derivative)*pointNorms(b) > 1.e-8 && newValue > currentValue - 1/(2*L)*norm_sqr(derivative)*pointNorms(b)){
				L *= 2;
			}
			// let the estimate shrink again by a factor of 2 per epoch in case
			// it has become too large
			L *= std::pow(2.0, -1.0/ell);
		}
	}
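
	// The test in the loop above is the descent lemma for an L-smooth loss: a
	// gradient step of length 1/L must decrease the loss on point b by at
	// least norm_sqr(derivative)*pointNorms(b)/(2L); whenever the observed
	// decrease is smaller, L was too small and is doubled.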

	// Sparse inputs: gradient steps are delayed ("just-in-time" updates) so
	// that every iteration only touches the nonzero coordinates of the
	// current point.
	template<class T>
	void iterate(
		random::rng_type& rng,
		LinearModel<blas::compressed_vector<T> >& model,
		WeightedLabeledData<blas::compressed_vector<T>, LabelType> const& dataset,
		AbstractLoss<LabelType, RealVector> const& loss
	){
		// dataset statistics
		DataView<LabeledData<blas::compressed_vector<T>, LabelType> const> data(dataset.data());
		std::size_t ell = data.size();
		std::size_t labelDim = model.outputSize();
		std::size_t dim = model.inputSize();

		// number of SAG iterations, as in the dense case
		std::size_t iterations = m_maxEpochs * ell;
		if(m_maxEpochs == 0)
			iterations = std::max(10 * ell, std::size_t(std::ceil(dim * ell)));

		// sampling distribution: points are drawn proportionally to their weight
		RealVector probabilities = createBatch(dataset.weights().elements());
		probabilities /= sum(probabilities);
		MultiNomialDistribution dist(probabilities);

		// state of the average-gradient estimate; gradD is stored column-major
		// because it is accessed one column (one point) at a time
		blas::matrix<double,blas::column_major> gradD(labelDim,ell,0);
		RealMatrix grad(labelDim,dim);
		RealVector gradOffset(labelDim,0);
		RealVector pointNorms(ell);
		for(std::size_t i = 0; i != ell; ++i){
			pointNorms(i) = norm_sqr(data[i].input);
		}
		// preallocate loop variables
		RealVector f_b(labelDim, 0.0);
		RealVector derivative(labelDim, 0.0);
		double L = 1; // running estimate of the Lipschitz constant of the loss

		// bookkeeping for the delayed updates: conceptually the weight matrix
		// is kappa * model.matrix() minus all gradient steps not yet applied
		RealVector appliedRates(dim,0.0); // cumulative step size already applied per column
		double stepsCumSum = 0.0;         // cumulative step size of all iterations so far
		double kappa = 1;                 // accumulated 2-norm shrinkage factor

		// SAG loop
		for(std::size_t iter = 0; iter < iterations; iter++)
		{
			// draw a data point according to the weighting
			std::size_t b = dist(rng);
			auto const& point = data[b];

			// just-in-time update: catch up on the delayed gradient steps for
			// every column touched by a nonzero of point b
			auto end = point.input.end();
			for(auto pos = point.input.begin(); pos != end; ++pos){
				std::size_t index = pos.index();
				noalias(column(model.matrix(),index)) -= (stepsCumSum - blas::repeat(appliedRates(index),labelDim))*column(grad,index);
				appliedRates(index) = stepsCumSum;
			}
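
			// Why the delay is exact: between two visits of column `index`,
			// column(grad,index) never changes, because any sampled point with
			// a nonzero there triggers this catch-up first. The missed steps
			// can therefore be applied in one batch of total length
			// stepsCumSum - appliedRates(index).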

			noalias(f_b) = kappa * prod(model.matrix(), point.input);
			if(m_offset) noalias(f_b) += model.offset();

			double currentValue = loss.evalDerivative(point.label, f_b, derivative);

			// exchange the stored gradient of point b inside the running
			// average, row by row so that only nonzeros of the point are touched
			for(std::size_t l = 0; l != derivative.size(); ++l){
				double val = probabilities(b) * (derivative(l) - gradD(l,b));
				noalias(row(grad,l)) += val * point.input;
			}
			if(m_offset) noalias(gradOffset) += probabilities(b) * (derivative - column(gradD,b));
			noalias(column(gradD,b)) = derivative;

			double eta = 1.0/(L + m_lambda);
			stepsCumSum += kappa * eta;
			if(m_offset) noalias(model.offset()) -= eta * gradOffset;
			kappa *= 1 - eta * m_lambda;
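
			// The scaling trick in O(1): with W_true = kappa * model.matrix(),
			// the update kappa *= 1 - eta*lambda realizes the regularization
			// step W_true *= 1 - eta*lambda without touching any matrix entry;
			// the gradient step itself is only recorded in stepsCumSum and
			// applied lazily by the just-in-time loop above.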

			// line search for the Lipschitz estimate, as in the dense case
			noalias(f_b) -= derivative/L*pointNorms(b);
			double newValue = loss.eval(point.label, f_b);
			if(norm_sqr(derivative)*pointNorms(b) > 1.e-8 && newValue > currentValue - 1/(2*L)*norm_sqr(derivative)*pointNorms(b)){
				L *= 2;
			}
			L *= std::pow(2.0, -1.0/ell);

			// once per epoch: flush all outstanding delayed updates and fold
			// kappa back into the matrix, so model.matrix() is exact again
			if((iter + 1) % ell == 0){
				noalias(model.matrix()) -= (stepsCumSum - blas::repeat(appliedRates,labelDim))*grad;
				model.matrix() *= kappa;
				kappa = 1;
				stepsCumSum = 0.0;
				appliedRates.clear();
			}
		}
	}
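
	// Example use (note: the constructor shown below is an assumption based on
	// the members that follow; its exact signature is not part of this file,
	// and the spelling of the loss type varies between Shark versions):
	//
	//	CrossEntropy<unsigned int, RealVector> loss;
	//	LinearSAGTrainer<RealVector, unsigned int> trainer(&loss, 0.1); // lambda = 0.1
	//	LinearClassifier<RealVector> model;
	//	trainer.train(model, weightedTrainingData);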

	LossType const* mep_loss;  ///< loss function to be optimized
	double m_lambda;           ///< strength of the 2-norm regularization
	bool m_offset;             ///< whether an offset (bias) term is trained
	std::size_t m_maxEpochs;   ///< number of epochs; 0 selects the default of max(10, dim)
};

#endif