36 #ifndef SHARK_OBJECTIVEFUNCTIONS_LOSS_CROSS_ENTROPY_H 37 #define SHARK_OBJECTIVEFUNCTIONS_LOSS_CROSS_ENTROPY_H 74 double evalError(
double label,
double exponential,
double value)
const {
76 if(value*label < -200 ){
79 return - value * label;
81 return std::log(1+exponential);
93 {
return "CrossEntropy"; }
98 double eval(UIntVector
const& target, RealMatrix
const& prediction)
const {
100 for(std::size_t i = 0; i != prediction.size1(); ++i){
101 error +=
eval(target(i), row(prediction,i));
107 if ( prediction.size() == 1 )
110 double label = 2.0 * target - 1;
111 double exponential = std::exp( -label * prediction(0) );
112 return evalError(label,exponential,prediction(0 ));
119 double maximum = max(prediction);
120 double logNorm = sum(exp(prediction-maximum));
121 logNorm = std::log(logNorm) + maximum;
122 return logNorm - prediction(target);
126 double evalDerivative(UIntVector
const& target, RealMatrix
const& prediction, RealMatrix& gradient)
const {
127 gradient.resize(prediction.size1(),prediction.size2());
128 if ( prediction.size2() == 1 )
131 for(std::size_t i = 0; i != prediction.size1(); ++i){
133 double label = 2 *
static_cast<double>(target(i)) - 1;
134 double exponential = std::exp ( -label * prediction (i, 0 ) );
135 double sigmoid = 1.0/(1.0+exponential);
136 gradient ( i,0 ) = -label * (1.0 -
sigmoid);
137 error+=evalError(label,exponential,prediction (i, 0 ));
142 for(std::size_t i = 0; i != prediction.size1(); ++i){
144 auto gradRow=row(gradient,i);
151 double maximum = max(row(prediction,i));
152 noalias(gradRow) = exp(row(prediction,i) - maximum);
153 double norm = sum(gradRow);
155 gradient(i,target(i)) -= 1;
156 error+=std::log(norm) - prediction(i,target(i))+maximum;
162 gradient.resize(prediction.size());
163 if ( prediction.size() == 1 )
166 double label = 2.0 * target - 1;
167 double exponential = std::exp ( - label * prediction(0));
168 double sigmoid = 1.0/(1.0+exponential);
169 gradient(0) = -label * (1.0 -
sigmoid);
170 return evalError(label,exponential,prediction(0));
179 double maximum = max(prediction);
180 noalias(gradient) = exp(prediction - maximum);
181 double norm = sum(gradient);
183 gradient(target) -= 1;
184 return std::log(norm) - prediction(target) + maximum;
192 gradient.resize(prediction.size());
193 hessian.resize(prediction.size(),prediction.size());
194 if ( prediction.size() == 1 )
197 double label = 2 *
static_cast<double>(target) - 1;
198 double exponential = std::exp ( -label * prediction ( 0 ) );
199 double sigmoid = 1.0/(1.0+exponential);
200 gradient ( 0 ) = -label * (1.0-
sigmoid);
201 hessian ( 0,0 ) = sigmoid * ( 1-
sigmoid );
202 return evalError(label,exponential,prediction ( 0 ));
212 double maximum = max(prediction);
213 noalias(gradient) = exp(prediction-maximum);
214 double norm = sum(gradient);
217 noalias(hessian)=-outer_prod(gradient,gradient);
218 noalias(diag(hessian)) += gradient;
219 gradient(target) -= 1;
221 return std::log(norm) - prediction(target) - maximum;