EvalSkipMissingFeatures.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Do special kernel evaluation by skipping missing features
6  *
7  *
8  *
9  * \author B. Li
10  * \date 2012
11  *
12  *
13  * \par Copyright 1995-2017 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://shark-ml.org/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 #ifndef SHARK_MODELS_KERNELS_EVAL_SKIP_MISSING_FEATURES_H
35 #define SHARK_MODELS_KERNELS_EVAL_SKIP_MISSING_FEATURES_H
36 
37 #include "shark/Core/Exception.h"
38 #include "shark/LinAlg/Base.h"
43 
44 #include <boost/optional.hpp>
45 #include <boost/math/special_functions/fpclassify.hpp>
46 #include <vector>
47 
48 namespace shark {
49 
50 /// Does a kernel function evaluation with Missing features in the inputs
51 /// @param kernelFunction The kernel function used to do evaluation
52 /// @param inputA a input
53 /// @param inputB another input
54 ///
55 /// The kernel k(x,y) is evaluated taking missing features into account. For this it is checked whether a feature
56 /// of x or y is nan and in this case the corresponding features in @a inputA and @a inputB won't be considered.
57 template <typename InputType,typename InputTypeT1,typename InputTypeT2>
59  const AbstractKernelFunction<InputType>& kernelFunction,
60  const InputTypeT1& inputA,
61  const InputTypeT2& inputB)
62 {
63  SIZE_CHECK(inputA.size() == inputB.size());
64  // Do kernel type check
65  SHARK_RUNTIME_CHECK(kernelFunction.supportsVariableInputSize(), "Kernel must support variable input size.");
66 
67  // Work out features that are valid for both dataset i and j, and also should not be filtered out by missingness
68  // Because we won't exact length of valid features beforehand, so we choose to construct two vectors and then
69  // construct another two InputTypes with them.
70  typedef typename InputType::value_type InputValueType;
71  std::vector<InputValueType> tempInputA;
72  std::vector<InputValueType> tempInputB;
73  tempInputA.reserve(inputA.size());
74  tempInputB.reserve(inputB.size());
75  for (std::size_t index = 0; index < inputA.size(); ++index)
76  {
77  //using namespace boost::math;
78  if (!boost::math::isnan(inputA(index)) && !boost::math::isnan(inputB(index)))
79  {
80  tempInputA.push_back(inputA(index));
81  tempInputB.push_back(inputB(index));
82  }
83  }
84 
85  SIZE_CHECK(tempInputA.size() == tempInputB.size());
86  SIZE_CHECK(tempInputA.size() > 0);
87  InputType validInputA(tempInputA.size());
88  InputType validInputB(tempInputA.size());
89  std::copy(tempInputA.begin(),tempInputA.end(),validInputA.begin());
90  std::copy(tempInputB.begin(),tempInputB.end(),validInputB.begin());
91 
92  // And then pass them to the kernel for calculation
93  return kernelFunction.eval(validInputA, validInputB);
94 }
95 
96 /// Do kernel function evaluation while Missing features in the inputs
97 /// @param kernelFunction The kernel function used to do evaluation
98 /// @param inputA a input
99 /// @param inputB another input
100 /// @param missingness
101 /// used to decide which features in the inputs to take into consideration for the purpose of evaluation.
102 /// If a feature is NaN, then the corresponding features in @a inputA and @a inputB won't be considered.
103 template <typename InputType,typename InputTypeT1,typename InputTypeT2,typename InputTypeT3>
105  const AbstractKernelFunction<InputType>& kernelFunction,
106  const InputTypeT1& inputA,
107  const InputTypeT2& inputB,
108  InputTypeT3 const& missingness)
109 {
110  SIZE_CHECK(inputA.size() == inputB.size());
111  //SIZE_CHECK(inputA.size() == missingness.size());
112  // Do kernel type check
113  SHARK_RUNTIME_CHECK(kernelFunction.supportsVariableInputSize(), "Kernel must support variable input size.");
114 
115 
116 
117  // Work out features that are valid for both dataset i and j, and also should not be filtered out by missingness
118  // Because we won't exact length of valid features beforehand, so we choose to construct two vectors and then
119  // construct another two InputTypes with them.
120  typedef typename InputType::value_type InputValueType;
121  std::vector<InputValueType> tempInputA;
122  std::vector<InputValueType> tempInputB;
123  tempInputA.resize(inputA.size());
124  tempInputB.resize(inputB.size());
125  for (std::size_t index = 0; index < inputA.size(); ++index)
126  {
127  if (!boost::math::isnan(inputA(index)) && !boost::math::isnan(inputB(index)) && !boost::math::isnan(missingness(index)))
128  {
129  tempInputA.push_back(inputA(index));
130  tempInputB.push_back(inputB(index));
131  }
132  }
133 
134  SIZE_CHECK(tempInputA.size() == tempInputB.size());
135  SIZE_CHECK(tempInputA.size() > 0);
136  InputType validInputA(tempInputA.size());
137  InputType validInputB(tempInputA.size());
138  for (std::size_t i = 0; i < tempInputA.size(); ++i)
139  {
140  validInputA(i) = tempInputA[i];
141  validInputB(i) = tempInputB[i];
142  }
143 
144  // And then pass them to the kernel for calculation
145  return kernelFunction.eval(validInputA, validInputB);
146 }
147 
148 } // namespace shark {
149 
150 #endif // SHARK_MODELS_KERNELS_EVAL_SKIP_MISSING_FEATURES_H