Datasets.cpp
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Usage examples for the data set containers (tutorial "Data Containers")
6  *
7  * This file is part of the tutorial "Data Containers".
8  * By itself, it does not do anything particularly useful.
9  *
10  * \author T. Glasmachers
11  * \date 2014
12  *
13  *
14  * \par Copyright 1995-2017 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://shark-ml.org/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
36 #include <shark/Data/Dataset.h>
37 
38 #include <shark/Data/DataView.h>
39 
41 
42 using namespace shark;
43 
44 
45 class F
46 {
47 public:
48  typedef RealVector result_type;
49  RealVector operator () (RealVector x) const
50  { return (2.0 * x); }
51 };
52 
/*!
 * \brief Example functor that increments a label by one.
 *
 * Used below as the function handed to transformLabels(). The nested
 * result_type names the type returned by operator().
 */
class G
{
public:
	using result_type = unsigned int;

	/// Returns y + 1 (unsigned arithmetic).
	result_type operator () (unsigned int y) const
	{
		return y + 1;
	}
};
60 
61  class Add
62  {
63  public:
64  Add(RealVector offset) : m_offset(offset) {}
65 
66  typedef RealVector result_type; // do not forget to specify the result type
67 
68  RealVector operator () (RealVector input) const { // const is important
69  return (input + m_offset);
70  }
71 
72  private:
73  RealVector m_offset;
74  };
75 
76 
77 int main()
78 {
79 
80 {
81  std::vector<RealVector> points;
83 }
84 {
85  std::vector<RealVector> inputs;
86  std::vector<unsigned int> labels;
88 }
89 {
90  Data<RealVector> data(1000, RealVector(5));
91 }
92 {
93  Data<RealVector> data(1000, RealVector(5), 100);
94 }
95 {
96  Data<RealVector> data;
97  Data<RealVector> data2(data);
98  data = data2;
99  data.makeIndependent();
100 }
101 {
102  Data<RealVector> data;
103  typedef Data<RealVector>::batch_range Batches;
104  Batches batches = data.batches();
105 
106  std::cout << batches.size() << std::endl;
107  for (auto pos = batches.begin(); pos != batches.end(); ++pos) {
108  std::cout << *pos << std::endl;
109  }
110 }
111 {
112  Data<RealVector> data;
114  for(auto const& batch: data.batches()) {
115  std::cout << batch << std::endl;
116  }
117  for (std::size_t i = 0; i != data.numberOfBatches(); ++i) {
118  std::cout << data.batch(i) << std::endl;
119  }
120  for(auto const& batch: data.batches()) {
121  for(std::size_t i=0; i != batchSize(batch); ++i) {
122  std::cout << getBatchElement(batch,i ); // prints element i of the batch
123  }
124  }
125  typedef Data<RealVector>::element_range Elements;
126 
127  // 1: explicit iterator loop using the range over the elements
128  Elements elements = data.elements();
129  for (auto pos = elements.begin(); pos != elements.end(); ++pos) {
130  std::cout << *pos << std::endl;
131  }
132 
133  // 2: foreach
134  //note pass by value, the range returns proxy elements instead of references
135  for(auto element: data.elements()) {
136  std::cout << element << std::endl;
137  }
138 }
139 {
140  Data<unsigned int> data;
141  std::size_t classes = numberOfClasses(data); // maximal class label minus one
142  std::vector<std::size_t> sizes = classSizes(data); // number of occurrences of every class label
143 
144  Data<RealVector> dataVec;
145  std::size_t dim = dataDimension(dataVec); // dimensionality of the data points
146 }
147 {
149  std::size_t classes = numberOfClasses(data); // maximal class label minus one
150  std::vector<std::size_t> sizes = classSizes(data); // number of occurrences of every class label
151  std::size_t dim = inputDimension(data); // dimensionality of the data points
152 }
153 {
154  F f;
155  G g;
156  Data<RealVector> data; // initial data set
157  data = transform(data, f); // applies f to each element
158 
159  LabeledData<RealVector, unsigned int> labeledData; // initial labeled dataset
160  labeledData = transformInputs(labeledData, f); // applies f to each input
161  labeledData = transformLabels(labeledData, g); // applies g to each label
162 
163  // a linear model, for example for whitening
164  LinearModel<> model;
165  // application of the model to the data
166  labeledData = transformInputs(labeledData, model);
167  // or an alternate shortcut:
168  data = model(data);
169 }
170 {
171  Data<RealVector> data;
172  RealVector v(3); v(0) = 1.0; v(1) = 3.0; v(2) = -0.5;
173  data = transform(data, Add(v));
174 }
175 {
176  Data<unsigned int> dataset;
177  DataView<Data<unsigned int> > view(dataset);
178  for (std::size_t i=0; i != view.size(); ++i) {
179  std::cout << view[i] << std::endl;
180  }
181  std::vector<std::size_t> indices;
182  // somehow choose a set of indices
183  Data<unsigned int> subsetData = toDataset(subset(view, indices));
184 }
185 {
186  Data<unsigned int> dataset;
187  DataView<Data<unsigned int> > view(dataset);
188  std::vector<std::size_t> indices;
189  std::size_t maximumBatchSize = 100;
190  Data<unsigned int> subsetData = toDataset(subset(view, indices), maximumBatchSize);
191 }
192 {
195  std::cout << numberOfClasses(view) << " " << inputDimension(view) << std::endl;
196 }
197 
198 }