Import.cpp
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Data Import
6  *
7  * This file is part of the tutorial "Importing Data".
8  * By itself, it does not do anything particularly useful.
9  *
10  * \author T. Glasmachers
11  * \date 2014, 2016
12  *
13  *
14  * \par Copyright 1995-2017 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://shark-ml.org/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
#include <shark/Data/Dataset.h>
#include <shark/Data/DataDistribution.h>
#include <shark/Data/Csv.h>
#include <shark/Data/SparseData.h>
#include <shark/Data/Download.h>
#include <iostream>
using namespace shark;
43 
44 
45 class YourDistribution : public LabeledDataDistribution<RealVector, unsigned int>
46 {
47 public:
48  void draw(RealVector& input, unsigned int& label) const
49  {
50  input.resize(2);
52  input(0) = random::uni(random::globalRng, -1,1);
53  input(1) = random::uni(random::globalRng, -1,1) + label;
54  }
55 };
56 
57 
58 int main(int argc, char** argv)
59 {
60  std::cout <<
61  "\n"
62  "WARNING: This program loads several data sets from disk.\n"
63  " If the files are not found then it will terminate\n"
64  " with an exception.\n"
65  "\n";
66 
67  Data<RealVector> points;
68  ClassificationDataset dataset;
69 
70  YourDistribution distribution;
71  unsigned int numberOfSamples = 1000;
72  dataset = distribution.generateDataset(numberOfSamples);
73 
74  importCSV(points, "inputs.csv", ',', '#');
75  importCSV(dataset, "data.csv", LAST_COLUMN, ',', '#');
76 
77 {
78  Data<RealVector> inputs;
79  Data<RealVector> labels;
80  importCSV(inputs, "inputs.csv");
81  importCSV(labels, "labels.csv");
82  RegressionDataset dataset(inputs, labels);
83 }
84 
85 {
86  importSparseData(dataset, "data.libsvm");
87 
89  importSparseData(sparse_dataset, "data.libsvm");
90 }
91 
92 {
93  ClassificationDataset dataset;
94  // download dense data
95  downloadCsvData(dataset, "http://mldata.org/repository/data/download/csv/banana-ida/", FIRST_COLUMN);
96 
97  // download sparse data
98  downloadSparseData(dataset, "http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/svmguide1");
99 }
100 {
101  ClassificationDataset dataset;
102  // fetch data set by name from mldata.org
103  downloadFromMLData(dataset, "iris");
104 }
105 }