Shark machine learning library
About Shark
News!
Contribute
Credits and copyright
Downloads
Getting Started
Installation
Using the docs
Documentation
Tutorials
Quick references
Class list
Global functions
FAQ
Showroom
examples
Data
Import.cpp
Go to the documentation of this file.
//===========================================================================
/*!
 *
 *
 * \brief Data Import
 *
 * This file is part of the tutorial "Importing Data".
 * By itself, it does not do anything particularly useful.
 *
 * \author T. Glasmachers
 * \date 2014, 2016
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
35
36
#include <
shark/Data/Dataset.h
>
37
#include <
shark/Data/DataDistribution.h
>
38
#include <
shark/Data/Csv.h
>
39
#include <
shark/Data/SparseData.h
>
40
#include <
shark/Data/Download.h
>
41
#include <iostream>
42
using namespace
shark
;
43
44
45
/// \brief Example user-defined distribution used by the data import tutorial.
///
/// Each draw produces a two-component input vector together with an
/// unsigned integer label obtained from random::coinToss.
class YourDistribution : public LabeledDataDistribution<RealVector, unsigned int>
{
public:
	/// \brief Draws one labeled sample.
	///
	/// \param input  resized to two components and overwritten
	/// \param label  overwritten with the result of random::coinToss
	void draw(RealVector& input, unsigned int& label) const
	{
		input.resize(2);

		// The generator is consumed in a fixed order: label first, then
		// the two coordinates. Keep this order so the sample sequence
		// produced by random::globalRng stays the same.
		label = random::coinToss(random::globalRng);
		const auto first = random::uni(random::globalRng, -1, 1);
		const auto second = random::uni(random::globalRng, -1, 1);

		input(0) = first;
		// The second coordinate is offset by the label value.
		input(1) = second + label;
	}
};
56
57
58
int
main
(
int
argc,
char
** argv)
59
{
60
std::cout <<
61
"\n"
62
"WARNING: This program loads several data sets from disk.\n"
63
" If the files are not found then it will terminate\n"
64
" with an exception.\n"
65
"\n"
;
66
67
Data<RealVector>
points;
68
ClassificationDataset
dataset;
69
70
YourDistribution distribution;
71
unsigned
int
numberOfSamples = 1000;
72
dataset = distribution.generateDataset(numberOfSamples);
73
74
importCSV
(points,
"inputs.csv"
,
','
,
'#'
);
75
importCSV
(dataset,
"data.csv"
,
LAST_COLUMN
,
','
,
'#'
);
76
77
{
78
Data<RealVector>
inputs;
79
Data<RealVector>
labels;
80
importCSV
(inputs,
"inputs.csv"
);
81
importCSV
(labels,
"labels.csv"
);
82
RegressionDataset
dataset(inputs, labels);
83
}
84
85
{
86
importSparseData
(dataset,
"data.libsvm"
);
87
88
LabeledData<CompressedRealVector, unsigned int>
sparse_dataset;
89
importSparseData
(sparse_dataset,
"data.libsvm"
);
90
}
91
92
{
93
ClassificationDataset
dataset;
94
// download dense data
95
downloadCsvData
(dataset,
"http://mldata.org/repository/data/download/csv/banana-ida/"
,
FIRST_COLUMN
);
96
97
// download sparse data
98
downloadSparseData
(dataset,
"http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/svmguide1"
);
99
}
100
{
101
ClassificationDataset
dataset;
102
// fetch data set by name from mldata.org
103
downloadFromMLData
(dataset,
"iris"
);
104
}
105
}