298 lines
8.5 KiB
Markdown
298 lines
8.5 KiB
Markdown
|
|
DeepLearnToolbox
|
|
================
|
|
|
|
A Matlab toolbox for Deep Learning.
|
|
|
|
Deep Learning is a new subfield of machine learning that focuses on learning deep hierarchical models of data.
|
|
It is inspired by the human brain's apparent deep (layered, hierarchical) architecture.
|
|
A good overview of the theory of Deep Learning is
|
|
[Learning Deep Architectures for AI](http://www.iro.umontreal.ca/~bengioy/papers/ftml_book.pdf)
|
|
|
|
For a more informal introduction, see the following videos by Geoffrey Hinton and Andrew Ng.
|
|
|
|
* [The Next Generation of Neural Networks](http://www.youtube.com/watch?v=AyzOUbkUf3M) (Hinton, 2007)
|
|
* [Recent Developments in Deep Learning](http://www.youtube.com/watch?v=VdIURAu1-aU) (Hinton, 2010)
|
|
* [Unsupervised Feature Learning and Deep Learning](http://www.youtube.com/watch?v=ZmNOAtZIgIk) (Ng, 2011)
|
|
|
|
If you use this toolbox in your research, please cite [Prediction as a candidate for learning deep hierarchical models of data](http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=6284).
|
|
|
|
```
|
|
@MASTERSTHESIS{IMM2012-06284,
|
|
author = "R. B. Palm",
|
|
title = "Prediction as a candidate for learning deep hierarchical models of data",
|
|
year = "2012",
|
|
}
|
|
```
|
|
|
|
Contact: rasmusbergpalm at gmail dot com
|
|
|
|
Directories included in the toolbox
|
|
-----------------------------------
|
|
|
|
`NN/` - A library for Feedforward Backpropagation Neural Networks
|
|
|
|
`CNN/` - A library for Convolutional Neural Networks
|
|
|
|
`DBN/` - A library for Deep Belief Networks
|
|
|
|
`SAE/` - A library for Stacked Auto-Encoders
|
|
|
|
`CAE/` - A library for Convolutional Auto-Encoders
|
|
|
|
`util/` - Utility functions used by the libraries
|
|
|
|
`data/` - Data used by the examples
|
|
|
|
`tests/` - Unit tests to verify that the toolbox is working
|
|
|
|
For references on each library, check `REFS.md`.
|
|
|
|
Setup
|
|
-----
|
|
|
|
1. Download.
|
|
2. Add the toolbox to your Matlab path: `addpath(genpath('DeepLearnToolbox'));`
|
|
|
|
Known errors
|
|
------------------------------
|
|
|
|
`test_cnn_gradients_are_numerically_correct` fails on Octave because of a bug in Octave's convn implementation. See http://savannah.gnu.org/bugs/?39314
|
|
|
|
`test_example_CNN` fails in Octave for the same reason.
|
|
|
|
Example: Deep Belief Network
|
|
---------------------
|
|
```matlab
|
|
|
|
function test_example_DBN
|
|
load mnist_uint8;
|
|
|
|
train_x = double(train_x) / 255;
|
|
test_x = double(test_x) / 255;
|
|
train_y = double(train_y);
|
|
test_y = double(test_y);
|
|
|
|
%% ex1 train a 100 hidden unit RBM and visualize its weights
|
|
rand('state',0)
|
|
dbn.sizes = [100];
|
|
opts.numepochs = 1;
|
|
opts.batchsize = 100;
|
|
opts.momentum = 0;
|
|
opts.alpha = 1;
|
|
dbn = dbnsetup(dbn, train_x, opts);
|
|
dbn = dbntrain(dbn, train_x, opts);
|
|
figure; visualize(dbn.rbm{1}.W'); % Visualize the RBM weights
|
|
|
|
%% ex2 train a 100-100 hidden unit DBN and use its weights to initialize a NN
|
|
rand('state',0)
|
|
%train dbn
|
|
dbn.sizes = [100 100];
|
|
opts.numepochs = 1;
|
|
opts.batchsize = 100;
|
|
opts.momentum = 0;
|
|
opts.alpha = 1;
|
|
dbn = dbnsetup(dbn, train_x, opts);
|
|
dbn = dbntrain(dbn, train_x, opts);
|
|
|
|
%unfold dbn to nn
|
|
nn = dbnunfoldtonn(dbn, 10);
|
|
nn.activation_function = 'sigm';
|
|
|
|
%train nn
|
|
opts.numepochs = 1;
|
|
opts.batchsize = 100;
|
|
nn = nntrain(nn, train_x, train_y, opts);
|
|
[er, bad] = nntest(nn, test_x, test_y);
|
|
|
|
assert(er < 0.10, 'Too big error');
|
|
|
|
```
|
|
|
|
|
|
Example: Stacked Auto-Encoders
|
|
---------------------
|
|
```matlab
|
|
|
|
function test_example_SAE
|
|
load mnist_uint8;
|
|
|
|
train_x = double(train_x)/255;
|
|
test_x = double(test_x)/255;
|
|
train_y = double(train_y);
|
|
test_y = double(test_y);
|
|
|
|
%% ex1 train a 100 hidden unit SDAE and use it to initialize a FFNN
|
|
% Setup and train a stacked denoising autoencoder (SDAE)
|
|
rand('state',0)
|
|
sae = saesetup([784 100]);
|
|
sae.ae{1}.activation_function = 'sigm';
|
|
sae.ae{1}.learningRate = 1;
|
|
sae.ae{1}.inputZeroMaskedFraction = 0.5;
|
|
opts.numepochs = 1;
|
|
opts.batchsize = 100;
|
|
sae = saetrain(sae, train_x, opts);
|
|
visualize(sae.ae{1}.W{1}(:,2:end)')
|
|
|
|
% Use the SDAE to initialize a FFNN
|
|
nn = nnsetup([784 100 10]);
|
|
nn.activation_function = 'sigm';
|
|
nn.learningRate = 1;
|
|
nn.W{1} = sae.ae{1}.W{1};
|
|
|
|
% Train the FFNN
|
|
opts.numepochs = 1;
|
|
opts.batchsize = 100;
|
|
nn = nntrain(nn, train_x, train_y, opts);
|
|
[er, bad] = nntest(nn, test_x, test_y);
|
|
assert(er < 0.16, 'Too big error');
|
|
|
|
```
|
|
|
|
|
|
Example: Convolutional Neural Nets
|
|
---------------------
|
|
```matlab
|
|
|
|
function test_example_CNN
|
|
load mnist_uint8;
|
|
|
|
train_x = double(reshape(train_x',28,28,60000))/255;
|
|
test_x = double(reshape(test_x',28,28,10000))/255;
|
|
train_y = double(train_y');
|
|
test_y = double(test_y');
|
|
|
|
%% ex1 Train a 6c-2s-12c-2s Convolutional neural network
|
|
%will run 1 epoch in about 200 seconds and get around 11% error.
|
|
%With 100 epochs you'll get around 1.2% error
|
|
rand('state',0)
|
|
cnn.layers = {
|
|
struct('type', 'i') %input layer
|
|
struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5) %convolution layer
|
|
struct('type', 's', 'scale', 2) %sub sampling layer
|
|
struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5) %convolution layer
|
|
struct('type', 's', 'scale', 2) %subsampling layer
|
|
};
|
|
cnn = cnnsetup(cnn, train_x, train_y);
|
|
|
|
opts.alpha = 1;
|
|
opts.batchsize = 50;
|
|
opts.numepochs = 1;
|
|
|
|
cnn = cnntrain(cnn, train_x, train_y, opts);
|
|
|
|
[er, bad] = cnntest(cnn, test_x, test_y);
|
|
|
|
%plot mean squared error
|
|
figure; plot(cnn.rL);
|
|
|
|
assert(er<0.12, 'Too big error');
|
|
|
|
```
|
|
|
|
|
|
Example: Neural Networks
|
|
---------------------
|
|
```matlab
|
|
|
|
function test_example_NN
|
|
load mnist_uint8;
|
|
|
|
train_x = double(train_x) / 255;
|
|
test_x = double(test_x) / 255;
|
|
train_y = double(train_y);
|
|
test_y = double(test_y);
|
|
|
|
% normalize
|
|
[train_x, mu, sigma] = zscore(train_x);
|
|
test_x = normalize(test_x, mu, sigma);
|
|
|
|
%% ex1 vanilla neural net
|
|
rand('state',0)
|
|
nn = nnsetup([784 100 10]);
|
|
opts.numepochs = 1; % Number of full sweeps through data
|
|
opts.batchsize = 100; % Take a mean gradient step over this many samples
|
|
[nn, L] = nntrain(nn, train_x, train_y, opts);
|
|
|
|
[er, bad] = nntest(nn, test_x, test_y);
|
|
|
|
assert(er < 0.08, 'Too big error');
|
|
|
|
%% ex2 neural net with L2 weight decay
|
|
rand('state',0)
|
|
nn = nnsetup([784 100 10]);
|
|
|
|
nn.weightPenaltyL2 = 1e-4; % L2 weight decay
|
|
opts.numepochs = 1; % Number of full sweeps through data
|
|
opts.batchsize = 100; % Take a mean gradient step over this many samples
|
|
|
|
nn = nntrain(nn, train_x, train_y, opts);
|
|
|
|
[er, bad] = nntest(nn, test_x, test_y);
|
|
assert(er < 0.1, 'Too big error');
|
|
|
|
|
|
%% ex3 neural net with dropout
|
|
rand('state',0)
|
|
nn = nnsetup([784 100 10]);
|
|
|
|
nn.dropoutFraction = 0.5; % Dropout fraction
|
|
opts.numepochs = 1; % Number of full sweeps through data
|
|
opts.batchsize = 100; % Take a mean gradient step over this many samples
|
|
|
|
nn = nntrain(nn, train_x, train_y, opts);
|
|
|
|
[er, bad] = nntest(nn, test_x, test_y);
|
|
assert(er < 0.1, 'Too big error');
|
|
|
|
%% ex4 neural net with sigmoid activation function
|
|
rand('state',0)
|
|
nn = nnsetup([784 100 10]);
|
|
|
|
nn.activation_function = 'sigm'; % Sigmoid activation function
|
|
nn.learningRate = 1; % Sigm requires a lower learning rate
|
|
opts.numepochs = 1; % Number of full sweeps through data
|
|
opts.batchsize = 100; % Take a mean gradient step over this many samples
|
|
|
|
nn = nntrain(nn, train_x, train_y, opts);
|
|
|
|
[er, bad] = nntest(nn, test_x, test_y);
|
|
assert(er < 0.1, 'Too big error');
|
|
|
|
%% ex5 plotting functionality
|
|
rand('state',0)
|
|
nn = nnsetup([784 20 10]);
|
|
opts.numepochs = 5; % Number of full sweeps through data
|
|
nn.output = 'softmax'; % use softmax output
|
|
opts.batchsize = 1000; % Take a mean gradient step over this many samples
|
|
opts.plot = 1; % enable plotting
|
|
|
|
nn = nntrain(nn, train_x, train_y, opts);
|
|
|
|
[er, bad] = nntest(nn, test_x, test_y);
|
|
assert(er < 0.1, 'Too big error');
|
|
|
|
%% ex6 neural net with sigmoid activation and plotting of validation and training error
|
|
% split training data into training and validation data
|
|
vx = train_x(1:10000,:);
|
|
tx = train_x(10001:end,:);
|
|
vy = train_y(1:10000,:);
|
|
ty = train_y(10001:end,:);
|
|
|
|
rand('state',0)
|
|
nn = nnsetup([784 20 10]);
|
|
nn.output = 'softmax'; % use softmax output
|
|
opts.numepochs = 5; % Number of full sweeps through data
|
|
opts.batchsize = 1000; % Take a mean gradient step over this many samples
|
|
opts.plot = 1; % enable plotting
|
|
nn = nntrain(nn, tx, ty, opts, vx, vy); % nntrain takes validation set as last two arguments (optionally)
|
|
|
|
[er, bad] = nntest(nn, test_x, test_y);
|
|
assert(er < 0.1, 'Too big error');
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/rasmusbergpalm/deeplearntoolbox/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
|
|
|