SAE代码.docx - 冰豆网

资源描述

SAE代码.docx

《SAE代码.docx》由会员分享，可在线阅读，更多相关《SAE代码.docx（13页珍藏版）》请在冰豆网上搜索。

SAE代码.docx

SAE代码

function test_example_SAE

1.load mnist_uint8;

3.train_x = double（train_x）/255;

4.test_x = double（test_x）/255;

5.train_y = double（train_y）;

6.test_y = double（test_y）; //将数据一开始初始化

8.%% ex1 train a 100 hidden unit SDAE and use it to initialize a FFNN

9.% Setup and train a stacked denoising autoencoder （SDAE）

10.rand（'state',0）

11.sae = saesetup（[784 100]）;

这里跳入saesetup函数，由函数可知返回的是sae的结构体

1.function sae = saesetup（size）

2. for u = 2 : numel（size） //numel（size）=2

3. sae.ae{u-1} = nnsetup（[size（u-1） size（u） size（u-1）]）; %传入nnsetup的结构为[784 100 784]：第1层（输入）784，第2层（隐藏）100，第3层（输出）784

4. end

5.end

这里调用了nnsetup函数，由该函数可知返回的也是nn结构体，也就是说sae.ae中保存的每一个自编码器都是一个nn结构体。

1.function nn = nnsetup（architecture）

2.%NNSETUP creates a Feedforward Backpropagate Neural Network

3.% nn = nnsetup（architecture） returns an neural network structure with n=numel（architecture）

4.% layers, architecture being a n x 1 vector of layer sizes e.g. [784 100 10]

6. nn.size = architecture; //architecture表示每一层有多少个神经元，总共有多少层（此处为3层）

7. nn.n = numel（nn.size）;//网络层数3

9. nn.activation_function = 'tanh_opt'; % Activation functions of hidden layers: 'sigm' （sigmoid） or 'tanh_opt' （optimal tanh）.

10. nn.learningRate = 2; % learning rate Note: typically needs to be lower when using 'sigm' activation function and non-normalized inputs.

11. nn.momentum = 0.5; % Momentum

12. nn.scaling_learningRate = 1; % Scaling factor for the learning rate （each epoch）

13. nn.weightPenaltyL2 = 0; % L2 regularization

14. nn.nonSparsityPenalty = 0; % Non sparsity penalty

15. nn.sparsityTarget = 0.05; % Sparsity target

16. nn.inputZeroMaskedFraction = 0; % Used for Denoising AutoEncoders

17. nn.dropoutFraction = 0; % Dropout level （http://www.cs.toronto.edu/~hinton/absps/dropout.pdf）

18. nn.testing = 0; % Internal variable. nntest sets this to one.

19. nn.output = 'sigm'; % output unit 'sigm' （=logistic）, 'softmax' and 'linear'

20. //对每一层的网络结构进行初始化，一共三个参数W,vW,p,其中W是主要的参数（权值）

21. //vW是权值的动量项（更新W时累积的上一次更新量），p是各隐藏单元的平均激活度，用于稀疏性（sparsity）惩罚

22. for i = 2 :

nn.n %生成两层权值和p{i}

23. % weights and weight momentum

24. nn.W{i - 1} = （rand（nn.size（i）, nn.size（i - 1）+1） - 0.5） * 2 * 4 * sqrt（6 / （nn.size（i） + nn.size（i - 1）））;

//权值在区间 $\left[-4\sqrt{6/(n_i+n_{i-1})},\ 4\sqrt{6/(n_i+n_{i-1})}\right]$ 内均匀随机取值，其中 $n_i$ = nn.size（i）；（rand - 0.5）* 2 先把[0,1]上的随机数映射到[-1,1]，再乘以 4 * sqrt（6 / （nn.size（i） + nn.size（i - 1）））

25. nn.vW{i - 1} = zeros（size（nn.W{i - 1}））;

//使vW与W大小相同，但初始为全0矩阵

26.

27. % average activations （for use with sparsity）

28. nn.p{i} = zeros（1, nn.size（i））; //为第2、3层各生成一个1×nn.size（i）的全0行向量，p{i}用来表示隐藏神经元j的平均活跃度（详情可见UFLDL教程）

29. end

30.end

程序跳回这一段

1.sae.ae{1}.activation_function = 'sigm';

2.sae.ae{1}.learningRate = 1;

3.sae.ae{1}.inputZeroMaskedFraction = 0.5;

//修改sae里面的各个参数

4.opts.numepochs = 1;

5.opts.batchsize = 100;

6.sae = saetrain（sae, train_x, opts）;

这里将nn里的各个参数在sae里部分更改，然后又跳到saetrain函数

1.function sae = saetrain（sae, x, opts）

2. for i = 1 :

numel（sae.ae）;

3. disp（['Training AE ' num2str（i） '/' num2str（numel（sae.ae））]）;//显示当前正在训练第几个自编码器（共numel（sae.ae）个）

4. sae.ae{i} = nntrain（sae.ae{i}, x, x, opts）;

5. t = nnff（sae.ae{i}, x, x）;

6. x = t.a{2};

7. %remove bias term

8. x = x（:,2:end）; //把第一列（偏置项）去掉

9. end

10.end

这里转到nntrain函数，跳过前面的assert判定

1.loss.train.e = [];

2.loss.train.e_frac = [];

3.loss.val.e = [];

4.loss.val.e_frac = [];

5.opts.validation = 0;

6.if nargin == 6

7. opts.validation = 1;

8.end

10.fhandle = [];

11.if isfield（opts,'plot'） && opts.plot == 1 //检查结构体opts是否包含由‘plot’指定的域，如果包含则返回逻辑1

12. fhandle = figure（）;

13.end

14.

15.m = size（train_x, 1）;

16.//m是训练样本的数量

17.//注意在调用的时候我们设置了opts，batchsize是做mini-batch梯度下降时每一批样本的个数

18.batchsize = opts.batchsize;

19.numepochs = opts.numepochs;//表示循环的次数

20.

21.numbatches = m / batchsize;

22.

23.assert（rem（numbatches, 1） == 0, 'numbatches must be a integer'）;

1.L = zeros（numepochs*numbatches,1）;

2.n = 1;

1.for i = 1 :

numepochs

2. tic;

4. kk = randperm（m）; //把1到m这些数随机打乱得到的一个数字序列。

5. for l = 1 :

numbatches

6. batch_x = train_x（kk（（l - 1） * batchsize + 1 : l * batchsize）, :）; //一批一批进行训练，每一批数目为batchsize，即100（对MNIST的60000个训练样本，共numbatches=600批）

8. //Add noise to input （for use in denoising autoencoder）加入noise，这是denoising autoencoder需要使用到的部分

9. if（nn.inputZeroMaskedFraction ~= 0） //请参见《Extracting and Composing Robust Features with Denoising Autoencoders》这篇论文

10. batch_x = batch_x.*（rand（size（batch_x））>nn.inputZeroMaskedFraction）;//具体加入的方法就是把训练样例中的一些数据调整变为0，inputZeroMaskedFraction表示了调整的比例

11. end

12.

13. batch_y = train_y（kk（（l - 1） * batchsize + 1 : l * batchsize）, :）; //同理对y也进行一批一批的调用，与前面的batch_x对应

14.

15. nn = nnff（nn, batch_x, batch_y）;

16. nn = nnbp（nn）;

17. nn = nnapplygrads（nn）;

18.

19. L（n） = nn.L; //记录当前这一批（mini-batch）的损失值nn.L

20.

21. n = n + 1;

22. end

23.

24. t = toc; //这里计算出本轮（epoch）训练用了多少秒（与循环开头的tic配对）

25.

26.

27. if opts.validation == 1

28. loss = nneval（nn, loss, train_x, train_y, val_x, val_y）;

29. str_perf = sprintf（'; Full-batch train mse = %f, val mse = %f', loss.train.e（end）, loss.val.e（end））;

30. else

31. loss = nneval（nn, loss, train_x, train_y）;

32. str_perf = sprintf（'; Full-batch train err = %f', loss.train.e（end））;

33. end

34. if ishandle（fhandle）

35. nnupdatefigures（nn, fhandle, loss, opts, i）;

36. end

37.

38. disp（['epoch ' num2str（i） '/' num2str（opts.numepochs） '. Took ' num2str（t） ' seconds' '. Mini-batch mean squared error on training set is ' num2str（mean（L（（n-numbatches）:（n-1）））） str_perf]）;

39. nn.learningRate = nn.learningRate * nn.scaling_learningRate; //每个epoch结束后按scaling_learningRate缩放学习率（nnsetup中默认为1，即学习率保持不变）

40.end

41.end

函数转为nnff,意为前向传播算法

1.function nn = nnff（nn, x, y）

2.%NNFF performs a feedforward pass

3.% nn = nnff（nn, x, y） returns an neural network structure with updated

4.% layer activations, error and loss （nn.a, nn.e and nn.L）

6. n = nn.n;

7. m = size（x, 1）;

9. x = [ones（m,1） x];

10. nn.a{1} = x;

11.

12. //feedforward pass

13. for i = 2 :

n-1

14. //根据选择的激活函数不同进行正向传播计算

15. //可以回过头看nnsetup里面的第一个参数activation_function

16. //sigm就是sigmoid

17. switch nn.activation_function

18. case 'sigm'

19. % Calculate the unit's outputs （including the bias term）

20. nn.a{i} = sigm（nn.a{i - 1} * nn.W{i - 1}'）;

21. case 'tanh_opt'

22. nn.a{i} = tanh_opt（nn.a{i - 1} * nn.W{i - 1}'）;

23. end

24.

25. //dropout计算部分 dropoutFraction是nnsetup中可以设置的一个参数

26. if（nn.dropoutFraction > 0） //>0则执行dropout：训练时以dropoutFraction的概率随机将隐藏单元的输出置0，测试时则把输出乘以（1 - dropoutFraction）

27. if（nn.testing）

28. nn.a{i} = nn.a{i}.*（1 - nn.dropoutFraction）;

29. else

30. nn.dropOutMask{i} = （rand（size（nn.a{i}））>nn.dropoutFraction）;

31. nn.a{i} = nn.a{i}.*nn.dropOutMask{i};

32. end

33. end

34. //计算sparsity,nonSparsityPenalty是对隐藏单元平均激活度偏离sparsityTarget的惩罚系数

35. //calculate running exponential activations for use with sparsity

36. if（nn.nonSparsityPenalty>0） //>0则执行

37. nn.p{i} = 0.99 * nn.p{i} + 0.01 * mean（nn.a{i}, 1）;

38. end

39.

40. //Add the bias term

41. nn.a{i} = [ones（m,1） nn.a{i}];

42. end

43. switch nn.output //输出层的结果

44. case 'sigm'

45. nn.a{n} = sigm（nn.a{n - 1} * nn.W{n - 1}'）;

46. case 'linear'

47. nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';

48. case 'softmax'

49. nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';

50. nn.a{n} = exp（bsxfun（@minus, nn.a{n}, max（nn.a{n},[],2）））;

51. nn.a{n} = bsxfun（@rdivide, nn.a{n}, sum（nn.a{n}, 2））;

52. end

53.

54. //error and loss

55. //计算error （计算输出层的e）

56. nn.e = y - nn.a{n}; % 即 $y - h_{W,b}(x)$

57.

58. switch nn.output

59. case {'sigm', 'linear'}

60. nn.L = 1/2 * sum（sum（nn.e .^ 2）） / m;//见公式P9（UFLDL）

61. case 'softmax'

62. nn.L = -sum（sum（y .* log（nn.a{n}））） / m;

63. end

64.end

接下来跳转到nnbp函数

1.function nn = nnbp（nn）

2.//NNBP performs backpropagation

3.// nn = nnbp（nn） returns an neural network structure with updated weights

5. n = nn.n;

6. sparsityError = 0;

7. switch nn.output

8. case 'sigm'

9. d{n} = - nn.e .* （nn.a{n} .* （1 - nn.a{n}））; //见UFLDL反向传导算法公式2

10. case {'softmax','linear'}

11. d{n} = - nn.e;

12.

展开阅读全文