%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% 5-layer Neural Network by Simple Backpropagation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% A 5-layer neural network is trained by simple backpropagation.
%%%%% Start from a clean session: remove all workspace variables and close
%%%%% every figure window, including figures whose handles are hidden.
%%%%% No clf is needed here: it would only clear (or create) a figure that
%%%%% close all hidden deletes immediately afterwards.
clear;
close all hidden;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% Regularization switches. RIDGE takes precedence when both are 1;
%%%%% when both are 0 the penalty term is the constant 0.
RIDGE=0; %%%%% 1: penalty term proportional to the parameter itself
LASSO=1; %%%%% 1: penalty term proportional to the sign of the parameter
HYPERPARAMETER1=0.00001; %%%%% coefficient of the RIDGE penalty term
HYPERPARAMETER2=0.000001; %%%%% coefficient of the LASSO penalty term
%%%%% fff(a) returns the penalty term for a parameter array a.
%%%%% Start from "no penalty" and override it by the selected switch;
%%%%% RIDGE is tested last so that it wins over LASSO.
fff=@(a)(0);
if(LASSO==1)
    fff=@(a)(HYPERPARAMETER2*sign(a));
end
if(RIDGE==1)
    fff=@(a)(HYPERPARAMETER1*a);
end
%%%%%%%%%%%%%%%%%%%%%%%%% Input:M, Output: N %%%%%%%%
%%%%% Problem size: M = PIX*PIX input units, N output units.
PIX=5;
M=PIX*PIX;
N=2; %%%%% Output units %%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
n=2000; %%%%% Number of training samples
ntest=2000; %%%%% Number of test samples
%%%%%%%%%%%%%%%%%%%% Training Data reading %%%%%%%%
%%%%% The file is read as one sample per row and transposed so that each
%%%%% column of xdata is one input vector.
A=dlmread('char_train.txt');
xdata=A';
%%%%% Fail early with a clear message instead of a dimension error deep
%%%%% inside the training loop.
if(size(xdata,1)~=M || size(xdata,2)<n)
    error('char_train.txt must provide at least %d samples of %d values each.',n,M);
end
%%%%% Targets: samples 1..1000 are class [1;0], all later ones class [0;1].
ydata=zeros(N,n);
ydata(1,1:min(n,1000))=1;
ydata(2,1001:n)=1;
%%%%%%%%%%%%%%%%%%% Test data
A=dlmread('char_test.txt');
xtest=A';
if(size(xtest,1)~=M || size(xtest,2)<ntest)
    error('char_test.txt must provide at least %d samples of %d values each.',ntest,M);
end
%%%%% Test targets are sized by ntest (not n) and use the same class layout.
ytest=zeros(N,ntest);
ytest(1,1:min(ntest,1000))=1;
ytest(2,1001:ntest)=1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%% Training Record %%%%%%%%%%%%%%%%%%%%%
%%%%% Errors are recorded at cycle = 0, MODCYC, 2*MODCYC, ... < CYCLE.
CYCLE=500; %%%%% Number of training cycles
MODCYC=5; %%%%% Recording interval in cycles
%%%%% Number of recordings. ceil keeps the array size an integer and large
%%%%% enough even when CYCLE is not a multiple of MODCYC.
NREC=ceil(CYCLE/MODCYC);
Err0=zeros(1,NREC); %%%%% cycle number of each recording
Err1=zeros(1,NREC); %%%%% training error of each recording
Err2=zeros(1,NREC); %%%%% test error of each recording
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%% Neural Network Architecture %%%
%%%%% M=H4 -> H3 -> H2 -> H1 -> H0=N
%%%%% Layer widths, numbered from the output (H0) back to the input (H4).
H0=N;
H1=4;
H2=6;
H3=8;
H4=M;
%%%%% Learning hyperparameters.
ETA0=0.5; %%%%% initial step size
ALPHA=0.1; %%%%% weight given to the previous update step
EPSILON=0.0001; %%%%% constant added to the sigmoid slope in the deltas
%%%%% Unit model: logistic sigmoid applied to weights*input + threshold.
sig=@(t) 1./(1+exp(-t));
out=@(w,t,h) sig(w*h+t);
%%%%% Activation vectors of every layer (h4: input, h0: output).
h0=zeros(H0,1);
h1=zeros(H1,1);
h2=zeros(H2,1);
h3=zeros(H3,1);
h4=zeros(H4,1);
%%%%% Random initialization. wK maps layer K+1 to layer K; thK is the
%%%%% threshold vector of layer K. The order of the randn calls is kept
%%%%% fixed so that a seeded run draws the same values.
w0=0.1*randn(H0,H1);
w1=0.1*randn(H1,H2);
w2=0.1*randn(H2,H3);
w3=0.1*randn(H3,H4);
th0=0.1*randn(H0,1);
th1=0.1*randn(H1,1);
th2=0.1*randn(H2,1);
th3=0.1*randn(H3,1);
%%%%% Previous update steps, one per parameter array, all starting at zero.
dw0=zeros(size(w0));
dw1=zeros(size(w1));
dw2=zeros(size(w2));
dw3=zeros(size(w3));
dth0=zeros(size(th0));
dth1=zeros(size(th1));
dth2=zeros(size(th2));
dth3=zeros(size(th3));
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%% Backpropagation Learning %%%%%%%%%%%%
%%%%% Backpropagation learning: CYCLE passes over the training set with one
%%%%% parameter update per sample.
for cycle=0:(CYCLE-1)
    %%%%% Step size: equals ETA0 at cycle 0 and shrinks as cycle grows.
    ETA=ETA0*CYCLE/(CYCLE+10*cycle);
    for i=1:n
        %%%%% Visiting order 1, floor(n/2)+1, 2, floor(n/2)+2, ...:
        %%%%% alternates between the first and second half of the data.
        ii=mod(i-1,2)*floor(n/2)+floor((i+1)/2);
        h4=xdata(:,ii);
        t=ydata(:,ii);
        %%%%% Forward pass, input layer to output layer.
        h3=out(w3,th3,h4);
        h2=out(w2,th2,h3);
        h1=out(w1,th1,h2);
        h0=out(w0,th0,h1);
        %%%%% Backward pass. All deltas are computed from the weights as
        %%%%% they were before this sample's update. The slope h.*(1-h) is
        %%%%% raised by EPSILON, so the factor never becomes exactly zero.
        delta0=(h0.*(1-h0)+EPSILON).*(h0-t);
        delta1=(h1.*(1-h1)+EPSILON).*(delta0'*w0)';
        delta2=(h2.*(1-h2)+EPSILON).*(delta1'*w1)';
        delta3=(h3.*(1-h3)+EPSILON).*(delta2'*w2)';
        %%%%% Per layer: new step = ALPHA * previous step - ETA * gradient,
        %%%%% then move the parameters by the step minus the penalty fff.
        %%%%% Output layer
        dw0=ALPHA*dw0-ETA*delta0*h1';
        dth0=ALPHA*dth0-ETA*delta0;
        w0=w0+dw0-fff(w0);
        th0=th0+dth0-fff(th0);
        %%%%% Hidden layer 1
        dw1=ALPHA*dw1-ETA*delta1*h2';
        dth1=ALPHA*dth1-ETA*delta1;
        w1=w1+dw1-fff(w1);
        th1=th1+dth1-fff(th1);
        %%%%% Hidden layer 2
        dw2=ALPHA*dw2-ETA*delta2*h3';
        dth2=ALPHA*dth2-ETA*delta2;
        w2=w2+dw2-fff(w2);
        th2=th2+dth2-fff(th2);
        %%%%% Hidden layer 3
        dw3=ALPHA*dw3-ETA*delta3*h4';
        dth3=ALPHA*dth3-ETA*delta3;
        w3=w3+dw3-fff(w3);
        th3=th3+dth3-fff(th3);
    end
    %%%%% Training and test errors are evaluated only every MODCYC cycles.
    if(mod(cycle,MODCYC)~=0)
        continue;
    end
    rec=cycle/MODCYC+1; %%%%% slot of this recording in Err0/Err1/Err2
    Err0(rec)=cycle;
    %%%%% Sum of squared output errors over the training set.
    err1=0;
    for i=1:n
        h4=xdata(:,i);
        t=ydata(:,i);
        h3=out(w3,th3,h4);
        h2=out(w2,th2,h3);
        h1=out(w1,th1,h2);
        h0=out(w0,th0,h1);
        resid=t-h0;
        err1=err1+dot(resid,resid);
    end
    Err1(rec)=err1/n;
    %%%%% Sum of squared output errors over the test set.
    err2=0;
    for i=1:ntest
        h4=xtest(:,i);
        t=ytest(:,i);
        h3=out(w3,th3,h4);
        h2=out(w2,th2,h3);
        h1=out(w1,th1,h2);
        h0=out(w0,th0,h1);
        resid=t-h0;
        err2=err2+dot(resid,resid);
    end
    Err2(rec)=err2/ntest;
    %%%%% Note: the printed values are the unnormalized sums err1/err2,
    %%%%% while Err1/Err2 (plotted later) hold the per-sample means.
    fprintf('[%g] Training error=%f, Test error=%f\n',cycle,err1,err2);
    %%%%% deep_see is not defined in this file; presumably it draws the
    %%%%% current network state into figure 1 -- TODO confirm.
    figure(1);
    deep_see; drawnow;
end
%%%%% Learning curves in figure 2: recorded cycle number on the x axis,
%%%%% training error in blue and test error in red.
figure(2);
plot(Err0,Err1,'b-');
hold on;
plot(Err0,Err2,'r-');
hold off;
title('X: Training Cycle. Blue: Training Error, Red: Test Error.');
%%%%%%%%%%%%%%%%%%%%%%%% Backpropagation End %%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%% Trained Data %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% Count training samples whose strongest output unit differs from the
%%%%% strongest target unit, printing the index of every such sample.
fprintf('Error in Train:');
counter1=0;
for i=1:n
    h4=xdata(:,i);
    t=ydata(:,i);
    %%%%% Forward pass with the trained parameters.
    h3=out(w3,th3,h4);
    h2=out(w2,th2,h3);
    h1=out(w1,th1,h2);
    o=out(w0,th0,h1);
    %%%%% Predicted class and true class are the positions of the maxima.
    [max1,maxarg1]=max(o);
    [max2,maxarg2]=max(t);
    if(maxarg1~=maxarg2)
        counter1=counter1+1;
        fprintf('%g ',i);
    end
end
%%%%% Summary: misclassified count, sample count and their ratio.
fprintf('\n Error/TRAINED = %g/%g = %.3f \n',counter1,n,counter1/n);
%%%%%%%%%%%%%%%%%%%%%%%%%% Test Data %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% Count test samples whose strongest output unit differs from the
%%%%% strongest target unit, printing the index of every such sample.
fprintf('Error in Test:');
counter2=0;
for i=1:ntest
    h4=xtest(:,i);
    t=ytest(:,i);
    %%%%% Forward pass with the trained parameters.
    h3=out(w3,th3,h4);
    h2=out(w2,th2,h3);
    h1=out(w1,th1,h2);
    o=out(w0,th0,h1);
    %%%%% Predicted class and true class are the positions of the maxima.
    [max1,maxarg1]=max(o);
    [max2,maxarg2]=max(t);
    if(maxarg1~=maxarg2)
        counter2=counter2+1;
        fprintf('%g ',i);
    end
end
%%%%% Summary: misclassified count, sample count and their ratio.
fprintf('\n Error/TEST = %g/%g = %.3f \n',counter2,ntest,counter2/ntest);
%%% figure(2);
%%% deep_see;