%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% 5-layer neural network by Sequential Training
%%%%% 3-layer ==> 4-layer ==> 5-layer
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Neural Network Architecture %%%
%%%%%%%%%%%%%%%% (1) M -> H03 -> N
%%%%%%%%%%%%%%%% (2) M => H03 -> H02 -> N
%%%%%%%%%%%%%%%% (3) M => H03 => H02 -> H01 -> N
%%%%%%%%%%%%%%%% => is a copy of above ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Start from a clean session: remove all workspace variables, clear the
% current figure, then close every figure window (hidden handles included).
clear;
clf;
close all hidden;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Regularization switches (1 = enabled). When both are 1 the ridge branch
% wins, because it is the only one that does not depend on LASSO.
RIDGE = 0;
LASSO = 1;
% Penalty coefficients: HYPERPARAMETER1 scales the ridge term,
% HYPERPARAMETER2 scales the lasso term.
HYPERPARAMETER1 = 0.00001;
HYPERPARAMETER2 = 0.000002;
% fff(p) is the penalty term that the training loops subtract from a
% parameter array p on every update step:
%   ridge : HYPERPARAMETER1 * p
%   lasso : HYPERPARAMETER2 * sign(p)
%   none  : scalar 0
if (RIDGE ~= 1) && (LASSO ~= 1)
    fff = @(p) 0;
elseif RIDGE == 1
    fff = @(p) HYPERPARAMETER1*p;
else
    fff = @(p) HYPERPARAMETER2*sign(p);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ---- Problem size ------------------------------------------------------
% Each sample is a PIX-by-PIX image flattened to M input values and is
% classified by N output units.
PIX=5;
M=PIX*PIX;
N=2;          % output units
n=2000;       % number of training samples used
ntest=2000;   % number of test samples used

% ---- Training data -----------------------------------------------------
% dlmread yields one matrix row per line of the file; the transpose stores
% one sample per column, which is how the training loops index xdata.
A=dlmread('char_train.txt');
xdata=A';
% Fail here with a clear message instead of later with an index or
% dimension error inside the training loop.
if(size(xdata,1)~=M || size(xdata,2)<n)
    error('Training data in char_train.txt must provide at least %d samples of %d values (got %d samples of %d values).', ...
        n,M,size(xdata,2),size(xdata,1));
end
% One-hot targets: [1;0] for the first half of the samples, [0;1] for the
% rest. For n=2000 this is the boundary of 1000 that was hard-coded before.
% NOTE(review): assumes the file lists class 1 first and class 2 second in
% equal halves, which the interleaving index in the training loops also
% relies on -- confirm against the data files.
ydata=zeros(N,n);
nhalf=floor(n/2);
ydata(1,1:nhalf)=1;
ydata(2,nhalf+1:n)=1;

% ---- Test data ---------------------------------------------------------
A=dlmread('char_test.txt');
xtest=A';
if(size(xtest,1)~=M || size(xtest,2)<ntest)
    error('Test data in char_test.txt must provide at least %d samples of %d values (got %d samples of %d values).', ...
        ntest,M,size(xtest,2),size(xtest,1));
end
% Targets are sized by ntest (not n), so the two sample counts can differ.
% NOTE(review): same "equal halves, class 1 first" assumption as above.
ytest=zeros(N,ntest);
nhalftest=floor(ntest/2);
ytest(1,1:nhalftest)=1;
ytest(2,nhalftest+1:ntest)=1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Training Recorder %%%%%%%%%%%%%
% Training length and reporting interval: every training stage runs CYCLE
% passes over the data and records the errors on every MODCYC-th pass
% (cycle = 0, MODCYC, 2*MODCYC, ...).
CYCLE=200;
MODCYC=5;
% One slot per report. ceil() keeps the size an integer (and large enough)
% when MODCYC does not divide CYCLE evenly.
Err0=zeros(1,ceil(CYCLE/MODCYC));   % cycle number of each report
Err1=zeros(1,ceil(CYCLE/MODCYC));   % mean squared training error
Err2=zeros(1,ceil(CYCLE/MODCYC));   % mean squared test error
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%% Final Neural Network Architecture %%%
%%%%%%%%%%%%%%%%% M=H4 -> H3 -> H2 -> H1 -> H0=N
% Hidden-layer widths of the final network, listed from the layer next to
% the output (H01) to the layer next to the input (H03). The training
% stages below pick their layer sizes from these three values.
H01 = 4;
H02 = 6;
H03 = 8;
% Output and input widths of the final network.
H0 = N;
H4 = M;
% Logistic sigmoid, applied element-wise.
sig = @(z) (1./(1+exp(-z)));
% Output of one layer: weights W, bias b, input column x.
out = @(W,b,x) (sig(W*x+b));
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%% 1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% M= H2 -> H1 -> H0=N
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%% Hyperparameters %%%%%%%%%%%%%%%%%%%%%
% ===== Stage 1: train the 3-layer network  M -> H03 -> N =================
% Layer numbering counts back from the output: h2 is the input (M values),
% h1 the hidden layer (H03 units), h0 the output (N units). w1/th1 map
% h2 -> h1 and w0/th0 map h1 -> h0.
ETA0=0.8;        % initial learning rate
ALPHA=0.3;       % momentum coefficient (weight of the previous step)
EPSILON=0.0001;  % constant added to the sigmoid derivative in the deltas
% Layer sizes of this stage. H3 and H4 are set to 0 and are not read by any
% code visible here -- presumably deep_see uses them; confirm.
H0=N;
H1=H03;
H2=M;
H3=0;
H4=0;
% Activation buffers (overwritten on the first forward pass).
h0=zeros(H0,1);
h1=zeros(H1,1);
h2=zeros(H2,1);
% Logistic sigmoid and the output of one layer (weights w, bias t, input h).
sig=@(t)(1./(1+exp(-t)));
out=@(w,t,h)(sig(w*h+t));
% Small random initial weights and biases.
w0=0.1*randn(H0,H1);
th0=0.1*randn(H0,1);
w1=0.1*randn(H1,H2);
th1=0.1*randn(H1,1);
% Previous update steps, kept for the momentum term.
dw0=zeros(H0,H1);
dth0=zeros(H0,1);
dw1=zeros(H1,H2);
dth1=zeros(H1,1);
% ---- Backpropagation learning ------------------------------------------
for cycle=0:1:(CYCLE-1)
    % Learning rate decays from ETA0 as the cycle count grows.
    ETA=ETA0*CYCLE/(CYCLE+10*cycle);
    for i=1:1:n
        % Visit samples alternately from the first and the second half of
        % the training set: 1, n/2+1, 2, n/2+2, ...
        ii=(n/2)*mod(i-1,2)+floor((i+1)/2);
        h2=xdata(:,ii);
        t=ydata(:,ii);
        % Forward pass.
        h1=out(w1,th1,h2);
        h0=out(w0,th0,h1);
        % Backward pass: error signal per layer, using the sigmoid
        % derivative h.*(1-h) offset by EPSILON.
        delta0=(h0-t).*(h0.*(1-h0)+EPSILON);
        delta1=(delta0'*w0)'.*(h1.*(1-h1)+EPSILON);
        % Update step = -ETA * gradient + ALPHA * previous step.
        dw0=-ETA*delta0*h1'+ALPHA*dw0;
        dth0=-ETA*delta0+ALPHA*dth0;
        dw1=-ETA*delta1*h2'+ALPHA*dw1;
        dth1=-ETA*delta1+ALPHA*dth1;
        % Apply the step and subtract the regularization term fff(.) from
        % weights and biases alike.
        w0=w0+dw0-fff(w0);
        th0=th0+dth0-fff(th0);
        w1=w1+dw1-fff(w1);
        th1=th1+dth1-fff(th1);
    end
    % ---- Training and test error, every MODCYC cycles ------------------
    if(mod(cycle,MODCYC)==0)
        Err0(cycle/MODCYC+1)=cycle;
        % Sum of squared output errors over the training set.
        err1=0;
        for i=1:1:n
            h2=xdata(:,i);
            t=ydata(:,i);
            h1=out(w1,th1,h2);
            h0=out(w0,th0,h1);
            err1=err1+dot(t-h0,t-h0);
        end
        Err1(cycle/MODCYC+1)=err1/n;
        % Sum of squared output errors over the test set.
        err2=0;
        for i=1:1:ntest
            h2=xtest(:,i);
            t=ytest(:,i);
            h1=out(w1,th1,h2);
            h0=out(w0,th0,h1);
            err2=err2+dot(t-h0,t-h0);
        end
        Err2(cycle/MODCYC+1)=err2/ntest;
        % The log line prints the summed errors; Err1/Err2 hold the
        % per-sample means that are plotted below.
        fprintf('[%g] Training error=%f, Test error=%f\n',cycle,err1,err2);
        % deep_see is defined outside this file; it is invoked with
        % figure 1 current -- presumably it draws the network state there.
        figure(1);
        deep_see; drawnow;
    end
end
% Error curves of this stage. The title is set before drawnow so that it is
% part of the same screen refresh as the curves.
figure(2);
plot(Err0,Err1,'b-',Err0,Err2,'r-');
title('X: Training Cycle. Blue: Training Error, Red: Test Error.');
drawnow;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% M=H3 -> H2 -> H1 -> H0=N
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%% Hyperparameters %%%%%%%%%%%%%%%%%%%%%
% ===== Stage 2: train the 4-layer network  M -> H03 -> H02 -> N ==========
% Layer numbering counts back from the output: h3 is the input (M values),
% h2 and h1 are hidden layers (H03 and H02 units), h0 is the output.
% w2/th2 map h3 -> h2, w1/th1 map h2 -> h1, w0/th0 map h1 -> h0.
ETA0=0.6;        % initial learning rate
ALPHA=0.2;       % momentum coefficient (weight of the previous step)
EPSILON=0.0001;  % constant added to the sigmoid derivative in the deltas
% Layer sizes of this stage. H4 is set to 0 and is not read by any code
% visible here -- presumably deep_see uses it; confirm.
H4=0;
H3=M;
H2=H03;
H1=H02;
H0=N;
% Activation buffers (overwritten on the first forward pass).
h0=zeros(H0,1);
h1=zeros(H1,1);
h2=zeros(H2,1);
h3=zeros(H3,1);
% Logistic sigmoid and the output of one layer (weights w, bias t, input h).
sig=@(t)(1./(1+exp(-t)));
out=@(w,t,h)(sig(w*h+t));
% The first layer starts from the w1/th1 that exist when this stage begins
% (the input layer trained by the previous stage); the two layers above it
% start from small random values.
w2=w1;
th2=th1;
w1=0.1*randn(H1,H2);
th1=0.1*randn(H1,1);
w0=0.1*randn(H0,H1);
th0=0.1*randn(H0,1);
% Previous update steps, kept for the momentum term.
dw0=zeros(H0,H1);
dth0=zeros(H0,1);
dw1=zeros(H1,H2);
dth1=zeros(H1,1);
dw2=zeros(H2,H3);
dth2=zeros(H2,1);
% ---- Backpropagation learning ------------------------------------------
for cycle=0:1:(CYCLE-1)
    % Learning rate decays from ETA0 as the cycle count grows.
    ETA=ETA0*CYCLE/(CYCLE+10*cycle);
    for i=1:1:n
        % Visit samples alternately from the first and the second half of
        % the training set: 1, n/2+1, 2, n/2+2, ...
        ii=(n/2)*mod(i-1,2)+floor((i+1)/2);
        h3=xdata(:,ii);
        t=ydata(:,ii);
        % Forward pass.
        h2=out(w2,th2,h3);
        h1=out(w1,th1,h2);
        h0=out(w0,th0,h1);
        % Backward pass: error signal per layer, using the sigmoid
        % derivative h.*(1-h) offset by EPSILON.
        delta0=(h0-t).*(h0.*(1-h0)+EPSILON);
        delta1=(delta0'*w0)'.*(h1.*(1-h1)+EPSILON);
        delta2=(delta1'*w1)'.*(h2.*(1-h2)+EPSILON);
        % Update step = -ETA * gradient + ALPHA * previous step.
        dw0=-ETA*delta0*h1'+ALPHA*dw0;
        dth0=-ETA*delta0+ALPHA*dth0;
        dw1=-ETA*delta1*h2'+ALPHA*dw1;
        dth1=-ETA*delta1+ALPHA*dth1;
        dw2=-ETA*delta2*h3'+ALPHA*dw2;
        dth2=-ETA*delta2+ALPHA*dth2;
        % Apply the step and subtract the regularization term fff(.) from
        % weights and biases alike. All layers are updated, including the
        % copied first layer.
        w0=w0+dw0-fff(w0);
        th0=th0+dth0-fff(th0);
        w1=w1+dw1-fff(w1);
        th1=th1+dth1-fff(th1);
        w2=w2+dw2-fff(w2);
        th2=th2+dth2-fff(th2);
    end
    % ---- Training and test error, every MODCYC cycles ------------------
    if(mod(cycle,MODCYC)==0)
        Err0(cycle/MODCYC+1)=cycle;
        % Sum of squared output errors over the training set.
        err1=0;
        for i=1:1:n
            h3=xdata(:,i);
            t=ydata(:,i);
            h2=out(w2,th2,h3);
            h1=out(w1,th1,h2);
            h0=out(w0,th0,h1);
            err1=err1+dot(t-h0,t-h0);
        end
        Err1(cycle/MODCYC+1)=err1/n;
        % Sum of squared output errors over the test set.
        err2=0;
        for i=1:1:ntest
            h3=xtest(:,i);
            t=ytest(:,i);
            h2=out(w2,th2,h3);
            h1=out(w1,th1,h2);
            h0=out(w0,th0,h1);
            err2=err2+dot(t-h0,t-h0);
        end
        Err2(cycle/MODCYC+1)=err2/ntest;
        % The log line prints the summed errors; Err1/Err2 hold the
        % per-sample means that are plotted below.
        fprintf('[%g] Training error=%f, Test error=%f\n',cycle,err1,err2);
        % deep_see is defined outside this file; it is invoked with
        % figure 3 current -- presumably it draws the network state there.
        % drawnow flushes the figure right away, as the 3-layer stage does.
        figure(3);
        deep_see; drawnow;
    end
end
% Error curves of this stage. The title is set before drawnow so that it is
% part of the same screen refresh as the curves.
figure(4);
plot(Err0,Err1,'b-',Err0,Err2,'r-');
title('X: Training Cycle. Blue: Training Error, Red: Test Error.');
drawnow;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% M=H4 -> H3 -> H2 -> H1 -> H0=N
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%% Hyperparameters %%%%%%%%%%%%%%%%%%%%%
% ===== Stage 3: train the 5-layer network  M -> H03 -> H02 -> H01 -> N ===
% Layer numbering counts back from the output: h4 is the input (M values),
% h3, h2 and h1 are hidden layers (H03, H02 and H01 units), h0 is the
% output. w3/th3 map h4 -> h3, w2/th2 map h3 -> h2, w1/th1 map h2 -> h1 and
% w0/th0 map h1 -> h0.
ETA0=0.3;        % initial learning rate
ALPHA=0.05;      % momentum coefficient (weight of the previous step)
EPSILON=0.0001;  % constant added to the sigmoid derivative in the deltas
% Layer sizes of this stage.
H4=M;
H3=H03;
H2=H02;
H1=H01;
H0=N;
% Activation buffers (overwritten on the first forward pass).
h0=zeros(H0,1);
h1=zeros(H1,1);
h2=zeros(H2,1);
h3=zeros(H3,1);
h4=zeros(H4,1);
% Logistic sigmoid and the output of one layer (weights w, bias t, input h).
sig=@(t)(1./(1+exp(-t)));
out=@(w,t,h)(sig(w*h+t));
% The two lowest layers start from the w2/th2 and w1/th1 that exist when
% this stage begins (shifted up by one index); the two layers above them
% start from small random values.
w3=w2;
th3=th2;
w2=w1;
th2=th1;
w1=0.1*randn(H1,H2);
th1=0.1*randn(H1,1);
w0=0.1*randn(H0,H1);
th0=0.1*randn(H0,1);
% Previous update steps, kept for the momentum term.
dw0=zeros(H0,H1);
dth0=zeros(H0,1);
dw1=zeros(H1,H2);
dth1=zeros(H1,1);
dw2=zeros(H2,H3);
dth2=zeros(H2,1);
dw3=zeros(H3,H4);
dth3=zeros(H3,1);
% ---- Backpropagation learning ------------------------------------------
for cycle=0:1:(CYCLE-1)
    % Learning rate decays from ETA0 as the cycle count grows.
    ETA=ETA0*CYCLE/(CYCLE+10*cycle);
    for i=1:1:n
        % Visit samples alternately from the first and the second half of
        % the training set: 1, n/2+1, 2, n/2+2, ...
        ii=(n/2)*mod(i-1,2)+floor((i+1)/2);
        h4=xdata(:,ii);
        t=ydata(:,ii);
        % Forward pass.
        h3=out(w3,th3,h4);
        h2=out(w2,th2,h3);
        h1=out(w1,th1,h2);
        h0=out(w0,th0,h1);
        % Backward pass: error signal per layer, using the sigmoid
        % derivative h.*(1-h) offset by EPSILON.
        delta0=(h0-t).*(h0.*(1-h0)+EPSILON);
        delta1=(delta0'*w0)'.*(h1.*(1-h1)+EPSILON);
        delta2=(delta1'*w1)'.*(h2.*(1-h2)+EPSILON);
        delta3=(delta2'*w2)'.*(h3.*(1-h3)+EPSILON);
        % Update step = -ETA * gradient + ALPHA * previous step.
        dw0=-ETA*delta0*h1'+ALPHA*dw0;
        dth0=-ETA*delta0+ALPHA*dth0;
        dw1=-ETA*delta1*h2'+ALPHA*dw1;
        dth1=-ETA*delta1+ALPHA*dth1;
        dw2=-ETA*delta2*h3'+ALPHA*dw2;
        dth2=-ETA*delta2+ALPHA*dth2;
        dw3=-ETA*delta3*h4'+ALPHA*dw3;
        dth3=-ETA*delta3+ALPHA*dth3;
        % Apply the step and subtract the regularization term fff(.) from
        % weights and biases alike. All layers are updated, including the
        % two copied ones.
        w0=w0+dw0-fff(w0);
        th0=th0+dth0-fff(th0);
        w1=w1+dw1-fff(w1);
        th1=th1+dth1-fff(th1);
        w2=w2+dw2-fff(w2);
        th2=th2+dth2-fff(th2);
        w3=w3+dw3-fff(w3);
        th3=th3+dth3-fff(th3);
    end
    % ---- Training and test error, every MODCYC cycles ------------------
    if(mod(cycle,MODCYC)==0)
        Err0(cycle/MODCYC+1)=cycle;
        % Sum of squared output errors over the training set.
        err1=0;
        for i=1:1:n
            h4=xdata(:,i);
            t=ydata(:,i);
            h3=out(w3,th3,h4);
            h2=out(w2,th2,h3);
            h1=out(w1,th1,h2);
            h0=out(w0,th0,h1);
            err1=err1+dot(t-h0,t-h0);
        end
        Err1(cycle/MODCYC+1)=err1/n;
        % Sum of squared output errors over the test set.
        err2=0;
        for i=1:1:ntest
            h4=xtest(:,i);
            t=ytest(:,i);
            h3=out(w3,th3,h4);
            h2=out(w2,th2,h3);
            h1=out(w1,th1,h2);
            h0=out(w0,th0,h1);
            err2=err2+dot(t-h0,t-h0);
        end
        Err2(cycle/MODCYC+1)=err2/ntest;
        % The log line prints the summed errors; Err1/Err2 hold the
        % per-sample means that are plotted below.
        fprintf('[%g] Training error=%f, Test error=%f\n',cycle,err1,err2);
        % deep_see is defined outside this file; it is invoked with
        % figure 5 current -- presumably it draws the network state there.
        % drawnow flushes the figure right away, as the 3-layer stage does.
        figure(5);
        deep_see; drawnow;
    end
end
% Error curves of this stage. The title is set before drawnow so that it is
% part of the same screen refresh as the curves.
figure(6);
plot(Err0,Err1,'b-',Err0,Err2,'r-');
title('X: Training Cycle. Blue: Training Error, Red: Test Error.');
drawnow;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%% Trained Data %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ---- Classification errors of the final 5-layer network ----------------
% A sample counts as an error when the position of the largest network
% output differs from the position of the largest target component. The
% index of every misclassified sample is printed, followed by the error
% count and error rate.

% Training set.
counter1=0;
fprintf('Error in Train:');
for i=1:n
    t=ydata(:,i);
    h4=xdata(:,i);
    h3=out(w3,th3,h4);
    h2=out(w2,th2,h3);
    h1=out(w1,th1,h2);
    o=out(w0,th0,h1);
    [max1,maxarg1]=max(o);   % predicted class = strongest output unit
    [max2,maxarg2]=max(t);   % true class = position of the 1 in the target
    if(maxarg1~=maxarg2)
        counter1=counter1+1;
        fprintf('%g ',i);
    end
end
fprintf('\n Error/TRAINED = %g/%g = %.3f \n',counter1,n,counter1/n);

% Test set.
counter2=0;
fprintf('Error in Test:');
for i=1:ntest
    t=ytest(:,i);
    h4=xtest(:,i);
    h3=out(w3,th3,h4);
    h2=out(w2,th2,h3);
    h1=out(w1,th1,h2);
    o=out(w0,th0,h1);
    [max1,maxarg1]=max(o);   % predicted class = strongest output unit
    [max2,maxarg2]=max(t);   % true class = position of the 1 in the target
    if(maxarg1~=maxarg2)
        counter2=counter2+1;
        fprintf('%g ',i);
    end
end
fprintf('\n Error/TEST = %g/%g = %.3f \n',counter2,ntest,counter2/ntest);