%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% 5-layer Neural Network with Bottle-Neck Pre-Training
%%%%% First, a 5-layer autoencoder is trained by the bottle-neck method.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Neural Network Architecture %%%
%%%%%%%%%%%%%%%% (1) M -> H03 -> H02 -> H03 -> M
%%%%%%%%%%%%%%%% (2) M => H03 => H02 -> H01 -> N
%%%%%%%%%%%%%%%% => is a copy of above ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Workspace and figure reset %%%%%%%%%%%%%%%%
clear;
clf;
close all hidden;
%%%%%%%%%%%%%%%% Regularization switches (set at most one to 1) %%%%%%
RIDGE=0;                 %%%%% Ridge (L2) penalty: off %%%%%
LASSO=1;                 %%%%% Lasso (L1) penalty: on  %%%%%
HYPERPARAMETER1=0.00001; %%%%% ridge coefficient %%%%%
HYPERPARAMETER2=0.000001; %%%%% lasso coefficient %%%%%
%%%%% fff(a) is the penalty gradient subtracted from each weight update.
if (RIDGE == 1)
    fff = @(a)(HYPERPARAMETER1*a);       % gradient of (lambda/2)*a.^2
elseif (LASSO == 1)
    fff = @(a)(HYPERPARAMETER2*sign(a)); % subgradient of lambda*|a|
else
    fff = @(a)(0);                       % no regularization
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%% Problem dimensions %%%%%%%%%%%%%%%%%%%%
PIX=5;        % characters are PIX x PIX pixel images
M=PIX*PIX;    % input dimension (flattened image)
N=2;          %%%%% Output units (two classes, one-hot coded) %%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
n=2000;       % training samples: first n/2 are class 1, rest class 2
ntest=2000;   % test samples with the same layout
xdata=zeros(M,n);
ydata=zeros(N,n);
xtest=zeros(M,ntest);  % FIX: was zeros(M,n); correct only because n==ntest
ytest=zeros(N,ntest);  % FIX: was zeros(N,n); sized by ntest now
%%%%%%%%%%%%%%%%%%%% Training Data reading %%%%%%%%
% char_train.txt: one sample per row, M columns; transpose to M x n.
A=dlmread('char_train.txt');
xdata=A';
% One-hot targets: first half of the columns is class 1 ([1;0]),
% second half is class 2 ([0;1]).
for i=1:1:n
    if(i<=n/2)   % generalized from the hard-coded i<1001 (same for n=2000)
        ydata(:,i)=[1;0];
    else
        ydata(:,i)=[0;1];
    end
end
%%%%%%%%%%%%%%%%%%%%%%% Test data
A=dlmread('char_test.txt');
xtest=A';
for i=1:1:ntest
    if(i<=ntest/2)   % generalized from the hard-coded i<1001
        ytest(:,i)=[1;0];
    else
        ytest(:,i)=[0;1];
    end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
CYCLE=500;   % total number of training cycles (epochs) per phase
MODCYC=5;    % record and plot errors every MODCYC cycles
Err0=zeros(1,CYCLE/MODCYC);  % x-axis: cycle number at each recording
Err1=zeros(1,CYCLE/MODCYC);  % mean squared training error per recording
Err2=zeros(1,CYCLE/MODCYC);  % mean squared test error per recording
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%% Bottle-Neck Neural Network (autoencoder) %%%
%%%%% M=H4 -> H3(8) -> H2(6) -> H1(8) -> H0=M  (symmetric: H1=H3, bottleneck H2)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Layer sizes for the autoencoder; layer 4 is the input, layer 0 the
% reconstruction, so the data flows h4 -> h3 -> h2 -> h1 -> h0.
H0=M;   % output layer: reconstructs the input
H1=8;
H2=6;   % bottleneck layer
H3=8;
H4=M;   % input layer
%%%%%%%%%%%%%%%%%%%% input, hidden, output activations %%%%%%%%%%%%%%
h0=zeros(H0,1);
h1=zeros(H1,1);
h2=zeros(H2,1);
h3=zeros(H3,1);
h4=zeros(H4,1);
%%%%%%%%%%%%%%%%%%%%%%%% Neural Network Calculation %%%%%%%%%%
sig=@(t)(1./(1+exp(-t)));   % elementwise logistic sigmoid
out=@(w,t,h)(sig(w*h+t));   % layer map: sigmoid(W*h + bias)
%%%%%%%%%%%%%%%%%%%%%% Training Initialization %%%%%%%%
% wK maps layer K+1 to layer K (wK is H_K x H_{K+1}); thK is layer K's bias.
% Small random init keeps the sigmoids in their near-linear region.
w0=0.01*randn(H0,H1);
w1=0.01*randn(H1,H2);
w2=0.01*randn(H2,H3);
w3=0.01*randn(H3,H4);
th0=0.01*randn(H0,1);
th1=0.01*randn(H1,1);
th2=0.01*randn(H2,1);
th3=0.01*randn(H3,1);
%%%%%%%%%%%%%%%% momentum buffers (previous update steps) %%%%%%%%%%%%
dw0=zeros(H0,H1);
dw1=zeros(H1,H2);
dw2=zeros(H2,H3);
dw3=zeros(H3,H4);
dth0=zeros(H0,1);
dth1=zeros(H1,1);
dth2=zeros(H2,1);
dth3=zeros(H3,1);
ETA0=0.8;       % initial learning rate
ALPHA=0.3;      % momentum coefficient
EPSILON=0.0001; % added to the sigmoid derivative so gradients never vanish
%%%%%%%%%%%%%%%%%%%% Backpropagation Learning %%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%% Backpropagation Learning (autoencoder phase) %%%%%%%%%%%%
% Per-sample (stochastic) gradient descent with momentum; the target of
% each sample is the sample itself (reconstruction).
for cycle=0:1:(CYCLE-1)
% Learning-rate schedule: ETA0 at cycle 0, decaying roughly as 1/cycle.
ETA=ETA0*CYCLE/(CYCLE+10*cycle);
for i=1:1:n
% Interleave the two classes: ii visits 1, n/2+1, 2, n/2+2, ...
% so consecutive updates alternate between class-1 and class-2 samples.
ii=(n/2)*mod(i-1,2)+floor((i+1)/2);
h4=xdata(:,ii);
t=xdata(:,ii);   % autoencoder target = the input itself
% Forward pass through the four layers.
h3=out(w3,th3,h4);
h2=out(w2,th2,h3);
h1=out(w1,th1,h2);
h0=out(w0,th0,h1);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Backward pass: deltaK is the error signal at layer K. h.*(1-h) is the
% sigmoid derivative; EPSILON keeps it nonzero when units saturate.
delta0=(h0-t).*(h0.*(1-h0)+EPSILON);
delta1=(delta0'*w0)'.*(h1.*(1-h1)+EPSILON);
delta2=(delta1'*w1)'.*(h2.*(1-h2)+EPSILON);
delta3=(delta2'*w2)'.*(h3.*(1-h3)+EPSILON);
%%%%%%%%%%%%%%%%%% gradient step with momentum %%%%%%%%%%%
dw0=-ETA*delta0*h1'+ALPHA*dw0;
dth0=-ETA*delta0+ALPHA*dth0;
dw1=-ETA*delta1*h2'+ALPHA*dw1;
dth1=-ETA*delta1+ALPHA*dth1;
dw2=-ETA*delta2*h3'+ALPHA*dw2;
dth2=-ETA*delta2+ALPHA*dth2;
dw3=-ETA*delta3*h4'+ALPHA*dw3;
dth3=-ETA*delta3+ALPHA*dth3;
%%%%%%%%%%%%%%%%%%% steepest descent + regularization penalty %%%%%%%%%%
w0=w0+dw0-fff(w0);
th0=th0+dth0-fff(th0);
w1=w1+dw1-fff(w1);
th1=th1+dth1-fff(th1);
w2=w2+dw2-fff(w2);
th2=th2+dth2-fff(th2);
w3=w3+dw3-fff(w3);
th3=th3+dth3-fff(th3);
end
%%%%%%%%%%%%%% Calculation of Training and Generalization Errors %%%%
if(mod(cycle,MODCYC)==0)
Err0(cycle/MODCYC+1)=cycle;
% Reconstruction error over the full training set.
err1=0;
for i=1:1:n
h4=xdata(:,i);
t=h4;
h3=out(w3,th3,h4);
h2=out(w2,th2,h3);
h1=out(w1,th1,h2);
h0=out(w0,th0,h1);
err1=err1+dot(t-h0,t-h0);
end
Err1(cycle/MODCYC+1)=err1/n;   % stored as mean; fprintf below prints the sum
% Reconstruction error over the test set.
err2=0;
for i=1:1:ntest
h4=xtest(:,i);
t=h4;
h3=out(w3,th3,h4);
h2=out(w2,th2,h3);
h1=out(w1,th1,h2);
h0=out(w0,th0,h1);
err2=err2+dot(t-h0,t-h0);
end
Err2(cycle/MODCYC+1)=err2/ntest;
fprintf('[%g] Training error=%f, Test error=%f\n',cycle,err1,err2);
figure(1);
% deep_see: external visualization script (not in this file) — TODO confirm
deep_see; drawnow;
end
end
% Learning curves of the autoencoder phase: x-axis is the recorded cycle
% number (Err0), blue is training MSE, red is test MSE.
figure(2);
plot(Err0,Err1,'b-',Err0,Err2,'r-'); drawnow;
title('X: Training Cycle. Blue: Training Error, Red: Test Error.');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%% Neural Network for Character Recognition %%%
%%%%% M=H4 -> H3 -> H2 -> H1 -> H0=N
% The two lower layers (w3/th3 and w2/th2) keep the weights pre-trained
% by the bottle-neck autoencoder above; only the top two layers are
% freshly initialized before fine-tuning.
H0=N;   % output layer: one unit per class
H1=4;
H2=6;   % same bottleneck width as the autoencoder
H3=8;
H4=M;   % input layer
%%%%%%%%%%%%%%%%%%%% input, hidden, output activations %%%%%%%%%%%%%%
h0=zeros(H0,1);
h1=zeros(H1,1);
h2=zeros(H2,1);
h3=zeros(H3,1);
h4=zeros(H4,1);
%%%%%%%%%%%%%%%%%%%%%%%% Neural Network Calculation %%%%%%%%%%
sig=@(t)(1./(1+exp(-t)));   % logistic sigmoid (re-defined, identical)
out=@(w,t,h)(sig(w*h+t));   % layer map: sigmoid(W*h + bias)
%%%%%%%%%%%%%%%%%%%%%% Training Initialization %%%%%%%%
w0=0.01*randn(H0,H1);
w1=0.01*randn(H1,H2);
%%% trained w2 in bottle-neck is used
%%% trained w3 in bottle-neck is used
th0=0.01*randn(H0,1);  % FIX: was randn(N,1); H0==N, use H0 for consistency
th1=0.01*randn(H1,1);
%%% trained th2 in bottle-neck is used
%%% trained th3 in bottle-neck is used
%%%%%%%%%%%% momentum buffers reset for the fine-tuning phase %%%%%%%%
dw0=zeros(H0,H1);
dw1=zeros(H1,H2);
dw2=zeros(H2,H3);
dw3=zeros(H3,H4);
dth0=zeros(H0,1);
dth1=zeros(H1,1);
dth2=zeros(H2,1);
dth3=zeros(H3,1);
ETA0=0.8;       % initial learning rate
ALPHA=0.3;      % momentum coefficient
EPSILON=0.0001; % added to the sigmoid derivative so gradients never vanish
%%%%%%%%%%%%%%%%%%%% Backpropagation Learning %%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%% Backpropagation Learning (classification phase) %%%%%%%%%%%%
% Same SGD-with-momentum loop as the autoencoder phase, but the target
% is now the one-hot class label ydata; all four layers are fine-tuned,
% including the pre-trained w2/w3/th2/th3.
for cycle=0:1:(CYCLE-1)
% Learning-rate schedule: ETA0 at cycle 0, decaying roughly as 1/cycle.
ETA=ETA0*CYCLE/(CYCLE+10*cycle);
for i=1:1:n
% Interleave the two classes: ii visits 1, n/2+1, 2, n/2+2, ...
ii=(n/2)*mod(i-1,2)+floor((i+1)/2);
h4=xdata(:,ii);
t=ydata(:,ii);   % target = one-hot class label
% Forward pass.
h3=out(w3,th3,h4);
h2=out(w2,th2,h3);
h1=out(w1,th1,h2);
h0=out(w0,th0,h1);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Backward pass; EPSILON keeps the sigmoid-derivative factor nonzero.
delta0=(h0-t).*(h0.*(1-h0)+EPSILON);
delta1=(delta0'*w0)'.*(h1.*(1-h1)+EPSILON);
delta2=(delta1'*w1)'.*(h2.*(1-h2)+EPSILON);
delta3=(delta2'*w2)'.*(h3.*(1-h3)+EPSILON);
%%%%%%%%%%%%%%%%%% gradient step with momentum %%%%%%%%%%%
dw0=-ETA*delta0*h1'+ALPHA*dw0;
dth0=-ETA*delta0+ALPHA*dth0;
dw1=-ETA*delta1*h2'+ALPHA*dw1;
dth1=-ETA*delta1+ALPHA*dth1;
dw2=-ETA*delta2*h3'+ALPHA*dw2;
dth2=-ETA*delta2+ALPHA*dth2;
dw3=-ETA*delta3*h4'+ALPHA*dw3;
dth3=-ETA*delta3+ALPHA*dth3;
%%%%%%%%%%%%%%%%%%% steepest descent + regularization penalty %%%%%%%%%%
w0=w0+dw0-fff(w0);
th0=th0+dth0-fff(th0);
w1=w1+dw1-fff(w1);
th1=th1+dth1-fff(th1);
w2=w2+dw2-fff(w2);
th2=th2+dth2-fff(th2);
w3=w3+dw3-fff(w3);
th3=th3+dth3-fff(th3);
end
%%%%%%%%%%%%%% Calculation of Training and Generalization Errors %%%%
if(mod(cycle,MODCYC)==0)
Err0(cycle/MODCYC+1)=cycle;
% Squared-error against the one-hot labels over the training set.
err1=0;
for i=1:1:n
h4=xdata(:,i);
t=ydata(:,i);
h3=out(w3,th3,h4);
h2=out(w2,th2,h3);
h1=out(w1,th1,h2);
h0=out(w0,th0,h1);
err1=err1+dot(t-h0,t-h0);
end
Err1(cycle/MODCYC+1)=err1/n;   % stored as mean; fprintf prints the sum
% Same error over the test set.
err2=0;
for i=1:1:ntest
h4=xtest(:,i);
t=ytest(:,i);
h3=out(w3,th3,h4);
h2=out(w2,th2,h3);
h1=out(w1,th1,h2);
h0=out(w0,th0,h1);
err2=err2+dot(t-h0,t-h0);
end
Err2(cycle/MODCYC+1)=err2/ntest;
fprintf('[%g] Training error=%f, Test error=%f\n',cycle,err1,err2);
figure(3);
% deep_see: external visualization script (not in this file) — TODO confirm
deep_see; drawnow;
end
end
% Learning curves of the classification phase: blue is training error,
% red is test error, x-axis is the recorded cycle number.
figure(4);
plot(Err0,Err1,'b-',Err0,Err2,'r-');
title('X: Training Cycle. Blue: Training Error, Red: Test Error.');
%%%%%%%%%%%%%%%%%%%%%%%%%% Trained Data %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Count training samples whose predicted class (argmax of the network
% output) disagrees with the argmax of the one-hot label, printing the
% index of every misclassified sample.
counter1=0;
fprintf('Error in Train:');
for i=1:n
    % Forward pass through the trained classifier.
    h4=xdata(:,i);
    t=ydata(:,i);
    h3=out(w3,th3,h4);
    h2=out(w2,th2,h3);
    h1=out(w1,th1,h2);
    o=out(w0,th0,h1);
    % Compare predicted class with the labeled class.
    [max1,maxarg1]=max(o);
    [max2,maxarg2]=max(t);
    if(maxarg1~=maxarg2)
        fprintf('%g ',i);
        counter1=counter1+1;
    end
end
fprintf('\n Error/TRAINED = %g/%g = %.3f \n',counter1,n,counter1/n);
%%%%%%%%%%%%%%%%%%%%%%%%%% Test Data %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Same misclassification count as above, but over the held-out test
% set; prints each misclassified index and the final error rate.
counter2=0;
fprintf('Error in Test:');
for i=1:ntest
    % Forward pass through the trained classifier.
    h4=xtest(:,i);
    t=ytest(:,i);
    h3=out(w3,th3,h4);
    h2=out(w2,th2,h3);
    h1=out(w1,th1,h2);
    o=out(w0,th0,h1);
    % Compare predicted class with the labeled class.
    [max1,maxarg1]=max(o);
    [max2,maxarg2]=max(t);
    if(maxarg1~=maxarg2)
        fprintf('%g ',i);
        counter2=counter2+1;
    end
end
fprintf('\n Error/TEST = %g/%g = %.3f \n',counter2,ntest,counter2/ntest);