%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%% Neural Network for Classification %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clear;
%%%%%%%%%%%%%% True Classification Rule %%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% The sign of this polynomial defines the two classes on the plane:
%%% points with true_rule(x1,x2)>0 are class 1, otherwise class 0.
true_rule=@(x1,x2)((x1/4).^4-(x1/4).^2.*(x2/7)+(x2/7).^3);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
figure(1);
clf;
%%%%%%%%%%%%%%%%%%%%%%%% Hyperparameters %%%%%%%%%%%%%
HYPERPARAMETER=0.0000001; %%%%% weight-decay coefficient; try 0.00000 or 0.00002
%%% Gradient of the L2 penalty (HYPERPARAMETER/2)*||a||^2, subtracted
%%% from the weight matrices at every update (weight decay).
diffhyper=@(a)(HYPERPARAMETER*a);
%%%%%%%%%%%%%%%%%%%%%%% Training Conditions %%%%%%%%%%%%%%%%%
CYCLEONE=5; %%%%%%%%%% evaluate/plot every CYCLEONE cycles; try 2 5 50
CYCLEALL=200; %%% training cycles
%%%%%%%%%%%%%%%%%%%%%%% Neural Network Architecture %%%%%%%%%%%
N=1; %%% output Units
H=10; %%% hidden Units
M=2; %%% input Units
%%%%%%%%%%%%%%%%%%%%%%% Training Parameters %%%%%%%%%%%%%%%%%
ETA=0.8; %%% gradient constant (learning rate)
ALPHA=0.3; %%% accelerator (momentum coefficient)
EPSILON=0.01; %%% regularization: constant added to the sigmoid derivative so gradients never vanish
%%%%%%%%%%%%%%%%%%%%%% Training Initialization %%%%%%%%
%%% NOTE: rng must run before the four randn calls below; reordering
%%% any of them changes the random stream and hence every result.
rng(100); %%% random seed
u=0.5*randn(N,H); %%% weight from hidden to output
w=0.5*randn(H,M); %%% weight from input to hidden
ph=0.5*randn(N,1); %%% bias of output
th=0.5*randn(H,1); %%% bias of hidden
du=zeros(N,H); %%% gradient/momentum buffer, weight from hidden to output
dw=zeros(H,M); %%% gradient/momentum buffer, weight from input to hidden
dph=zeros(N,1); %%% gradient/momentum buffer, bias of output
dth=zeros(H,1); %%% gradient/momentum buffer, bias of hidden
%%%%%%%%%%%%%%%%%% Neural Network Sigmoid Function %%%%%%%%%%%%
%%% neuron(W,b,x) = sigmoid(W*x+b), elementwise; used for both layers.
neuron=@(u,ph,h)(1./(1+exp(-(u*h+ph))));
%%%%%%%%%%%%%%%%%%%%% Generate Training Data %%%%%%%%%%%%%%%%%%%%%
n=100; %%%%% Number of Training samples
xdata=-4*rand(2,n)+2; %%% uniform samples on [-2,2]^2 (must stay the first rand call)
%%% Labels squashed into {0.01,0.99} so sigmoid targets avoid saturation.
ydata(1,:)=0.01+0.98*(sign(true_rule(xdata(1,:),xdata(2,:)))+1)/2;
%%%%%%%%%%%%%%%%%%%%% True and Data Drawing %%%%%%%%%%%%%%%%%%
subplot(2,2,1);
%%% Vectorized two-class scatter (was an O(n) loop of single plot calls,
%%% indexing ydata(i) linearly instead of ydata(1,i)).
pos=ydata(1,:)>0.5; %%% logical mask of class-1 samples
plot(xdata(1,pos),xdata(2,pos),'ro'); hold on;
plot(xdata(1,~pos),xdata(2,~pos),'b*');
xlim([-2,2]);
ylim([-2,2]);
title('Data');
hold off;
drawnow;
subplot(2,2,2);
%%% Filled contour of the true decision boundary (true_rule = 0).
Xaa=-2:0.01:2;
Yaa=-2:0.01:2;
[XX,YY] = meshgrid(Xaa,Yaa);
ZZ=true_rule(XX,YY);
contourf(XX,YY,-ZZ,[0,0],'k-');
title('True');
hold off;
drawnow;
%%%%%%%%%%%%%%%%%%%% Test Points %%%%%%%%%%%%%%%%%%%%%%%
%%% Regular TESTXNUMBER x TESTXNUMBER evaluation grid over [-2,2]^2,
%%% matching the plot limits. testx1 varies along rows (first input),
%%% testx2 along columns (second input).
%%% Fix: the original start value -(TESTXNUMBER-1)/20 equals -2 only
%%% when TESTXNUMBER=41; the range is now [-2,2] for any resolution,
%%% and the grid is built vectorized instead of grown in a double loop.
TESTXNUMBER=41; %%% grid resolution per axis
gaxis=-2+4*(0:TESTXNUMBER-1)/(TESTXNUMBER-1); %%% -2:0.1:2 for TESTXNUMBER=41
testx1=repmat(gaxis',1,TESTXNUMBER); %%% testx1(j,k)=gaxis(j)
testx2=repmat(gaxis,TESTXNUMBER,1); %%% testx2(j,k)=gaxis(k)
%%%%%%%%%%%%%%%%%%%% Backpropagation Learning %%%%%%%%%%%%
%%% Online (per-sample) gradient descent with momentum (ALPHA) and
%%% weight decay (diffhyper) on the two weight matrices. Every
%%% CYCLEONE cycles the network is evaluated on the test grid and the
%%% learned boundary is drawn by the external helper script `oekaki`.
NREC=floor(CYCLEALL/CYCLEONE);     %%% number of recorded evaluation points
training_err=zeros(1,NREC);        %%% mean squared training error per record (preallocated)
test_err=zeros(1,NREC);            %%% mean squared test error per record (preallocated)
train_process=zeros(1,NREC);       %%% cycle index of each record (preallocated)
testy=zeros(TESTXNUMBER,TESTXNUMBER); %%% network output on the test grid (preallocated)
for cycle=1:1:CYCLEALL
    training_e=0;
    for i=1:1:n
        x=xdata(:,i);              %%% input sample (M x 1)
        t=ydata(:,i);              %%% target (N x 1)
        h=neuron(w,th,x);          %%% hidden activations (H x 1)
        o=neuron(u,ph,h);          %%% network output (N x 1)
        %%% Fix: was (t-o)^2 (matrix power), which errors for N>1;
        %%% sum of squares is identical for N=1 and valid for vector outputs.
        training_e=training_e+sum((t-o).^2);
        %%%%%%%%%%%%%%%%%% delta calculation %%%%%%%%%%%%
        %%% EPSILON flattens the sigmoid derivative so updates never stall.
        delta1=(o-t).*(o.*(1-o)+EPSILON);
        delta2=(delta1'*u)'.*(h.*(1-h)+EPSILON);
        %%%%%%%%%%%%%%%%%% gradient (momentum folded into the buffer) %%%%%%%%%%%
        du=delta1*h'+ALPHA*du;
        dph=delta1+ALPHA*dph;
        dw=delta2*x'+ALPHA*dw;
        dth=delta2+ALPHA*dth;
        %%%%%%%%%%%%%%%%%%% stochastic steepest descent %%%%%%%%%%
        u=u-ETA*du-diffhyper(u);   %%% weight decay applied to weights only,
        ph=ph-ETA*dph;             %%% never to the biases
        w=w-ETA*dw-diffhyper(w);
        th=th-ETA*dth;
    end
    %%%%%%%%%% Draw Trained Results %%%%%%%%%%%%%%%%
    if(mod(cycle,CYCLEONE)==0)
        test_e=0;
        for j=1:1:TESTXNUMBER
            for k=1:1:TESTXNUMBER
                xxx=[testx1(j,k);testx2(j,k)];
                truey=0.01+0.98*(sign(true_rule(xxx(1),xxx(2)))+1)/2;
                h=neuron(w,th,xxx);
                output=neuron(u,ph,h);
                testy(j,k)=output;
                %%% Same fix as above: .^2 with sum generalizes to N>1.
                test_e=test_e+sum((output-truey).^2);
            end
        end
        training_err(cycle/CYCLEONE)=training_e/n;
        test_err(cycle/CYCLEONE)=test_e/(TESTXNUMBER^2);
        train_process(cycle/CYCLEONE)=cycle;
        fprintf('[%g], Trained=%f, Test=%f\n',cycle,training_e/n,test_e/(TESTXNUMBER^2));
        %%%% Neural network Drawing (external helper script, not shown here)
        subplot(2,2,4);
        oekaki;
        drawnow;
    end
end