%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Let's compare PSISCV and WAIC for fixed inputs
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Cross validation, PSIS, and WAIC should be compared with
%%% the generalization error. PSISCV = Pareto Smoothing Cross Validation
%%% Program was made by Sumio Watanabe
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clear;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
n=10; %%% training sample size
BAISU=100; %%% Test multiplier
ntest=BAISU*n; %%% test sample size
%%%%%%%%%%%%%%%%%%%
a0=0.2; %%% true parameter
s0=100; %%% true parameter
mu=0.01; %%% hyperparameter
%%%%%%%%%%%%%%%%%%%
KKK=500; %%% posteior sample number
CYCLE=100; %%% recommended 10000. Independent Trial number
Leverage=0; %%% If a leverage sample point is included, Leverage=1.
%%%%%%%%%%% Regression Model: Y = aX^2 + Noise(0,s)
%%% Model: p(y|x,a,s)=(s/(2pi))^{0.5}exp(-(s/2)(y-ax)^2)
%%%%%%%%%%% Prior : Gamma distribution
%%% Prior: phi(a,s|mu) = (1/Z) s exp( -(mu/2)s*(1+a^2) )
%%%%%%%%%%% statistical model
prob=@(x,y,a,s)( (s/(2*pi)).^0.5.*exp(-(s/2).*(y-a*x.^2).^2) );
%%%%% Random seed
rng(1);
%%%%%%%%%%%%%%%%% Training and test data
for i=1:1:n
x(i)=0.1*i;
end
if(Leverage==1)
x(n)=10;
end
for i=1:1:ntest
xtest(i)=x(mod((i-1),n)+1);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
ytest=a0*xtest.^2+(1/s0)^0.5*randn(1,ntest);
Stest=-mean(log(prob(xtest,ytest,a0,s0)));
%%%%%%%%%%%%%%%%%%%%%
for kur=1:1:CYCLE
%%%%%%%%%%%%%%%%% Training sample generation
y=a0*x.^2+(1/s0)^0.5*randn(1,n);
Sn=-mean(log(prob(x,y,a0,s0)));
%%%%%%%%%%%%%%%%%%%% constant values
cc1=mu+sum(x.^4);
mm1=sum(x.^2.*y)/cc1;
aa1=0.5*(mu+sum(y.^2)-mm1^2*cc1);
%%%%%%%%%%% Posterior Sampling %%%%%%%%%%%%%%%%
sp=gamrnd((n+3)/2,1/aa1,1,KKK);
ap=mm1+(1./(sp*cc1).^0.5).*randn(1,KKK);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Remark: Gamma distribution in Maltab ---------
%%% X=gamrnd(a,b,1,n);
%%% gamrnd(a,b) --- f(x)=x^{a-1}exp(-x/b)/(b^a*Gamma(a))
%%% ----------------------------------------------
for i=1:1:n
fff=prob(x(i),y(i),ap,sp);
waic_each(i)=-log(mean(fff))+var(log(fff));
iscv_each(i)=log(mean(1./fff));
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
invf=sort(1./fff); %%% original importance weights are sorted.
uu=invf(0.8*KKK); %%% 0.8KKK location
ggg=invf((0.8*KKK+1):1:KKK)-uu; %%% tail weights in [0,infty)
para=gpfit(ggg); %%% statistical estimation of general Pareto dist.
yyy=1/(0.4*KKK):1/(0.2*KKK):(1-1/(0.4*KKK)); %%% Uniform in [0,1]
fff2=invf; %%% Improved importance weights
fff2(0.8*KKK+1:1:KKK)=uu+(para(2)/para(1))*((1-yyy).^(-para(1))-1); %%% replaced by estimation
trunc=KKK^0.75*mean(fff2); %%% VGG upperbound.
hhh=-(abs(trunc-fff2)-trunc-fff2)/2; %%% truncation of weights
psis_each(i)=-log(mean(hhh./invf)/mean(hhh)); %%% PSISCV
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end
for j=1:1:ntest
ftest=prob(xtest(j),ytest(j),ap,sp);
ge_each(j)=-log(mean(ftest));
end
WAIC(kur)=mean(waic_each)-Sn;
ISCV(kur)=mean(iscv_each)-Sn;
GE(kur)=mean(ge_each)-Stest;
PSIS(kur)=mean(psis_each)-Sn;
if(mod(kur,10)==0)
fprintf('%g\n',kur);
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('WAIC(mean,std) = %f,%f\n',mean(WAIC),std(WAIC));
fprintf('ISCV(mean,std) = %f,%f\n',mean(ISCV),std(ISCV));
fprintf('PSIS(mean,std) = %f,%f\n',mean(PSIS),std(PSIS));
fprintf('GEN (mean,std) = %f,%f\n',mean(GE),std(GE));
fprintf('mean(|WAIC-GE|) = %f\n',mean(abs(WAIC-GE)));
fprintf('mean(|ISCV-GE|) = %f\n',mean(abs(ISCV-GE)));
fprintf('mean(|PSIS-GE|) = %f\n',mean(abs(PSIS-GE)));
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
figure(1);
hist(abs(ISCV-GE)-abs(WAIC-GE),3*CYCLE^0.5);
xlim([-0.2/(n/10),0.2/(n/10)]);
xlabel('|ISCV-GE|-|WAIC-GE|');
grid on;
figure(2);
hist(abs(PSIS-GE)-abs(WAIC-GE),3*CYCLE^0.5);
xlim([-0.2/(n/10),0.2/(n/10)]);
xlabel('|PSIS-GE|-|WAIC-GE|');
grid on;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%