library(rstan)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write=TRUE)

sigmoid <- function(x) 1/(1+exp(-x))

sigmoidal_nn <- function(x,w_1,threshold_1,w_2,threshold_2){
  H <- nrow(w_1)
  hidden_unit <- sigmoid(w_1 %*% x+threshold_1)
  
  f <- sigmoid(w_2 %*% hidden_unit + threshold_2)
  return (f)
}

set.seed(1)

n <- 100
M <- 1

H0 <- 3
true_w1 <- matrix(0, nrow=H0, ncol=M)
true_th1 <- numeric(H0)
true_w2 <- numeric(H0)
true_th2 <- 0
for(i in 1:H0){
  true_w1[i,] <- rnorm(M)
}
true_th1 <- rnorm(H0)
true_w2 <- rnorm(H0)
true_th2 <- rnorm(1)

x_range <- c(-5,5)
x <- matrix(0,nrow=n, ncol=M)
y <- numeric(n)
for(i in 1:n){
  x[i] <- runif(1, min=x_range[1], max=x_range[2]) 
  y[i] <- sigmoidal_nn(x[i,], true_w1, true_th1, true_w2, true_th2)+rnorm(1,0,1)
} 

Learning_H <- 3
nn_data <- list(n=n, M=M, x=x, y=y, H = Learning_H)

nn_fit <- stan(file = 'neural_net.stan', data = nn_data,
            iter = 8000, chains = 1)
post_param <- extract(nn_fit, permuted=T)
parameter_num <- length(post_param$threshold_2)


predicted_y <- numeric(n)
for(i in 1:n){
  for(j in 1:parameter_num){
    predicted_y[i] <- predicted_y[i] + sigmoidal_nn(x[i,], post_param$w_1[j,,],
                                                    post_param$threshold_1[j,], post_param$w_2[j,], post_param$threshold_2[j])
  }
  predicted_y[i] <- predicted_y[i] / parameter_num  
}