# SHANNON's H.  Measures and simulations to assess  H for studies of Wason's selection task
# by Phil Johnson-Laird November-December 2016 in LISP
# Reimplemented and adapted for R by M. Ragni <ragni@cs.uni-freiburg.de> on 30th Dec 2016
# Adapted for 16p on 13th June 2017
#------------------------------------------------------------------------------------------------
# Part 1: Shannon's measure of entropy H
#------------------------------------------------------------------------------------------------
library(readxl) #load the Excel-package
library(readr)

# Shannon's function defined with exception for 0
# Element-wise Shannon term -p * log2(p), using the convention 0 * log2(0) = 0.
#
# Args:
#   x: numeric scalar, vector, matrix or data frame of probabilities.
# Returns:
#   An object shaped like x holding -x * log2(x); entries whose absolute value
#   is at most sqrt(.Machine$double.eps) yield 0 instead of NaN. NA stays NA.
#
# The zero test is done per element: a single all.equal(x, 0) on an input of
# length > 1 never succeeds, so zero entries would otherwise come back as NaN.
minus_plog2p <- function(x) {
  tol <- sqrt(.Machine$double.eps)
  is_zero <- !is.na(x) & abs(x) <= tol
  out <- -(x * log2(x))
  # Overwrite the NaN produced by 0 * -Inf with the conventional value 0.
  out[is_zero] <- 0
  out
}

# Function calculating Shannon for vector of arbitrary length
# Shannon entropy H (in bits) of one set of response frequencies.
#
# Args:
#   data: frequencies (counts) of the response categories.
# Returns:
#   H rounded to 4 decimals. The relative frequencies are themselves rounded
#   to 4 decimals before the entropy terms are computed and summed.
shn_inf_vec <- function(data) {
  total <- sum(data)
  rel_freq <- round(data / total, 4)
  entropy_terms <- minus_plog2p(rel_freq)
  # na.rm = TRUE drops any NaN/NA terms from the sum.
  round(sum(entropy_terms, na.rm = TRUE), 4)
}

# Generalize vector function to matrix; apply shn_inf_vec to each row
# Row-wise Shannon entropy of a frequency table.
#
# Args:
#   data: matrix or data frame; each row is one set of frequencies.
# Returns:
#   A plain (unnamed) vector with one shn_inf_vec() value per row of data.
shn_inf_mat <- function(data) {
  per_row <- apply(X = data, MARGIN = 1, FUN = shn_inf_vec)
  as.vector(per_row)
}

# Extend the input matrix with a column holding Shannon's measure
# Append the row-wise Shannon entropy as an extra, last column of data.
#
# Args:
#   data: matrix or data frame of frequencies (one set per row).
# Returns:
#   data with one additional column computed by shn_inf_mat().
matrix_and_output <- function(data) {
  # The call stays inline so that cbind() labels the new column as before.
  cbind(data, shn_inf_mat(data))
}

#--------------------------------------------------------------------------------------------
# Part 2:  Randomizing test to determine whether selections of items in Wason's selection 
# task are independent of one another
#-----------------------------------------------------------------------------------------------

#   0. Input file with n_of_expts to run; each row is one experiment and holds the frequencies of
#       the 16 selection patterns (null, nq, q, qnq, np, ..., pnpqnq), where the numbers are the
#       counts of Ss (participants) making each selection (independent data).
#   1. Smaller functions to read out frequencies, transform them to probabilities, and compute
#       the number of Ss in the original study
#   2. Compute probabilities with which each of the four cards were selected in the data-pairs.
#   3. Compute amount of information, H in input data-pairs, e.g., 2.8 bits. (Part 1)
#   4. run-n-experiments synthetic 
#       For each simulated experiment
#       For each of N simulated participants
#       For each of the 4 cards (p, not-p, q, not-q)
#       select it or not according to its probability in the original experiment 
#       => 1 selection of cards for the participant
#       => set of n selections for the experiment.
#   5. Output the statistics for the set of experiments

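# Loaded file: assumes the 16 pattern columns are ordered as in chosen_pattern():
# null, nq, q, qnq, np, npnq, npq, npqnq, p, pnq, pq, pqnq, pnp, pnpnq, pnpq, pnpqnq.
# Needs to be generalized for arbitrary patterns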
# Per-row selection frequency of each of the four cards.
#
# Args:
#   data: matrix or data frame with (at least) 16 columns of pattern
#         frequencies, in the column order produced by chosen_pattern().
# Returns:
#   A matrix with one row per row of data and the columns
#   cardp, cardnp, cardq, cardnq: the summed frequencies of all patterns
#   that contain the respective card.
freq_of_items <- function(data) {
  # Column positions of the patterns that include each card.
  cols_p  <- c(9:16)
  cols_np <- c(5:8, 13:16)
  cols_q  <- c(3, 4, 7, 8, 11, 12, 15, 16)
  cols_nq <- c(2, 4, 6, 8, 10, 12, 14, 16)

  # drop = FALSE keeps a one-row matrix two-dimensional, as rowSums() requires.
  cardp  <- rowSums(data[, cols_p, drop = FALSE])
  cardnp <- rowSums(data[, cols_np, drop = FALSE])
  cardq  <- rowSums(data[, cols_q, drop = FALSE])
  cardnq <- rowSums(data[, cols_nq, drop = FALSE])

  cbind(cardp, cardnp, cardq, cardnq)
}

#Sum over all participants
# Total number of responses (participants) per row.
#
# Args:
#   data: matrix or data frame of frequencies (one experiment per row).
# Returns:
#   Numeric vector with the sum of each row; missing values are ignored.
sum_nos <- function(data) {
  # The second positional argument of rowSums() is na.rm, not a column
  # selection; the former `1:ncol(data)` was coerced to TRUE. Spell it out.
  rowSums(data, na.rm = TRUE)
}

# Compute probabilities from frequencies
# Turn item frequencies into selection probabilities.
#
# Args:
#   frequencies_of_items: frequencies (vector or matrix).
#   n_of_ss: number of participants the frequencies are divided by.
# Returns:
#   frequencies_of_items / n_of_ss, rounded to 4 decimals.
convert_freqs_to_probs <- function(frequencies_of_items, n_of_ss) {
  proportions <- frequencies_of_items / n_of_ss
  round(proportions, digits = 4)
}

 # Simulate the card selections of N participants and encode each selection
 # as a number between 0 and 15.
 #
 # Args:
 #   N: number of simulated participants.
 #   probp, probnp, probq, probnq: selection probability of each card.
 # Returns:
 #   Numeric vector of length N; each card is drawn independently (0/1) and
 #   the draws are weighted 8 (p), 4 (not-p), 2 (q) and 1 (not-q).
 transform_to_decimal <- function(N, probp, probnp, probq, probnq) {
   # Draws are taken in the order p, not-p, q, not-q.
   picked_p  <- rbinom(N, 1, probp)
   picked_np <- rbinom(N, 1, probnp)
   picked_q  <- rbinom(N, 1, probq)
   picked_nq <- rbinom(N, 1, probnq)
   8 * picked_p + 4 * picked_np + 2 * picked_q + picked_nq
 }

# Count how often each of the 16 selection patterns occurs.
#
# Args:
#   list_numbers: vector of pattern codes 0..15 (as produced by
#                 transform_to_decimal()).
# Returns:
#   A 1 x 16 integer matrix of counts; column k + 1 holds the number of
#   entries equal to code k and is named after the cards in that pattern.
chosen_pattern <- function(list_numbers) {
  # Column labels in code order 0, 1, ..., 15.
  pattern_names <- c(
    "null", "nq", "q", "qnq",
    "np", "npnq", "npq", "npqnq",
    "p", "pnq", "pq", "pqnq",
    "pnp", "pnpnq", "pnpq", "pnpqnq"
  )
  counts <- vapply(
    0:15,
    function(code) length(which(list_numbers == code)),
    integer(1)
  )
  matrix(counts, nrow = 1, dimnames = list(NULL, pattern_names))
}

# Run a given number of synthetic experiments (parameter: iter), each with N synthetic
# participants (parameter: N), using the selection probabilities of the four cards p, not-p,
# q and not-q (parameters: prob_p, prob_np, prob_q, prob_nq); generates one row of 16
# pattern counts per experiment.
# Simulate `iter` synthetic experiments with N participants each.
#
# Args:
#   iter: number of synthetic experiments (rows of the result); may be 0.
#   N: number of simulated participants per experiment.
#   prob_p, prob_np, prob_q, prob_nq: selection probability of each card.
# Returns:
#   An iter x 16 matrix; row n holds the pattern counts returned by
#   chosen_pattern() for the n-th simulated experiment.
run_n_experiment <- function(iter, N, prob_p, prob_np, prob_q, prob_nq) {
  out <- matrix(NA, nrow = iter, ncol = 16)
  # seq_len() yields an empty loop for iter == 0, whereas 1:iter would
  # iterate over c(1, 0) and index past the end of `out`.
  for (n in seq_len(iter)) {
    out[n, ] <- chosen_pattern(
      transform_to_decimal(N, prob_p, prob_np, prob_q, prob_nq)
    )
  }
  out
}

# Per-row selection probability of each card.
#
# Args:
#   data: table of pattern frequencies (one experiment per row).
# Returns:
#   The card frequencies from freq_of_items() divided by the row totals from
#   sum_nos(), rounded by convert_freqs_to_probs().
probs_data <- function(data) {
  card_counts <- freq_of_items(data)
  participants <- sum_nos(data)
  convert_freqs_to_probs(card_counts, participants)
}

# Compare the entropy of one observed experiment with the entropy of
# synthetic experiments in which the cards are selected independently.
#
# Args:
#   iter: number of synthetic experiments to simulate.
#   i: row index of the observed experiment in data.
#   data: table of pattern frequencies (one experiment per row).
# Returns:
#   Unnamed numeric vector of length 5:
#     1. Shannon entropy of the observed row,
#     2. mean entropy of the synthetic experiments (rounded to 4 decimals),
#     3. number of synthetic experiments with entropy <= the observed one,
#     4. number of synthetic experiments with entropy >  the observed one,
#     5. proportion (3) / ((3) + (4)).
analysis_synthetic_experiment <- function(iter, i, data) {
  card_probs <- convert_freqs_to_probs(freq_of_items(data), sum_nos(data))

  # drop = FALSE keeps the row two-dimensional, so sum_nos() (rowSums) also
  # works when data is a plain matrix rather than a data frame.
  observed <- data[i, , drop = FALSE]
  n_participants <- sum_nos(observed)
  shn_orig <- shn_inf_vec(observed)

  data_test <- run_n_experiment(
    iter, n_participants,
    card_probs[i, 1], card_probs[i, 2], card_probs[i, 3], card_probs[i, 4]
  )
  shn_synthetic <- shn_inf_mat(data_test)

  less_inf <- sum(shn_synthetic <= shn_orig, na.rm = TRUE)
  more_inf <- sum(shn_synthetic > shn_orig, na.rm = TRUE)
  mean_inf <- round(mean(shn_synthetic), 4)
  prob1 <- less_inf / (less_inf + more_inf)

  c(shn_orig, mean_inf, less_inf, more_inf, prob1)
}

# Run analysis_synthetic_experiment() for the first `iter` rows of data.
#
# Args:
#   iter: number of rows (observed experiments) of data to analyse; may be 0.
#   N: number of synthetic experiments simulated per row (passed on as the
#      `iter` argument of analysis_synthetic_experiment()).
#   data: table of pattern frequencies (one experiment per row).
# Returns:
#   An iter x 5 numeric matrix with the columns Shannon_original,
#   Mean_synthetic, less_inf, more_inf and Probability.
all_experiments <- function(iter, N, data) {
  stat_names <- c("Shannon_original", "Mean_synthetic", "less_inf",
                  "more_inf", "Probability")
  # Preallocate instead of growing the matrix with rbind() on every pass.
  out <- matrix(NA_real_, nrow = iter, ncol = length(stat_names),
                dimnames = list(NULL, stat_names))
  # seq_len() gives an empty loop for iter == 0 (1:iter would visit 1 and 0).
  for (i in seq_len(iter)) {
    out[i, ] <- analysis_synthetic_experiment(N, i, data)
  }
  out
}
## END of Shannon-measure

## Compute values
# Read the study table; columns 5 to 20 are passed on as the 16
# pattern-frequency columns expected by the functions above.
data1 <- read_excel("Studies16.xlsx")
data1b <- data1[, 5:20]

# One result row per study, each based on 10000 synthetic experiments.
# nrow(data1b) replaces a hard-coded 99 so that the result always has as many
# rows as data1, which the cbind() below requires.
# NOTE(review): no RNG seed is set, so results differ between runs.
data1c <- all_experiments(nrow(data1b), 10000, data1b)
data1d <- cbind(data1, data1c)

write.csv2(x = data1d, file = "Overall-Simulation-Shannon-results.csv",
           row.names = FALSE)

