1 Data preparation

data <- read_csv("Exp_data.csv")

# delete trials 1 to 4 

data <- subset(data, trialNr > 4)


#new column with correct answer text, trial type, likelihood ratio, and strategies used
data <- data %>% 
  mutate(correctAns2 = if_else(correctAns == 1, "Correct", "Wrong"),
         urnSize = ifelse(trialNr %% 2 == 0, "Big", "Small"),
         LR = if_else(trialNr<7, "Infinite",
                         if_else(trialNr<15 & trialNr>6, "LR 4","LR 1.5")),
         trialType = if_else(trialNr<7, "Control",
                            if_else(trialNr>6 & trialNr<9,"A", 
                                    if_else(trialNr>14 & trialNr<17, "A", 
                                            if_else(trialNr>8 & trialNr<11, "B",
                                                    if_else(trialNr>16 & trialNr<19, "B", 
                                                            if_else(trialNr>10 & trialNr<13, "C", 
                                                                    if_else(trialNr>18 & trialNr<21, "C", "D"))))))))


data <- subset(data, select = c(sID_new, age, sex, trialNr, correctAns, correctAns2, urnSize, LR, trialType))

data_withC <- data

data <- subset(data, trialType != "Control")

data$trialOrd <- rep(1:16,90)
data$trialOrd2 <- rep(c("First_half","First_half","First_half","First_half","First_half","First_half","First_half","First_half","Second_half","Second_half","Second_half","Second_half","Second_half","Second_half","Second_half","Second_half"),90)

data$age <- factor(data$age, levels = c(3:5), labels = c("3 years", "4 years", "5 years"))

data$correctAns2 <- factor(data$correctAns2, levels = c("Wrong", "Correct"))

data$trialType <- factor(data$trialType, levels = c("A", "B", "C", "D"))

data$LR <- factor(data$LR, levels = c("LR 1.5", "LR 4"))

data$trialOrd2 <- factor(data$trialOrd2, levels = c("First_half", "Second_half"))

2 Initial Step: Check performance in the control trials

data_control <- subset(data_withC, trialType == "Control")


proptest_data <- data_control %>%
  group_by(age, correctAns2) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.

Against chance-level performance:

age3 <- prop.test(proptest_data$n[1], proptest_data$n[1] + proptest_data$n[2], p = 0.5)
age3

## 
##  1-sample proportions test with continuity correction
## 
## data:  proptest_data$n[1] out of proptest_data$n[1] + proptest_data$n[2], null probability 0.5
## X-squared = 40.017, df = 1, p-value = 2.518e-10
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.8088376 0.9689137
## sample estimates:
##         p 
## 0.9166667

age4 <- prop.test(proptest_data$n[3], proptest_data$n[3] + proptest_data$n[4], p = 0.5)
age4

## 
##  1-sample proportions test with continuity correction
## 
## data:  proptest_data$n[3] out of proptest_data$n[3] + proptest_data$n[4], null probability 0.5
## X-squared = 28.017, df = 1, p-value = 1.203e-07
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.7292309 0.9249853
## sample estimates:
##    p 
## 0.85

age5 <- prop.test(proptest_data$n[5], proptest_data$n[5] + proptest_data$n[6], p = 0.5)
age5

## 
##  1-sample proportions test with continuity correction
## 
## data:  proptest_data$n[5] out of proptest_data$n[5] + proptest_data$n[6], null probability 0.5
## X-squared = 43.35, df = 1, p-value = 4.577e-11
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.8299403 0.9784276
## sample estimates:
##         p 
## 0.9333333

Very high accuracy in all age groups in the control trials.

3 Analyses of test trials

3.1 Graphs

Define a theme:

myTheme <- theme(plot.title = element_text(face="bold", size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        axis.text.x = element_text(size = 14, angle = 0), 
        axis.text.y = element_text(size = 16, angle = 0),
        legend.text = element_text(size = 18),
        legend.title = element_text(face = "bold", size = 18),
        strip.text.x = element_text(size = 18),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        panel.background = element_blank(), 
        axis.line.x = element_line(colour = "black"), 
        axis.line.y = element_line(colour = "black"),
        axis.text = element_text(colour ="black"), 
        axis.ticks = element_line(colour ="black"))

Make the plots:

Age overall

# create a summary dataset that also contains the percentages
# AGE

plotdata <- data %>%
  group_by(sID_new, age, correctAns2) %>%
  summarize(n = n()) #%>%

## `summarise()` has grouped output by 'sID_new', 'age'. You can override using
## the `.groups` argument.

  #mutate(pct = n/sum(n),
   #      lbl = scales::percent(pct))

plotdata_age_correct <- subset(plotdata, correctAns2 == "Correct")
plotdata_age_wrong <- subset(plotdata, correctAns2 == "Wrong" & n == 16)

plotdata_age_wrong[plotdata_age_wrong=="Wrong"] <- "Correct"
plotdata_age_wrong[plotdata_age_wrong==16] <- 0

plotdata_age <- rbind(plotdata_age_correct, plotdata_age_wrong)

plotdat <- plotdata_age

theme_set(theme_light(base_size = 20, base_family = "Poppins"))


library(ggdist)
library(ggridges)

## 
## Attaching package: 'ggridges'

## The following objects are masked from 'package:ggdist':
## 
##     scale_point_color_continuous, scale_point_color_discrete,
##     scale_point_colour_continuous, scale_point_colour_discrete,
##     scale_point_fill_continuous, scale_point_fill_discrete,
##     scale_point_size_continuous

g_age <- ggplot(plotdat, aes(x = n, y = age, fill = age)) +
  #facet_grid( ~ LR,labeller = label_both)+
  ggtitle("Performance by age") +
  scale_x_continuous(limits = c(-1, 17), breaks=seq(0, 16, 1), expand = c(0,0))+
  stat_slab(aes(thickness = after_stat(pdf*n)), scale = 0.8, alpha = 0.7, fill = "#66c2a5") +
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi", color = "black", interval_alpha = 0, 
                    slab_alpha = 0.7, point_alpha = 0, fill = "#66c2a5") +
  #stat_dotsinterval()+
  #geom_density_ridges(alpha = 0.5)+
   #stat_summary(aes(x = rating_rec), fun.x=mean, geom="point", 
  #             color = "black", shape = 22, size = 2, group=1, alpha = 1)+
  stat_summary(aes(y = age, color = age), fun.data = mean_cl_boot, 
               geom = "errorbar", width = 0, size = 1, color = "black") +
  stat_summary(aes(y = age, fill = age), fun.y=mean, geom="point", 
               color = "black", shape = 22, size = 3, group=1, alpha = 1, fill = "black")+
  scale_fill_manual(values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  scale_color_manual(values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  #scale_fill_viridis_c(name = "Explanation \nRating", option = "C", breaks=c(-5,0,5), labels=c("narrow scope", "no preference", "broad scope"))+
  labs(x = "Number of correct choices", y = "Age group") +
  stat_summary(aes(label=round(after_stat(x),2)), fun.y=mean, geom="text", size=5.5,
             vjust = -1)+
  #scale_y_discrete(limits=rev)+
  myTheme+
  theme_light(base_family = "Poppins", base_size = 15)+
  theme(panel.grid = element_blank(), axis.text = element_text(colour ="black"))+
  theme(legend.position="none",
        legend.title=element_blank(),legend.key.width = unit(1.95, 'cm'))+
  theme(axis.text.y = element_text(size = 14, angle = 0))+
  annotate('text', x = 10, y = 3.5, label = '80.4%',size = 5.5)+
  annotate('text', x = 7, y = 2.5, label = '70.4%',size = 5.5)+
  annotate('text', x = 4, y = 1.3, label = '63.6%',size = 5.5)
  #+
   #annotate('curve', x = 13, y = 3.4, yend = 3.5, xend = 11.3, linewidth = 0.8, curvature = 0.2, arrow = arrow(length = unit(0.2, 'cm')))
  

g_age

ggsave("PScores_dist_age_bw.svg",width=6.5,height=5)
ggsave("PScores_dist_age_bw.pdf",width=6.5,height=5)

plotdata <- data %>%
  group_by(sID_new, age, LR, correctAns2) %>%
  summarize(n = n()) #%>%

## `summarise()` has grouped output by 'sID_new', 'age', 'LR'. You can override
## using the `.groups` argument.

  #mutate(pct = n/sum(n),
   #      lbl = scales::percent(pct))

plotdata_LR_correct <- subset(plotdata, correctAns2 == "Correct")
plotdata_LR_wrong <- subset(plotdata, correctAns2 == "Wrong" & n == 8)

plotdata_LR_wrong[plotdata_LR_wrong=="Wrong"] <- "Correct"
plotdata_LR_wrong[plotdata_LR_wrong==8] <- 0

plotdata_LR <- rbind(plotdata_LR_correct, plotdata_LR_wrong)

plotdat <- plotdata_LR

theme_set(theme_light(base_size = 20, base_family = "Poppins"))


library(ggdist)
library(ggridges)
g_LR <- ggplot(plotdat, aes(x = n, y = LR)) +
  ggtitle("Performance by LR") +
  #facet_grid( ~ age, labeller = label_both)+
  scale_x_continuous(limits = c(-1, 9), breaks=seq(0, 8, 1), expand = c(0,0))+
  stat_slab(aes(thickness = after_stat(pdf*n)), scale = 0.8, alpha = 0.7, fill = "#e78ac3") +
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi", color = "black", interval_alpha = 0, 
                    slab_alpha = 0.7, point_alpha = 0, fill = "#e78ac3") +
  #stat_dotsinterval()+
  #geom_density_ridges(alpha = 0.5)+
   #stat_summary(aes(x = rating_rec), fun.x=mean, geom="point", 
  #             color = "black", shape = 22, size = 2, group=1, alpha = 1)+
  stat_summary(aes(y = LR), fun.data = mean_cl_boot, 
               geom = "errorbar", width = 0, size = 1, color = "black") +
  stat_summary(aes(y = LR), fun.y=mean, geom="point", 
               color = "black", shape = 22, size = 3, group=1, alpha = 1, fill = "black")+
  scale_fill_manual(values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  scale_color_manual(values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  #scale_fill_viridis_c(name = "Explanation \nRating", option = "C", breaks=c(-5,0,5), labels=c("narrow scope", "no preference", "broad scope"))+
  labs(x = "Number of correct choices", y = "Likelihood ratio") +
  stat_summary(aes(label=round(after_stat(x),2)), fun.y=mean, geom="text", size=5.5,
             vjust = -1)+
  #scale_y_discrete(limits=rev)+
  myTheme+
  theme_light(base_family = "Poppins", base_size = 15)+
  theme(panel.grid = element_blank(), axis.text = element_text(colour ="black"))+
  theme(legend.position="none",
        legend.title=element_blank(),legend.key.width = unit(1.95, 'cm'))+
  theme(axis.text.y = element_text(size = 14, angle = 0))+
  annotate('text', x = 1, y = 2.5, label = '71.6%',size = 5.5)+
  annotate('text', x = 1, y = 1.3, label = '71.3%',size = 5.5)
  
  

g_LR

ggsave("PScores_dist_LR.svg",width=6,height=4)
ggsave("PScores_dist_LR.pdf",width=6,height=4)

Trial Type

plotdata <- data %>%
  group_by(sID_new, age, trialType, correctAns2) %>%
  summarize(n = n()) #%>%

## `summarise()` has grouped output by 'sID_new', 'age', 'trialType'. You can
## override using the `.groups` argument.

  #mutate(pct = n/sum(n),
   #      lbl = scales::percent(pct))

plotdata_trial_correct <- subset(plotdata, correctAns2 == "Correct")
plotdata_trial_wrong <- subset(plotdata, correctAns2 == "Wrong" & n == 4)

plotdata_trial_wrong[plotdata_trial_wrong=="Wrong"] <- "Correct"
plotdata_trial_wrong[plotdata_trial_wrong==4] <- 0

plotdata_trial <- rbind(plotdata_trial_correct, plotdata_trial_wrong)

plotdat <- plotdata_trial

theme_set(theme_light(base_size = 20, base_family = "Poppins"))

colnames(plotdat) <- c("sID_new", "age", "Trial type", "correctAns2", "n") 


library(ggdist)
library(ggridges)
g_TT <- ggplot(plotdat, aes(x = n, y = `Trial type`)) +
  ggtitle("Performance by trial type")+
  #facet_grid( ~ age, labeller = label_both)+
  scale_x_continuous(limits = c(-1, 5), breaks=seq(0, 4, 1), expand = c(0,0))+
  stat_slab(aes(thickness = after_stat(pdf*n)), scale = 0.8, alpha = 0.7, fill = "#8da0cb") +
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi", color = "black", interval_alpha = 0, 
                    slab_alpha = 0.7, point_alpha = 0, fill = "#8da0cb") +
  #stat_dotsinterval()+
  #geom_density_ridges(alpha = 0.5)+
   #stat_summary(aes(x = rating_rec), fun.x=mean, geom="point", 
  #             color = "black", shape = 22, size = 2, group=1, alpha = 1)+
  stat_summary(aes(y = `Trial type`), fun.data = mean_cl_boot, 
               geom = "errorbar", width = 0, size = 1, color = "black") +
  stat_summary(aes(y = `Trial type`), fun.y=mean, geom="point", 
               color = "black", shape = 22, size = 3, group=1, alpha = 1, fill ="black")+
  scale_fill_manual(values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  scale_color_manual(values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  #scale_fill_viridis_c(name = "Explanation \nRating", option = "C", breaks=c(-5,0,5), labels=c("narrow scope", "no preference", "broad scope"))+
  labs(x = "Number of correct choices", y = "Trial type") +
  stat_summary(aes(label=round(after_stat(x),2)), fun.y=mean, geom="text", size=5.5,
             vjust = -1)+
  scale_y_discrete(limits=rev)+
  myTheme+
  theme_light(base_family = "Poppins", base_size = 15)+
  theme(panel.grid = element_blank(), axis.text = element_text(colour ="black"))+
  theme(legend.position="none",
        legend.title=element_blank(),legend.key.width = unit(1.95, 'cm'))+
  theme(axis.text.y = element_text(size = 14, angle = 0))+  
  annotate('text', x = 0.5, y = 4.3, label = '68.3%',size = 5.5)+
  annotate('text', x = 0.5, y = 3.3, label = '75.5%',size = 5.5)+
  annotate('text', x = 0.5, y = 2.3, label = '66.8%',size = 5.5)+
  annotate('text', x = 0.5, y = 1.3, label = '75.25%',size = 5.5)
  

g_TT

ggsave("PScores_dist_trialType.svg",width=6,height=4)
ggsave("PScores_dist_trialType.pdf",width=6,height=4)

Combine the plots

library(ggpubr)

figure1 <- ggarrange(g_age, 
                    ggarrange(g_LR, g_TT, labels = c("B", "C"), ncol=2, font.label = list(size = 20, color = "black")),
                    nrow = 2, 
                    labels = "A",
                    font.label = list(size = 20, color = "black"))
figure1

ggsave("ExpRes_comb_bw.svg",width=10,height=10)
ggsave("ExpRes_comb_bw.pdf",width=10,height=10)

3.2 Statistical Analyses

3.2.1 Overall effect of age

ANOVA:

library(afex)

## ************
## Welcome to afex. For support visit: http://afex.singmann.science/

## - Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
## - Methods for calculating p-values with mixed(): 'S', 'KR', 'LRT', and 'PB'
## - 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
## - Get and set global package options with: afex_options()
## - Set sum-to-zero contrasts globally: set_sum_contrasts()
## - For example analyses see: browseVignettes("afex")
## ************

## 
## Attaching package: 'afex'

## The following object is masked from 'package:lme4':
## 
##     lmer

library(emmeans)

a1 <- aov_car(n ~ age + Error(sID_new/1), plotdata_age, 
              anova_table = list(es = "pes"))

## Contrasts set to contr.sum for the following variables: age

a1

## Anova Table (Type 3 tests)
## 
## Response: n
##   Effect    df  MSE        F  pes p.value
## 1    age 2, 87 7.30 7.57 *** .148   <.001
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Polynomial contrast to test the trends:

contrasts(plotdata_age$age) <- "contr.poly"

LinearModel.3 <- lm(n ~ age, data=plotdata_age)
summary(LinearModel.3)

## 
## Call:
## lm(formula = n ~ age, data = plotdata_age)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.8667 -2.2417  0.1333  2.1333  5.8333 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.4333     0.2849  40.132  < 2e-16 ***
## age.L         1.9092     0.4934   3.869 0.000211 ***
## age.Q         0.2041     0.4934   0.414 0.680132    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.703 on 87 degrees of freedom
## Multiple R-squared:  0.1482, Adjusted R-squared:  0.1287 
## F-statistic: 7.571 on 2 and 87 DF,  p-value: 0.0009308

Use emmeans to get the individual means and their CIs:

library(lsmeans)

## The 'lsmeans' package is now basically a front end for 'emmeans'.
## Users are encouraged to switch the rest of the way.
## See help('transition') for more information, including how to
## convert old 'lsmeans' objects and scripts to work with 'emmeans'.

# means

ls2 <- lsmeans(a1, c("age")) 
ls2

##  age     lsmean    SE df lower.CL upper.CL
##  3 years   10.2 0.493 87     9.19     11.1
##  4 years   11.3 0.493 87    10.29     12.2
##  5 years   12.9 0.493 87    11.89     13.8
## 
## Confidence level used: 0.95

contrasts <- emmeans(a1, ~ age)
s <- pairs(contrasts, adjust = "none") 


s

##  contrast          estimate    SE df t.ratio p.value
##  3 years - 4 years     -1.1 0.698 87  -1.576  0.1186
##  3 years - 5 years     -2.7 0.698 87  -3.869  0.0002
##  4 years - 5 years     -1.6 0.698 87  -2.293  0.0243

confint(s, level = 0.95)

##  contrast          estimate    SE df lower.CL upper.CL
##  3 years - 4 years     -1.1 0.698 87    -2.49    0.287
##  3 years - 5 years     -2.7 0.698 87    -4.09   -1.313
##  4 years - 5 years     -1.6 0.698 87    -2.99   -0.213
## 
## Confidence level used: 0.95

3.3 Proportion tests

proptest_data <- data %>%
  group_by(age, correctAns2) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.

Against chance-level performance:

age3 <- prop.test(proptest_data$n[2], proptest_data$n[1] + proptest_data$n[2], p = 0.5)
age3

## 
##  1-sample proportions test with continuity correction
## 
## data:  proptest_data$n[2] out of proptest_data$n[1] + proptest_data$n[2], null probability 0.5
## X-squared = 34.669, df = 1, p-value = 3.909e-09
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.5903820 0.6782493
## sample estimates:
##         p 
## 0.6354167

age4 <- prop.test(proptest_data$n[4], proptest_data$n[3] + proptest_data$n[4], p = 0.5)
age4

## 
##  1-sample proportions test with continuity correction
## 
## data:  proptest_data$n[4] out of proptest_data$n[3] + proptest_data$n[4], null probability 0.5
## X-squared = 79.219, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.6607705 0.7442386
## sample estimates:
##         p 
## 0.7041667

age5 <- prop.test(proptest_data$n[6], proptest_data$n[5] + proptest_data$n[6], p = 0.5)
age5

## 
##  1-sample proportions test with continuity correction
## 
## data:  proptest_data$n[6] out of proptest_data$n[5] + proptest_data$n[6], null probability 0.5
## X-squared = 176.42, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.7652057 0.8381569
## sample estimates:
##         p 
## 0.8041667

Against the best heuristic strategy (More good and Less bad predict 75% correct urns):

3- and 4-year-olds have a performance below 0.75, but the 5-year-olds are better. Is it significantly better?

age5 <- prop.test(proptest_data$n[6], proptest_data$n[5] + proptest_data$n[6], p = 0.75, alternative = "greater")
age5

## 
##  1-sample proportions test with continuity correction
## 
## data:  proptest_data$n[6] out of proptest_data$n[5] + proptest_data$n[6], null probability 0.75
## X-squared = 7.225, df = 1, p-value = 0.003595
## alternative hypothesis: true p is greater than 0.75
## 95 percent confidence interval:
##  0.771608 1.000000
## sample estimates:
##         p 
## 0.8041667

3.4 GLMM

to test also the other factors included in the experiment.

data$correctAns <- as.numeric(data$correctAns)

contr=glmerControl(optimizer="bobyqa", optCtrl = list(maxfun=100000)) # settings needed for model to converge


# null model
null = glmer(correctAns ~ 1 + (1|sID_new), data=data, family="binomial", control = contr)

# add age
age = glmer(correctAns ~ age + (1|sID_new), data=data, family="binomial", control = contr)

# add urnsize
urnsize = glmer(correctAns ~ age + urnSize + (1|sID_new), data=data, family="binomial", control = contr)

# add LR 
LR = glmer(correctAns ~ age + urnSize + LR + (1|sID_new), data=data, family="binomial", control = contr)

# add trial type
trialType = glmer(correctAns ~ age + urnSize + LR + trialType + (1|sID_new), data=data, family="binomial", control = contr)


# add interaction between LR and trial type
interTTLR = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + (1|sID_new), data=data, family="binomial", control = contr)

# add interaction between LR and age
interLRage = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + LR*age + (1|sID_new), data=data, family="binomial", control = contr)

# add interaction between TT and age
interTTage = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + LR*age + trialType*age + (1|sID_new), data=data, family="binomial", control = contr)



# add interaction between LR and trial type and age
interTTLRage = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + LR*age + trialType*LR*age + (1|sID_new), data=data, family="binomial", control = contr)



# test models
anova(null, age, urnsize, LR, trialType, interTTLR, interLRage, interTTage, interTTLRage)

## Data: data
## Models:
## null: correctAns ~ 1 + (1 | sID_new)
## age: correctAns ~ age + (1 | sID_new)
## urnsize: correctAns ~ age + urnSize + (1 | sID_new)
## LR: correctAns ~ age + urnSize + LR + (1 | sID_new)
## trialType: correctAns ~ age + urnSize + LR + trialType + (1 | sID_new)
## interTTLR: correctAns ~ age + urnSize + LR + trialType + trialType * LR + (1 | sID_new)
## interLRage: correctAns ~ age + urnSize + LR + trialType + trialType * LR + LR * age + (1 | sID_new)
## interTTage: correctAns ~ age + urnSize + LR + trialType + trialType * LR + LR * age + trialType * age + (1 | sID_new)
## interTTLRage: correctAns ~ age + urnSize + LR + trialType + trialType * LR + LR * age + trialType * LR * age + (1 | sID_new)
##              npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
## null            2 1665.4 1675.9 -830.70   1661.4                          
## age             4 1654.0 1675.1 -823.02   1646.0 15.3571  2  0.0004626 ***
## urnsize         5 1655.7 1682.1 -822.87   1645.7  0.3081  1  0.5788791    
## LR              6 1657.7 1689.3 -822.85   1645.7  0.0342  1  0.8532104    
## trialType       9 1651.0 1698.5 -816.52   1633.0 12.6669  3  0.0054153 ** 
## interTTLR      12 1645.6 1708.9 -810.81   1621.6 11.4086  3  0.0097099 ** 
## interLRage     14 1644.7 1718.5 -808.36   1616.7  4.9015  2  0.0862305 .  
## interTTage     20 1649.6 1755.0 -804.78   1609.6  7.1547  6  0.3067746    
## interTTLRage   26 1654.7 1791.8 -801.33   1602.7  6.9003  6  0.3301699    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

In addition to age, trial type, the interaction between LR and trial type is significant.

Follow up on the interaction by looking at a graph:

plotdata <- data %>%
  group_by(sID_new, LR, trialType, correctAns2) %>%
  summarize(n = n()) #%>%

## `summarise()` has grouped output by 'sID_new', 'LR', 'trialType'. You can
## override using the `.groups` argument.

  #mutate(pct = n/sum(n),
   #      lbl = scales::percent(pct))


plotdata_trialLR_correct <- subset(plotdata, correctAns2 == "Correct")
plotdata_trialLR_wrong <- subset(plotdata, correctAns2 == "Wrong" & n == 2)

plotdata_trialLR_wrong[plotdata_trialLR_wrong=="Wrong"] <- "Correct"
plotdata_trialLR_wrong[plotdata_trialLR_wrong==2] <- 0

plotdata_trialLR <- rbind(plotdata_trialLR_correct, plotdata_trialLR_wrong)

plotdat <- plotdata_trialLR

theme_set(theme_light(base_size = 20, base_family = "Poppins"))

colnames(plotdat) <- c("sID_new", "LR", "Trial type", "correctAns2", "n") 



# annotations LR 1.5  
ann_text_LR15_A <- data.frame(n = 2.5,`Trial type`=4.4,lab = "Text",
                       LR = factor("LR 1.5",levels = c("LR 1.5", "LR 4")))
colnames(ann_text_LR15_A)[2] <- "Trial type"

ann_text_LR15_B <- data.frame(n = 2.5,`Trial type`=3.4,lab = "Text",
                       LR = factor("LR 1.5",levels = c("LR 1.5", "LR 4")))
colnames(ann_text_LR15_B)[2] <- "Trial type"

ann_text_LR15_C <- data.frame(n = 2.5,`Trial type`=2.4,lab = "Text",
                       LR = factor("LR 1.5",levels = c("LR 1.5", "LR 4")))
colnames(ann_text_LR15_C)[2] <- "Trial type"

ann_text_LR15_D <- data.frame(n = 2.5,`Trial type`=1.4,lab = "Text",
                       LR = factor("LR 1.5",levels = c("LR 1.5", "LR 4")))
colnames(ann_text_LR15_D)[2] <- "Trial type"


# annotations LR 4 
ann_text_LR4_A <- data.frame(n = 2.5,`Trial type`=4.4,lab = "Text",
                       LR = factor("LR 4",levels = c("LR 1.5", "LR 4")))
colnames(ann_text_LR4_A)[2] <- "Trial type"

ann_text_LR4_B <- data.frame(n = 2.5,`Trial type`=3.4,lab = "Text",
                       LR = factor("LR 4",levels = c("LR 1.5", "LR 4")))
colnames(ann_text_LR4_B)[2] <- "Trial type"

ann_text_LR4_C <- data.frame(n = 2.5,`Trial type`=2.4,lab = "Text",
                       LR = factor("LR 4",levels = c("LR 1.5", "LR 4")))
colnames(ann_text_LR4_C)[2] <- "Trial type"

ann_text_LR4_D <- data.frame(n = 2.5,`Trial type`=1.4,lab = "Text",
                       LR = factor("LR 4",levels = c("LR 1.5", "LR 4")))
colnames(ann_text_LR4_D)[2] <- "Trial type"



library(ggdist)
library(ggridges)
g_TTLR <- ggplot(plotdat, aes(x = n, y = `Trial type`, fill = LR)) +
  ggtitle("Performance by trial type and LR")+
  facet_grid( ~ LR)+
  scale_x_continuous(limits = c(-1, 3), breaks=seq(0, 2, 1), expand = c(0,0))+
  stat_slab(aes(thickness = after_stat(pdf*n)), scale = 0.8, alpha = 0.7) +
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi", color = "black", interval_alpha = 0, 
                    slab_alpha = 0.7, point_alpha = 0) +
  #stat_dotsinterval()+
  #geom_density_ridges(alpha = 0.5)+
   #stat_summary(aes(x = rating_rec), fun.x=mean, geom="point", 
  #             color = "black", shape = 22, size = 2, group=1, alpha = 1)+
  stat_summary(aes(y = `Trial type`, color = LR), fun.data = mean_cl_boot, 
               geom = "errorbar", width = 0, size = 1, color = "black") +
  stat_summary(aes(y = `Trial type`, fill = LR), fun.y=mean, geom="point", 
               color = "black", shape = 22, size = 3, group=1, alpha = 1, fill = "black")+
  scale_fill_manual(values=c("#8da0cb", "#8da0cb", "#8da0cb", "#a6d854"))+
  scale_color_manual(values=c("#8da0cb", "#8da0cb", "#8da0cb", "#a6d854"))+
  #scale_fill_viridis_c(name = "Explanation \nRating", option = "C", breaks=c(-5,0,5), labels=c("narrow scope", "no preference", "broad scope"))+
  labs(x = "Number of correct choices", y = "Trial type") +
  stat_summary(aes(label=round(after_stat(x),2)), fun.y=mean, geom="text", size=5.5,
             vjust = -1)+
  scale_y_discrete(limits=rev)+
  myTheme+
  theme_light(base_family = "Poppins", base_size = 15)+
  theme(panel.grid = element_blank(), axis.text = element_text(colour ="black"))+
  theme(legend.position="none",
        legend.title=element_blank(),legend.key.width = unit(1.95, 'cm'))+
  theme(axis.text.y = element_text(size = 14, angle = 0))+
  geom_text(data = ann_text_LR15_A,label = "62.0%", size = 3.5)+ # LR 15
  geom_text(data = ann_text_LR15_B,label = "77.0%", size = 3.5)+
  geom_text(data = ann_text_LR15_C,label = "66.0%", size = 3.5)+
  geom_text(data = ann_text_LR15_D,label = "79.5%", size = 3.5)+
  geom_text(data = ann_text_LR4_A,label = "74.5%", size = 3.5)+ # LR 4
  geom_text(data = ann_text_LR4_B,label = "74.0%", size = 3.5)+
  geom_text(data = ann_text_LR4_C,label = "67.0%", size = 3.5)+
  geom_text(data = ann_text_LR4_D,label = "71.0%", size = 3.5)

g_TTLR

ggsave("PScores_dist_interTTLR.svg",width=6,height=4)
ggsave("PScores_dist_interTTLR.pdf",width=6,height=4)

A and C trials were harder, but only for the more difficult LR

Do the tests (contrasts):

library(lsmeans)
# means

ls2 <- lsmeans(interTTLR, c("age")) 
ls2

##  age     lsmean    SE  df asymp.LCL asymp.UCL
##  3 years  0.629 0.165 Inf     0.306     0.951
##  4 years  0.974 0.168 Inf     0.644     1.304
##  5 years  1.602 0.182 Inf     1.245     1.960
## 
## Results are averaged over the levels of: urnSize, LR, trialType 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

contrasts <- emmeans(interTTLR, ~ trialType|LR)
s <- pairs(contrasts, adjust = "none") 


s

## LR = LR 1.5:
##  contrast estimate    SE  df z.ratio p.value
##  A - B     -0.8117 0.248 Inf  -3.274  0.0011
##  A - C     -0.1926 0.234 Inf  -0.824  0.4099
##  A - D     -0.9560 0.253 Inf  -3.780  0.0002
##  B - C      0.6191 0.250 Inf   2.477  0.0132
##  B - D     -0.1443 0.267 Inf  -0.540  0.5889
##  C - D     -0.7635 0.255 Inf  -2.996  0.0027
## 
## LR = LR 4:
##  contrast estimate    SE  df z.ratio p.value
##  A - B      0.0323 0.253 Inf   0.128  0.8982
##  A - C      0.3941 0.246 Inf   1.603  0.1089
##  A - D      0.1883 0.249 Inf   0.755  0.4500
##  B - C      0.3618 0.245 Inf   1.477  0.1398
##  B - D      0.1560 0.249 Inf   0.628  0.5302
##  C - D     -0.2058 0.241 Inf  -0.852  0.3940
## 
## Results are averaged over the levels of: age, urnSize 
## Results are given on the log odds ratio (not the response) scale.

confint(s, level = 0.95)

## LR = LR 1.5:
##  contrast estimate    SE  df asymp.LCL asymp.UCL
##  A - B     -0.8117 0.248 Inf   -1.2976    -0.326
##  A - C     -0.1926 0.234 Inf   -0.6505     0.265
##  A - D     -0.9560 0.253 Inf   -1.4517    -0.460
##  B - C      0.6191 0.250 Inf    0.1293     1.109
##  B - D     -0.1443 0.267 Inf   -0.6678     0.379
##  C - D     -0.7635 0.255 Inf   -1.2629    -0.264
## 
## LR = LR 4:
##  contrast estimate    SE  df asymp.LCL asymp.UCL
##  A - B      0.0323 0.253 Inf   -0.4630     0.528
##  A - C      0.3941 0.246 Inf   -0.0877     0.876
##  A - D      0.1883 0.249 Inf   -0.3004     0.677
##  B - C      0.3618 0.245 Inf   -0.1184     0.842
##  B - D      0.1560 0.249 Inf   -0.3311     0.643
##  C - D     -0.2058 0.241 Inf   -0.6790     0.267
## 
## Results are averaged over the levels of: age, urnSize 
## Results are given on the log odds ratio (not the response) scale. 
## Confidence level used: 0.95

3.5 Exploratory analyses

Might there be a learning effect in the different age groups?

trialOrder = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + trialOrd + (1|sID_new), data=data, family="binomial", control = contr) # take out the interactions that were not a sig. improvement 

Inter_trialOrderAge = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + trialOrd + trialOrd*age + (1|sID_new), data=data, family="binomial", control = contr) # take out the interactions that were not a sig. improvement 

trialOrder2 = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + trialOrd2 + (1|sID_new), data=data, family="binomial", control = contr) # take out the interactions that were not a sig. improvement 

Inter_trialOrder2Age = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + trialOrd2 + trialOrd2*age + (1|sID_new), data=data, family="binomial", control = contr) # take out the interactions that were not a sig. improvement 


# test models
anova(null, age, urnsize, LR, trialType, interTTLR, trialOrder)

## Data: data
## Models:
## null: correctAns ~ 1 + (1 | sID_new)
## age: correctAns ~ age + (1 | sID_new)
## urnsize: correctAns ~ age + urnSize + (1 | sID_new)
## LR: correctAns ~ age + urnSize + LR + (1 | sID_new)
## trialType: correctAns ~ age + urnSize + LR + trialType + (1 | sID_new)
## interTTLR: correctAns ~ age + urnSize + LR + trialType + trialType * LR + (1 | sID_new)
## trialOrder: correctAns ~ age + urnSize + LR + trialType + trialType * LR + trialOrd + (1 | sID_new)
##            npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
## null          2 1665.4 1675.9 -830.70   1661.4                          
## age           4 1654.0 1675.1 -823.02   1646.0 15.3571  2  0.0004626 ***
## urnsize       5 1655.7 1682.1 -822.87   1645.7  0.3081  1  0.5788791    
## LR            6 1657.7 1689.3 -822.85   1645.7  0.0342  1  0.8532104    
## trialType     9 1651.0 1698.5 -816.52   1633.0 12.6669  3  0.0054153 ** 
## interTTLR    12 1645.6 1708.9 -810.81   1621.6 11.4086  3  0.0097099 ** 
## trialOrder   13 1635.1 1703.7 -804.56   1609.1 12.4969  1  0.0004076 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(null, age, urnsize, LR, trialType, interTTLR, trialOrder, Inter_trialOrderAge)

## Data: data
## Models:
## null: correctAns ~ 1 + (1 | sID_new)
## age: correctAns ~ age + (1 | sID_new)
## urnsize: correctAns ~ age + urnSize + (1 | sID_new)
## LR: correctAns ~ age + urnSize + LR + (1 | sID_new)
## trialType: correctAns ~ age + urnSize + LR + trialType + (1 | sID_new)
## interTTLR: correctAns ~ age + urnSize + LR + trialType + trialType * LR + (1 | sID_new)
## trialOrder: correctAns ~ age + urnSize + LR + trialType + trialType * LR + trialOrd + (1 | sID_new)
## Inter_trialOrderAge: correctAns ~ age + urnSize + LR + trialType + trialType * LR + trialOrd + trialOrd * age + (1 | sID_new)
##                     npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)
## null                   2 1665.4 1675.9 -830.70   1661.4                      
## age                    4 1654.0 1675.1 -823.02   1646.0 15.3571  2  0.0004626
## urnsize                5 1655.7 1682.1 -822.87   1645.7  0.3081  1  0.5788791
## LR                     6 1657.7 1689.3 -822.85   1645.7  0.0342  1  0.8532104
## trialType              9 1651.0 1698.5 -816.52   1633.0 12.6669  3  0.0054153
## interTTLR             12 1645.6 1708.9 -810.81   1621.6 11.4086  3  0.0097099
## trialOrder            13 1635.1 1703.7 -804.56   1609.1 12.4969  1  0.0004076
## Inter_trialOrderAge   15 1634.7 1713.8 -802.34   1604.7  4.4523  2  0.1079436
##                        
## null                   
## age                 ***
## urnsize                
## LR                     
## trialType           ** 
## interTTLR           ** 
## trialOrder          ***
## Inter_trialOrderAge    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(null, age, urnsize, LR, trialType, interTTLR, trialOrder2)

## Data: data
## Models:
## null: correctAns ~ 1 + (1 | sID_new)
## age: correctAns ~ age + (1 | sID_new)
## urnsize: correctAns ~ age + urnSize + (1 | sID_new)
## LR: correctAns ~ age + urnSize + LR + (1 | sID_new)
## trialType: correctAns ~ age + urnSize + LR + trialType + (1 | sID_new)
## interTTLR: correctAns ~ age + urnSize + LR + trialType + trialType * LR + (1 | sID_new)
## trialOrder2: correctAns ~ age + urnSize + LR + trialType + trialType * LR + trialOrd2 + (1 | sID_new)
##             npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
## null           2 1665.4 1675.9 -830.70   1661.4                          
## age            4 1654.0 1675.1 -823.02   1646.0 15.3571  2  0.0004626 ***
## urnsize        5 1655.7 1682.1 -822.87   1645.7  0.3081  1  0.5788791    
## LR             6 1657.7 1689.3 -822.85   1645.7  0.0342  1  0.8532104    
## trialType      9 1651.0 1698.5 -816.52   1633.0 12.6669  3  0.0054153 ** 
## interTTLR     12 1645.6 1708.9 -810.81   1621.6 11.4086  3  0.0097099 ** 
## trialOrder2   13 1638.1 1706.6 -806.05   1612.1  9.5215  1  0.0020308 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(null, age, urnsize, LR, trialType, interTTLR, trialOrder2, Inter_trialOrder2Age)

## Data: data
## Models:
## null: correctAns ~ 1 + (1 | sID_new)
## age: correctAns ~ age + (1 | sID_new)
## urnsize: correctAns ~ age + urnSize + (1 | sID_new)
## LR: correctAns ~ age + urnSize + LR + (1 | sID_new)
## trialType: correctAns ~ age + urnSize + LR + trialType + (1 | sID_new)
## interTTLR: correctAns ~ age + urnSize + LR + trialType + trialType * LR + (1 | sID_new)
## trialOrder2: correctAns ~ age + urnSize + LR + trialType + trialType * LR + trialOrd2 + (1 | sID_new)
## Inter_trialOrder2Age: correctAns ~ age + urnSize + LR + trialType + trialType * LR + trialOrd2 + trialOrd2 * age + (1 | sID_new)
##                      npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)
## null                    2 1665.4 1675.9 -830.70   1661.4                      
## age                     4 1654.0 1675.1 -823.02   1646.0 15.3571  2  0.0004626
## urnsize                 5 1655.7 1682.1 -822.87   1645.7  0.3081  1  0.5788791
## LR                      6 1657.7 1689.3 -822.85   1645.7  0.0342  1  0.8532104
## trialType               9 1651.0 1698.5 -816.52   1633.0 12.6669  3  0.0054153
## interTTLR              12 1645.6 1708.9 -810.81   1621.6 11.4086  3  0.0097099
## trialOrder2            13 1638.1 1706.6 -806.05   1612.1  9.5215  1  0.0020308
## Inter_trialOrder2Age   15 1640.1 1719.2 -805.04   1610.1  2.0173  2  0.3647171
##                         
## null                    
## age                  ***
## urnsize                 
## LR                      
## trialType            ** 
## interTTLR            ** 
## trialOrder2          ** 
## Inter_trialOrder2Age    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

gender = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + trialOrd + sex + (1|sID_new), data=data, family="binomial", control = contr) # take out the interactions that were not a sign. improvement


anova(null, age, urnsize, LR, trialType, interTTLR, trialOrder, gender)

## Data: data
## Models:
## null: correctAns ~ 1 + (1 | sID_new)
## age: correctAns ~ age + (1 | sID_new)
## urnsize: correctAns ~ age + urnSize + (1 | sID_new)
## LR: correctAns ~ age + urnSize + LR + (1 | sID_new)
## trialType: correctAns ~ age + urnSize + LR + trialType + (1 | sID_new)
## interTTLR: correctAns ~ age + urnSize + LR + trialType + trialType * LR + (1 | sID_new)
## trialOrder: correctAns ~ age + urnSize + LR + trialType + trialType * LR + trialOrd + (1 | sID_new)
## gender: correctAns ~ age + urnSize + LR + trialType + trialType * LR + trialOrd + sex + (1 | sID_new)
##            npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
## null          2 1665.4 1675.9 -830.70   1661.4                          
## age           4 1654.0 1675.1 -823.02   1646.0 15.3571  2  0.0004626 ***
## urnsize       5 1655.7 1682.1 -822.87   1645.7  0.3081  1  0.5788791    
## LR            6 1657.7 1689.3 -822.85   1645.7  0.0342  1  0.8532104    
## trialType     9 1651.0 1698.5 -816.52   1633.0 12.6669  3  0.0054153 ** 
## interTTLR    12 1645.6 1708.9 -810.81   1621.6 11.4086  3  0.0097099 ** 
## trialOrder   13 1635.1 1703.7 -804.56   1609.1 12.4969  1  0.0004076 ***
## gender       14 1637.1 1710.9 -804.53   1609.1  0.0582  1  0.8093636    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#learning effect in 8 first trials to compare results with Gualtieri et al.
data_8trials <- subset(data, trialOrd <9)

# null model
null_8trials = glmer(correctAns ~ 1 + (1|sID_new), data=data_8trials, family="binomial", control = contr)

# add age
age_8trials = glmer(correctAns ~ age + (1|sID_new), data=data_8trials, family="binomial", control = contr)

# add urnsize
urnsize_8trials = glmer(correctAns ~ age + urnSize + (1|sID_new), data=data_8trials, family="binomial", control = contr)

# add LR 
LR_8trials = glmer(correctAns ~ age + urnSize + LR + (1|sID_new), data=data_8trials, family="binomial", control = contr)

# add trial type
trialType_8trials = glmer(correctAns ~ age + urnSize + LR + trialType + (1|sID_new), data=data_8trials, family="binomial", control = contr)


# add interaction between LR and trial type
interTTLR_8trials = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + (1|sID_new), data=data_8trials, family="binomial", control = contr)


#add trial order
trialOrder_8trials = glmer(correctAns ~ age + urnSize + LR + trialType + trialType*LR + trialOrd + (1|sID_new), data=data_8trials, family="binomial", control = contr)

anova(null_8trials, age_8trials, urnsize_8trials, LR_8trials, trialType_8trials, interTTLR_8trials, trialOrder_8trials)

## Data: data_8trials
## Models:
## null_8trials: correctAns ~ 1 + (1 | sID_new)
## age_8trials: correctAns ~ age + (1 | sID_new)
## urnsize_8trials: correctAns ~ age + urnSize + (1 | sID_new)
## LR_8trials: correctAns ~ age + urnSize + LR + (1 | sID_new)
## trialType_8trials: correctAns ~ age + urnSize + LR + trialType + (1 | sID_new)
## interTTLR_8trials: correctAns ~ age + urnSize + LR + trialType + trialType * LR + (1 | sID_new)
## trialOrder_8trials: correctAns ~ age + urnSize + LR + trialType + trialType * LR + trialOrd + (1 | sID_new)
##                    npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)   
## null_8trials          2 888.56 897.72 -442.28   884.56                         
## age_8trials           4 885.06 903.38 -438.53   877.06  7.4947  2    0.02358 * 
## urnsize_8trials       5 885.83 908.72 -437.91   875.83  1.2357  1    0.26631   
## LR_8trials            6 887.82 915.29 -437.91   875.82  0.0114  1    0.91503   
## trialType_8trials     9 880.64 921.85 -431.32   862.64 13.1766  3    0.00427 **
## interTTLR_8trials    12 881.92 936.87 -428.96   857.92  4.7205  3    0.19345   
## trialOrder_8trials   13 881.43 940.96 -427.71   855.43  2.4917  1    0.11445   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

no effect of learning in the first 8 trials

##Learning effect

data$trialOrd <- as.numeric(data$trialOrd)

plotdata_trialOrd <- data %>%
  group_by(age, trialOrd, correctAns2) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'age', 'trialOrd'. You can override using
## the `.groups` argument.

plotdata_trialOrd <- subset(plotdata_trialOrd, correctAns2 == "Correct")

theme_set(theme_light(base_size = 20, base_family = "Poppins"))


reg <- ggplot(plotdata_trialOrd, aes(x=trialOrd, y=pct)) +
  facet_grid(~ age) +
  geom_point(shape=1) +    
  geom_smooth(method=lm,   
              se=FALSE, level = .95, colour = "#045a8d")+
  myTheme+
  theme(axis.title=element_text(size=18),axis.text.x = element_text(size = 12),axis.text.y = element_text(size = 18),
        legend.position = "none")+
  labs(x = "Trial order", y = "Proportion of correct choices") +
  scale_y_continuous(breaks = c(0,0.25,0.50,0.75,1), limits = c(0,1))+
  scale_x_continuous(limits = c(0, 17), breaks=seq(1, 16, 1), expand = c(0,0))+
  stat_cor(method = "pearson", label.x = 1, label.y = 0.2, size = 5, digits = 2, alternative = "two.sided")

reg

## `geom_smooth()` using formula = 'y ~ x'

ggsave("learning_effect.pdf",width=10,height=5)

## `geom_smooth()` using formula = 'y ~ x'

ggsave("learning_effect.svg",width=10,height=5)

## `geom_smooth()` using formula = 'y ~ x'

# compare if correlation in 5-year-olds is higher from those in 4- and 3-year-olds
library(psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:car':
## 
##     logit

## The following objects are masked from 'package:scales':
## 
##     alpha, rescale

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

paired.r(0.77,0.57,n=16, twotailed=T) # 5 vs. 4 years

## Call: paired.r(xy = 0.77, xz = 0.57, n = 16, twotailed = T)
## [1] "test of difference between two independent correlations"
## z = 0.95  With probability =  0.34

paired.r(0.72,0.16,n=16, twotailed=T) # 5 vs. 3 years

## Call: paired.r(xy = 0.72, xz = 0.16, n = 16, twotailed = T)
## [1] "test of difference between two independent correlations"
## z = 1.9  With probability =  0.06

3.6 Learning Effect due to trial-error-learning?

# delete trials 1 to 4 
data_learning <- read_csv("Exp_data.csv")

## Rows: 1980 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): sex, col1, col2, correctUrn, correctAns, drawnCol, winCol, feedBack...
## dbl (9): subjId, age, trialNr, n1Urn1, n2Urn1, n1Urn2, n2Urn2, selectedUrn, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

data_learning <- subset(data_learning, trialNr > 4)

data_learning$trialOrd <- rep(1:18,90)

data_learning <- subset(data_learning, select = c(sID_new, age, correctAns, trialOrd, feedBack))





data_learning$age <- factor(data_learning$age, levels = c(3:5), labels = c("3 years", "4 years", "5 years"))

data_learning$feedBack <- factor(data_learning$feedBack, levels = c("positive","negative"))

data_learning$trialOrd <- as.numeric(data_learning$trialOrd)

data_learning$correctAns <- as.numeric(data_learning$correctAns)

# Define lag (number of trials to look back for feedback)
lag <- 1  # Look back at feedback from the previous trial

data_grouped <- data_learning %>%
  group_by(sID_new, age) %>%  # Group by individual ID
  mutate(correctAns.prev = lag(correctAns, 1),
         feedBack.prev = lag(feedBack, 1)) %>%  
  ungroup() 


data_grouped <- subset(data_grouped, trialOrd != 1)

data_grouped <- data_grouped %>% 
  mutate(correctAns_prev = if_else(correctAns.prev == 1, "Correct", "Wrong"))



data_grouped_5years <- subset(data_grouped, age == "5 years")
data_grouped_4years <- subset(data_grouped, age == "4 years")
data_grouped_3years <- subset(data_grouped, age == "3 years")

# Logistic regression model (correctness as outcome)
#glm_0 <- glm(correctAns ~ 1 + (1|sID_new), data = data_grouped, family = binomial)

glm_learning_5years <- glm(correctAns ~ feedBack.prev * correctAns_prev + (1|sID_new), data = data_grouped_5years, family = binomial)
glm_learning_4years <- glm(correctAns ~ feedBack.prev * correctAns_prev + (1|sID_new), data = data_grouped_4years, family = binomial)
glm_learning_3years <- glm(correctAns ~ feedBack.prev * correctAns_prev + (1|sID_new), data = data_grouped_3years, family = binomial)
# Summary of the model
summary(glm_learning_5years)

## 
## Call:
## glm(formula = correctAns ~ feedBack.prev * correctAns_prev + 
##     (1 | sID_new), family = binomial, data = data_grouped_5years)
## 
## Coefficients: (1 not defined because of singularities)
##                                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                                  1.6713     0.1590  10.512   <2e-16
## feedBack.prevnegative                        0.1811     0.3124   0.580    0.562
## correctAns_prevWrong                        -0.2362     0.5224  -0.452    0.651
## 1 | sID_newTRUE                                  NA         NA      NA       NA
## feedBack.prevnegative:correctAns_prevWrong  -0.7201     0.6447  -1.117    0.264
##                                               
## (Intercept)                                ***
## feedBack.prevnegative                         
## correctAns_prevWrong                          
## 1 | sID_newTRUE                               
## feedBack.prevnegative:correctAns_prevWrong    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 469.10  on 509  degrees of freedom
## Residual deviance: 461.63  on 506  degrees of freedom
## AIC: 469.63
## 
## Number of Fisher Scoring iterations: 4

summary(glm_learning_4years)

## 
## Call:
## glm(formula = correctAns ~ feedBack.prev * correctAns_prev + 
##     (1 | sID_new), family = binomial, data = data_grouped_4years)
## 
## Coefficients: (1 not defined because of singularities)
##                                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                                 0.98261    0.14012   7.013 2.34e-12
## feedBack.prevnegative                       0.22933    0.26747   0.857    0.391
## correctAns_prevWrong                       -0.35009    0.33122  -1.057    0.291
## 1 | sID_newTRUE                                  NA         NA      NA       NA
## feedBack.prevnegative:correctAns_prevWrong -0.08866    0.45862  -0.193    0.847
##                                               
## (Intercept)                                ***
## feedBack.prevnegative                         
## correctAns_prevWrong                          
## 1 | sID_newTRUE                               
## feedBack.prevnegative:correctAns_prevWrong    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 603.30  on 509  degrees of freedom
## Residual deviance: 600.15  on 506  degrees of freedom
## AIC: 608.15
## 
## Number of Fisher Scoring iterations: 4

summary(glm_learning_3years)

## 
## Call:
## glm(formula = correctAns ~ feedBack.prev * correctAns_prev + 
##     (1 | sID_new), family = binomial, data = data_grouped_3years)
## 
## Coefficients: (1 not defined because of singularities)
##                                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                                  0.7781     0.1359   5.725 1.04e-08
## feedBack.prevnegative                       -0.0849     0.2650  -0.320    0.749
## correctAns_prevWrong                         0.1913     0.3793   0.505    0.614
## 1 | sID_newTRUE                                  NA         NA      NA       NA
## feedBack.prevnegative:correctAns_prevWrong  -0.3249     0.4778  -0.680    0.497
##                                               
## (Intercept)                                ***
## feedBack.prevnegative                         
## correctAns_prevWrong                          
## 1 | sID_newTRUE                               
## feedBack.prevnegative:correctAns_prevWrong    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 645.01  on 509  degrees of freedom
## Residual deviance: 643.52  on 506  degrees of freedom
## AIC: 651.52
## 
## Number of Fisher Scoring iterations: 4

#anova(glm_0,glm_learning)

No significant predictors. Hence, we don’t expect there to be a learning effect that is based on trial-error learning.

3.7 Individual-level analyses

# delete trials 1 to 4 

data <- subset(data, trialNr > 4)

data <- subset(data, select = c(sID_new,age,sex,trialNr,selectedUrn,correctAns))

#new column with correct answer text
data <- data %>% 
  mutate(
         given_answer = if_else(selectedUrn == 2, 0, 1))

data <- subset(data, trialNr > 6)

data$age <- factor(data$age, levels = c(3:5), labels = c("3 years", "4 years", "5 years"))
data$trialNr <- factor(data$trialNr, levels = c(7:22))

##new data frame with strategies
#create new data frame with the urn that should be chose according to different strategies
sID_new <- rep(1:90, each = 16) 
trialNr <- rep(7:22,90) 
strat_ratio <- rep(1:0,720) 
strat_more_good <- rep(c(1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0),90)
strat_less_bad <- rep(c(1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1),90) 
strat_random <- rep(0.5,1440) 
strat_good_urn <- rep(c(1,0,0.5,0.5,0.5,0.5,0.5,0.5,1,0,0.5,0.5,0.5,0.5,0.5,0.5),90) 

strategies <- data.frame(sID_new, trialNr, strat_random, strat_ratio, strat_more_good, strat_less_bad, strat_good_urn)

strategies$trialNr <- factor(strategies$trialNr, levels = c(7:22))

# merge both data frames
data_strategies<- merge(data, strategies, by = c("sID_new", "trialNr"))
data_strategies <- left_join(data, strategies)

## Joining with `by = join_by(sID_new, trialNr)`

data_strategies <- subset(data_strategies, select = c(sID_new,age,sex,trialNr,given_answer,strat_random,strat_ratio,strat_more_good,strat_less_bad,strat_good_urn))

# calculate absolute difference between given answer and different strategies

data_strategies <- data_strategies %>%
  mutate(diff_strat_random = abs(given_answer - strat_random))
data_strategies <- data_strategies %>%
  mutate(diff_strat_ratio = abs(given_answer - strat_ratio))
data_strategies <- data_strategies %>%
  mutate(diff_strat_more_good = abs(given_answer - strat_more_good))
data_strategies <- data_strategies %>%
  mutate(diff_strat_less_bad = abs(given_answer - strat_less_bad))
data_strategies <- data_strategies %>%
  mutate(diff_strat_good_urn = abs(given_answer - strat_good_urn))

#summarize data to get the sum of new columns
data_strategies_sum <- data_strategies %>%
  group_by(sID_new,age) %>%
  summarise(diff_strat_random = sum(diff_strat_random),
            diff_strat_ratio = sum(diff_strat_ratio),
            diff_strat_more_good = sum(diff_strat_more_good),
            diff_strat_less_bad = sum(diff_strat_less_bad),
            diff_strat_good_urn = sum(diff_strat_good_urn))

## `summarise()` has grouped output by 'sID_new'. You can override using the
## `.groups` argument.

data_strategies_final <- data_strategies_sum %>%
   mutate(random = (16 - diff_strat_random) / 16,
          ratio = (16 - diff_strat_ratio) / 16,
          more_good = (16 - diff_strat_more_good) / 16,
          less_bad = (16 - diff_strat_less_bad) / 16,
          good_urn = (16 - diff_strat_good_urn) / 16)

data_strategies_final <- subset(data_strategies_final, select = c(sID_new,age,random,ratio,more_good,less_bad,good_urn))

#final step, qualitative choice
data_strategies_final <- data_strategies_final %>%
   mutate(random_qual = ifelse(random == max(c(random, ratio, more_good, less_bad, good_urn)),1, 0),
          ratio_qual = ifelse(ratio == max(c(random, ratio, more_good, less_bad, good_urn)),1, 0),
          more_good_qual = ifelse(more_good == max(c(random, ratio, more_good, less_bad, good_urn)),1,0),
          less_bad_qual = ifelse(less_bad == max(c(random, ratio, more_good, less_bad, good_urn)),1,0),
          good_urn_qual = ifelse(good_urn == max(c(random, ratio, more_good, less_bad, good_urn)),1,0))

#replace by 0 all 1 if random =1
data_strategies_final <- data_strategies_final %>%
  mutate(
    across(c(ratio_qual, more_good_qual, less_bad_qual, good_urn_qual), ~ ifelse(.x == 1 & random_qual == 1, 0, .x)),
    .after = random_qual # Ensure the new column comes after random_qual
  )

#weigh 
#divide by the sum of 1s
data_strategies_final <- data_strategies_final %>%
  mutate(random_qual_weighed = random_qual / sum(random_qual,ratio_qual, more_good_qual, less_bad_qual, good_urn_qual),
         ratio_qual_weighed = ratio_qual / sum(random_qual,ratio_qual, more_good_qual, less_bad_qual, good_urn_qual),
         more_good_qual_weighed = more_good_qual / sum(random_qual,ratio_qual, more_good_qual, less_bad_qual, good_urn_qual),
         less_bad_qual_weighed = less_bad_qual / sum(random_qual,ratio_qual, more_good_qual, less_bad_qual, good_urn_qual),
         good_urn_qual_weighed = good_urn_qual / sum(random_qual,ratio_qual, more_good_qual, less_bad_qual, good_urn_qual))

#final data frame with weighed values
plotdata_strategies <- subset(data_strategies_final, select = c(sID_new,age,random_qual_weighed,ratio_qual_weighed,more_good_qual_weighed,less_bad_qual_weighed,good_urn_qual_weighed))



plotdata_strategies <- rename(plotdata_strategies, random = random_qual_weighed)
plotdata_strategies <- rename(plotdata_strategies, ratio = ratio_qual_weighed)
plotdata_strategies <- rename(plotdata_strategies, more_good = more_good_qual_weighed)
plotdata_strategies <- rename(plotdata_strategies, less_bad = less_bad_qual_weighed)
plotdata_strategies <- rename(plotdata_strategies, good_urn = good_urn_qual_weighed)


#transform in long format
plotdata_strategies_long <- plotdata_strategies %>%
  gather(key = "strategy", value = "count", -sID_new, -age)

#plot data
plotdata <- plotdata_strategies_long

age_strategy_summary <- plotdata %>%
  group_by(age, strategy) %>%
  summarize(average_count = mean(count)*100)

## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.

round(age_strategy_summary$average_count,3)

##  [1]  0.833 10.833 40.833  3.333 44.167  0.000 12.222 20.556  6.667 60.556
## [11]  0.000 12.778 16.111  3.333 67.778

g <- ggplot(age_strategy_summary, aes(x = age, y = average_count, fill = factor(strategy, levels = c("good_urn", "random", "less_bad", "more_good", "ratio")))) +
  geom_bar(stat = "identity") +
  labs( x = "Age", y = "Percentage") +  # Add labels and title (corrected "Percentage" to "Average Count")
  coord_cartesian(ylim = c(0, 100)) +  # Set y-axis limits from 0 to 100 
  theme_classic() +
  scale_fill_brewer(palette = "Pastel1", labels = c("Good urn", "Random", "Less bad", "More good", "Ratio"), name = "Strategy") 


g

ggsave("ind_strategies_bars.pdf",width=6,height=5)

ggsave("ind_strategies_bars.svg",width=6,height=5)

Analysis