# Rnotes for class discussion of Thursday, Nov. 16, 2006
#
# We use the following data preparation:

data <- read.csv("../../Childes_11-07-06.csv", comment.char="")

> names(data)
 [1] "X"                              "utterance"
 [3] "verb"                           "theme"
 [5] "recipient"                      "child"
 [7] "age"                            "file"
 [9] "NP_PP"                          "NP_NP"
[11] "null.prep"                      "elliptical"
[13] "preposed"                       "question"
[15] "theme.animate"                  "theme.toy"
[17] "recip.animate"                  "recip.toy"
[19] "theme.overt"                    "recip.overt"
[21] "theme.Nwords"                   "recip.Nwords"
[23] "V.in.previous.10.lines"         "constr.in.previous.10.lines"
[25] "theme.in.previous.10.lines"     "theme.in.previous.10.lines.bin"
[27] "theme.is.spkr"                  "recip.in.previous.10.lines"
[29] "recip.in.previous.10.lines.bin" "recip.is.spkr"
[31] "syncat.of.theme"                "syncat.of.recip"
[33] "v.particle."                    "idiomatic."
[35] "abstract.theme."                "deverbal.theme."
[37] "Notes"                          "age.numeric"
[39] "number"                         "recip.animate2"
[41] "syncat.of.theme.pron"           "syncat.of.recip.pron"
>
# add more factors

data$recip.given <- rep("given", nrow(data))
data[data$recip.in.previous.10.lines == "no",]$recip.given <-
    rep("new", nrow(data[data$recip.in.previous.10.lines == "no",]))
data$recip.given <- factor(data$recip.given)

data$theme.given <- rep("given", nrow(data))
data[data$theme.in.previous.10.lines == "no",]$theme.given <-
    rep("new", nrow(data[data$theme.in.previous.10.lines == "no",]))
data$theme.given <- factor(data$theme.given)

data$paral <- rep(1, nrow(data))
data[data$constr.in.previous.10.lines == "no",]$paral <-
    rep(0, nrow(data[data$constr.in.previous.10.lines == "no",]))

# subtract some factors
#
# over half the occurrences of the verb "show" have CP (clausal) complements,
# which very rarely alternate
#
data.all <- data
#> nrow(data.all)
#[1] 638
#> data <- data[data$syncat.of.theme!="CP",]
#> nrow(data)
#[1] 542
#>
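# (Aside: the recodes under "add more factors" above can be written more
# compactly with ifelse(). This is only an equivalent sketch -- it assumes
# the "no" level marks new referents and that the columns have no NAs --
# not a change to the analysis:)

data$recip.given <- factor(ifelse(data$recip.in.previous.10.lines == "no",
                                  "new", "given"))
data$theme.given <- factor(ifelse(data$theme.in.previous.10.lines == "no",
                                  "new", "given"))
data$paral <- ifelse(data$constr.in.previous.10.lines == "no", 0, 1)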
################
# First we go through the exercises given in class

library(lme4)

# i) original lmer model

fit.lmer <- lmer(NP_PP ~ recip.animate + verb + theme.given + recip.given
                 + theme.Nwords + paral + (1 | child),
                 data = data, family = 'binomial', method = "Laplace")

Estimated scale (compare to 1 )  1.083942

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)
(Intercept)      1.3713     0.5601   2.448 0.014345 *
recip.animate    0.4304     0.3668   1.173 0.240733
verbshow        -0.6893     0.3867  -1.782 0.074689 .
theme.givennew  -1.0316     0.2900  -3.557 0.000376 ***
recip.givennew   0.8709     0.2910   2.993 0.002765 **
theme.Nwords    -1.6401     0.2499  -6.564 5.24e-11 ***
paral           -1.0238     0.2938  -3.484 0.000494 ***
---

Correlation of Fixed Effects:
            (Intr) rcp.nm vrbshw thm.gv rcp.gv thm.Nw
recip.animt -0.590
verbshow    -0.115 -0.088
theme.gvnnw -0.089  0.024  0.059
recip.gvnnw -0.185  0.002 -0.182 -0.109
theme.Nwrds -0.622  0.060  0.078 -0.150  0.035
paral       -0.264 -0.007  0.036  0.211  0.240  0.092
>
# ii) new model w verb as crossed ranef
# note "verb" has to be removed as fixed effect because of errors

fit.v.lmer <- lmer(NP_PP ~ recip.animate + theme.given + recip.given
                   + theme.Nwords + paral + (1 | child) + (1 | verb),
                   data = data, family = 'binomial', method = "Laplace")

Estimated scale (compare to 1 )  1.078645

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)
(Intercept)      1.1469     0.5977   1.919 0.055006 .
recip.animate    0.4062     0.3663   1.109 0.267481
theme.givennew  -1.0214     0.2898  -3.525 0.000423 ***
recip.givennew   0.8268     0.2877   2.874 0.004051 **
theme.Nwords    -1.6285     0.2490  -6.541 6.13e-11 ***
paral           -1.0185     0.2936  -3.469 0.000522 ***
---

Correlation of Fixed Effects:
            (Intr) rcp.nm thm.gv rcp.gv thm.Nw
recip.animt -0.573
theme.gvnnw -0.073  0.027
recip.gvnnw -0.211 -0.008 -0.097
theme.Nwrds -0.568  0.067 -0.155  0.041
paral       -0.240 -0.004  0.213  0.250  0.085
>
# iii) compare (i) and (ii) -- must have same fixed effects, so remove
# "verb" from fit.lmer -- it's barely significant anyway:

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)
(Intercept)      0.8380     0.5362   1.563   0.1181
recip.animate    0.4455     0.3603   1.237   0.2163
verbshow        -0.6583     0.3813  -1.726   0.0843 .
theme.givennew  -0.8187     0.2845  -2.878   0.0040 **
recip.givennew   1.1577     0.2788   4.153 3.28e-05 ***
theme.Nwords    -1.5960     0.2460  -6.489 8.64e-11 ***
---

# refit w/o verb factor:

fit0.lmer <- lmer(NP_PP ~ recip.animate + theme.given + recip.given
                  + theme.Nwords + paral + (1 | child),
                  data = data, family = 'binomial', method = "Laplace")

# use anova() to compare ranefs of nested models w same fixed effects:

anova(fit0.lmer, fit.v.lmer)
Data: data
Models:
fit0.lmer: NP_PP ~ recip.animate + theme.given + recip.given + theme.Nwords +
fit0.lmer:     paral + (1 | child)
fit.v.lmer: NP_PP ~ recip.animate + theme.given + recip.given + theme.Nwords +
fit.v.lmer:     paral + (1 | child) + (1 | verb)
           Df    AIC    BIC  logLik  Chisq Chi Df Pr(>Chisq)
fit0.lmer   7 458.31 488.37 -222.15
fit.v.lmer  8 459.83 494.19 -221.92 0.4743      1      0.491

# no evidence that verb adds anything as a random effect,
# so we will stay with our original model, fit.lmer

# iv)
# compute C and Dxy of fit.lmer

source("functions/C.r")
probs <- 1/(1 + exp(-fitted(fit.lmer)))

> concordance.fnc(as.factor(data$NP_PP), probs)
$C
[1] 0.823168

$Dxy
[1] 0.646336

>
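# (Aside: if you have Hmisc loaded, its somers2() computes the same C and
# Dxy directly from the predicted probabilities -- a sketch, assuming
# NP_PP is coded 0/1 as above:)

library(Hmisc)
somers2(probs, data$NP_PP)   # returns C, Dxy, n, and Missing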
# v)
# plot fit of fit.lmer
#
#############
# needs library(Design) for cut2()
#
# glitch with Baayen's function; here's my quick ugly version

library(Design)

plot.lmer.fit <- function(fitobject, response){
    # back-transform the linear predictor to probabilities
    model.prob <- 1/(1 + exp(-fitted(fitobject)))
    # bin the predicted probabilities into intervals of width 0.1,
    # labelled by their means
    try.eqprob <- cut2(model.prob,
                       seq(range(model.prob)[1], range(model.prob)[2], 0.1),
                       levels.mean = T)
    # observed proportion of PP responses within each bin
    temp <- lapply(split(response, try.eqprob), mean)
    tbl.eqprob <- unlist(temp)
    plot(as.numeric(names(tbl.eqprob)), tbl.eqprob,
         ylim = c(0,1), xlim = range(model.prob), pch = 19,
         xlab = "Grouped predicted probabilities of PP realization",
         ylab = "Proportions of observed PP realization")
    abline(0,1)   # perfect-fit reference line
}

plot.lmer.fit(fit.lmer, data$NP_PP)

##############
##############
# interpretation
#
# our previous lrm models showed significant harmonic alignment effects

# final lrm model as of BLS abstract date:

fit.bls.lrm <- lrm(NP_PP ~ age.numeric + recip.animate + theme.animate
                   + log(recip.Nwords) + verb + syncat.of.theme.pron,
                   data = data, x=T, y=T)

   Obs  Max Deriv  Model L.R.  d.f.  P  C      Dxy
   542  3e-06      403.23      6     0  0.975  0.95
   Gamma  Tau-a  R2     Brier
   0.951  0.33   0.802  0.04

                     Coef    S.E.   Wald Z P
Intercept            -3.2368 1.1054 -2.93  0.0034
age.numeric          -0.7079 0.3216 -2.20  0.0277
recip.animate         1.5450 0.6412  2.41  0.0160
theme.animate        -2.5091 1.7055 -1.47  0.1412
recip.Nwords          7.3017 0.7132 10.24  0.0000
verb=show            -0.7957 0.6462 -1.23  0.2182
syncat.of.theme.pron  2.3763 0.4548  5.22  0.0000

# interpretation of BLS lrm model:
#
# the Harmonic Alignment hypothesis discussed in Bresnan, Cueni,
# Nikitina, and Baayen supposes that shorter, more discourse-given,
# definite, pronominal, and animate arguments are syntactically placed
# before longer, more discourse-new, indefinite, nominal, and inanimate
# arguments.
#
# in the binomial family of models with logit link function,
# log odds > 0 favor PP and log odds < 0, NP
#
# so since the Coefs of recip.Nwords and syncat.of.theme.pron are both > 0,
# they favor PP, harmonically placing longer recips second and
# pronouns first
#
# but the Coef of recip.animate is also > 0, meaning that animate
# recips favor PP. HA predicts the opposite
#
# we remarked that this model could be well approximated by a minimal
# model having only length of the recipient and pronominality of the
# theme as predictors, but we were interested in the question of
# whether any linguistic properties show harmonic alignment in
# children's dative productions

# Uriel's final lrm model is the same as our minimal model with the
# addition of a clever measure of syllable weight for the theme:
# (see below for his function that counts nvowels)

fit.ur.lrm <- lrm(NP_PP ~ age.numeric + log(recip.Nwords)
                  + syncat.of.theme.pron + log(theme.Nvowels),
                  data = data, x=T, y=T)
fit.ur.lrm

   Obs  Max Deriv  Model L.R.  d.f.  P  C      Dxy
   542  1e-11      401.87      4     0  0.973  0.947
   Gamma  Tau-a  R2   Brier
   0.948  0.329  0.8  0.043

                     Coef    S.E.   Wald Z P
Intercept             0.1222 1.3262  0.09  0.9266
age.numeric          -0.6403 0.3002 -2.13  0.0329
recip.Nwords          6.8170 0.6539 10.43  0.0000
syncat.of.theme.pron  1.6754 0.5543  3.02  0.0025
theme.Nvowels        -1.9411 0.7778 -2.50  0.0126

# Uriel's model is also consistent with the HA hypothesis
#
# interpretation:
#
# longer recip favors PP; pronominal theme favors PP; more theme
# syllables (measured by graphemic vowels) favors NP

# However, both of these models treat the dative utterances as
# independent of the children producing them; even though "child"
# turned out not to be a significant factor, the models still suffer
# from the false assumption of independence of observations from the
# same children

# Let's look at the effect of the child factor in Uriel's model,
# using bootstrap validation by sampling with replacement from
# "child". Recall the discussion of the method in Bresnan et al:
# we use the l.r. model as a "working independence" model, and then
# make multiple copies of the data, randomly sampling the child
# groups with replacement. Thus we might get 3 copies of Trevor's
# data and none of Abe's in one copy; etc. (a by-hand sketch of this
# resampling scheme appears after the output below)

# This method is implemented with bootcov() in the Design library.

bootcov(fit.ur.lrm, data$child, B = 10000)
                  # if you don't have a lot of RAM, you might want to keep
                  # B smaller, but you should really do a lot of these
                  # simulations to test whether the differences among
                  # children greatly affect the model
#
# the result:

Logistic Regression Model

lrm(formula = NP_PP ~ age.numeric + log(recip.Nwords) + syncat.of.theme.pron +
        log(theme.Nvowels), data = data, x = T, y = T)

Frequencies of Responses
  0   1
421 121

   Obs  Max Deriv  Model L.R.  d.f.  P  C      Dxy
   542  1e-11      401.87      4     0  0.973  0.947
   Gamma  Tau-a  R2   Brier
   0.948  0.329  0.8  0.043

                     Coef    S.E.   Wald Z P
Intercept             0.1222 4.6676  0.03  0.9791
age.numeric          -0.6403 1.1177 -0.57  0.5668 <-
recip.Nwords          6.8170 2.2354  3.05  0.0023 <-
syncat.of.theme.pron  1.6754 1.3752  1.22  0.2231 <-
theme.Nvowels        -1.9411 0.9071 -2.14  0.0324 <-
>
# only the length covariates remain significant, and their level of
# significance is weakened -- some would say that these are
# "processing" constraints that show us little about syntax and
# harmonic alignment and are not surprising, since we already know
# that young children's processing abilities are limited in a way
# that affects their syntactic productions
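# (Aside: here is a minimal by-hand version of the cluster bootstrap that
# bootcov() automates -- purely illustrative; the seed and the small B are
# hypothetical choices, not part of the analysis above:)

set.seed(1)
B <- 200                                 # small, for illustration only
kids <- unique(as.character(data$child))
boot.coefs <- matrix(NA, B, length(coef(fit.ur.lrm)))
for (b in 1:B) {
    # resample whole child clusters with replacement
    draw <- sample(kids, length(kids), replace = TRUE)
    boot.data <- do.call(rbind, lapply(draw, function(k)
                         data[data$child == k, ]))
    fit <- try(lrm(NP_PP ~ age.numeric + log(recip.Nwords) +
                   syncat.of.theme.pron + log(theme.Nvowels),
                   data = boot.data), silent = TRUE)
    # some resamples may fail to converge; skip those
    if (!inherits(fit, "try-error") &&
        length(coef(fit)) == ncol(boot.coefs)) boot.coefs[b, ] <- coef(fit)
}
# cluster-bootstrap standard errors, comparable to the S.E. column above
apply(boot.coefs, 2, sd, na.rm = TRUE)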
# Now we check the model used for our BLS abstract:

> bootcov(fit.bls.lrm, data$child, B = 10000)
singular information matrix in lrm.fit (rank= 6 ).  Offending variable(s): verb

> table(data$verb)
give show
 462   80
>
# there are so few "show" verbs that, to avoid massive fit failures on
# the copied datasets, we will simply remove this variable. We will
# also remove theme.animate to avoid similar problems:

> table(data$theme.animate)
  0   1
529  13
>
# So here is our model trimmed for bootstrap cluster validation;
# **we are still trying to keep some linguistic nonprocessing
# predictors in the model in order to learn about harmonic alignment
# in children's dative syntax**

fit.bls.lrm <- lrm(NP_PP ~ age.numeric + recip.animate
                   + log(recip.Nwords) + syncat.of.theme.pron,
                   data = data, x=T, y=T)

                     Coef    S.E.   Wald Z P
Intercept            -3.0335 1.0684 -2.84  0.0045
age.numeric          -0.7217 0.3119 -2.31  0.0207
recip.animate         1.2722 0.6057  2.10  0.0357
recip.Nwords          7.0546 0.6677 10.57  0.0000
syncat.of.theme.pron  2.3764 0.4516  5.26  0.0000

> bootcov(fit.bls.lrm, data$child, B = 10000)
[...]
   Obs  Max Deriv  Model L.R.  d.f.  P  C      Dxy
   542  1e-06      401.65      5     0  0.974  0.947
   Gamma  Tau-a  R2   Brier
   0.948  0.329  0.8  0.04

                     Coef    S.E.   Wald Z P
Intercept            -2.9940 11.384 -0.26  0.7925
age.numeric          -0.7602  2.200 -0.35  0.7297
recip.animate         1.3813  1.438  0.96  0.3367
theme.animate        -2.3457  2.188 -1.07  0.2837
recip.Nwords          7.1844  6.852  1.05  0.2944
syncat.of.theme.pron  2.3509  1.991  1.18  0.2377

Warning message:
fit failure in 10 resamples.  Might try increasing maxit in:
    bootcov(fit.bls.lrm, data$child, B = 10000)
>
# Gasp. It is even worse!
# Nothing is significant at all!
#
# in effect, we have no evidence of any 'linguistic' harmonic
# alignment apart from grammatical and phonological complexity measures

######################
#
# This is our motivation for using the mixed models.
#
# we use mixed models (aka "multilevel" models) to learn whether
# anything linguistically interesting appears in our data when we
# model "child" as a level of random variation

# the model makes an adjustment (Best Linear Unbiased Prediction,
# "BLUP") for each child, representing that child's bias toward or
# against PP dative responses compared to the mean of the child
# population:

> ranef(fit.lmer)
An object of class ranef.lmer
[[1]]
       (Intercept)
abe     0.32767282
adam   -0.50141438
naomi  -0.26841577
nina    0.27657861
sarah   0.08772794
shem    0.65761564
trevor -0.51965796

>
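# (Aside: a quick picture of these per-child adjustments -- a sketch only,
# assuming the ranef() structure printed above:)

blups <- ranef(fit.lmer)[[1]]
dotchart(sort(blups[, "(Intercept)"]),
         labels = rownames(blups)[order(blups[, "(Intercept)"])],
         xlab = "child's adjustment to the intercept (log odds of PP)")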
> summary(fit.lmer)
Generalized linear mixed model fit using Laplace
Formula: NP_PP ~ recip.animate + verb + theme.given + recip.given +
             theme.Nwords + paral + (1 | child)
   Data: data
 Family: binomial(logit link)
   AIC   BIC logLik deviance
   457 491.4 -220.5      441

Random effects:
 Groups Name        Variance Std.Dev.
 child  (Intercept) 0.28481  0.53368
number of obs: 542, groups: child, 7

Estimated scale (compare to 1 )  1.083942

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)
(Intercept)      1.3713     0.5601   2.448 0.014345 *
recip.animate    0.4304     0.3668   1.173 0.240733
verbshow        -0.6893     0.3867  -1.782 0.074689 .
theme.givennew  -1.0316     0.2900  -3.557 0.000376 ***
recip.givennew   0.8709     0.2910   2.993 0.002765 **
theme.Nwords    -1.6401     0.2499  -6.564 5.24e-11 ***
paral           -1.0238     0.2938  -3.484 0.000494 ***
---

Correlation of Fixed Effects:
            (Intr) rcp.nm vrbshw thm.gv rcp.gv thm.Nw
recip.animt -0.590
verbshow    -0.115 -0.088
theme.gvnnw -0.089  0.024  0.059
recip.gvnnw -0.185  0.002 -0.182 -0.109
theme.Nwrds -0.622  0.060  0.078 -0.150  0.035
paral       -0.264 -0.007  0.036  0.211  0.240  0.092
>
# interpretation:

# from the signs of the estimated coefficients of the fixed effects
# (in the "Estimate" column), we see that the model shows quantitative
# harmonic alignment of given/new themes and given/new recipients, as
# well as the known effects of the "processing" predictors of length
# of theme in words (a measure of grammatical complexity) and
# parallelism in the discourse context. Further, the presence of a
# parallel PP dative in the context increases the likelihood of a PP
# production.
#
# These each contribute independently to the response.

# The model also shows that the apparent anti-harmonic alignment of
# animacy seen in the data -- the tendency for animate recipients to
# be placed in (final) PP position --

plot(xtabs(~ NP_PP + recip.animate, data = data))

# can now be explained in terms of discourse structure:
# disproportionately more *new* animates are placed in PP position
# than in first NP position:

plot(xtabs(~ NP_PP + recip.given + recip.animate, data = data))

# -- and newness/givenness has a significant effect on children's
# choice of dative structures.

# This effect could not be seen when the children's givenness data
# was pooled:

> mean(as.numeric(data[data$NP_PP==1,]$recip.given) - 1) # proportion "new"
[1] 0.3884298                                            # in PPs over
                                                         # all children
#
# compare proportion "new" PPs for each child; it varies enormously,
# with three children having more than the mean, three having less,
# and one having none

tapply((as.numeric(data[data$NP_PP==1,]$recip.given) - 1),
       data[data$NP_PP==1,]$child, mean)
      abe      adam     naomi      nina     sarah      shem    trevor
0.2857143 0.6097561 0.3333333 0.1578947 0.7142857 0.5000000 0.0000000
>
################
# What if we add Uriel's theme.Nvowels to this model?

# Here's his function.

nvowels = function(vec) {
    ret = c();
    for (x in as.character(vec)) {
        # count the pieces left after splitting the string at
        # graphemic vowels
        ret = c(ret, nrow(data.frame(a = unlist(strsplit(x, "[aeuioy]")))))
    }
    return(ret);
}

data$recip.Nvowels = nvowels(data$recipient)
                   # as shown in JB's emailed comments, this covariate is
                   # pretty useless as a measure (to do: add capital vowels
                   # as vowels, and count common digraphs like "ou" as
                   # single vowels -- see the sketch below)

data$theme.Nvowels = nvowels(data$theme)     # this one is pretty good
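# (Aside: a hypothetical stab at that to-do -- count *runs* of vowel
# letters, case-insensitively, so digraphs like "ou" count once. A sketch
# only, not Uriel's function:)

nvowels2 <- function(vec) {
    x <- tolower(as.character(vec))
    # gregexpr() returns match positions, or -1 when there is no match,
    # so counting the positive entries counts the vowel groups
    sapply(gregexpr("[aeiouy]+", x), function(m) sum(m > 0))
}
nvowels2(c("Toy", "mouse", "the big bird"))  # 1 2 3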
fit.ur.lmer <- lmer(NP_PP ~ recip.animate + verb + theme.given + recip.given
                    + theme.Nwords + paral + theme.Nvowels + (1 | child),
                    data = data, family = 'binomial', method = "Laplace")

Estimated scale (compare to 1 )  1.055818

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)
(Intercept)      1.9139     0.5863   3.264  0.00110 **
recip.animate    0.3110     0.3722   0.836  0.40340
verbshow        -0.5635     0.3851  -1.463  0.14337
theme.givennew  -0.8860     0.2970  -2.983  0.00285 **
recip.givennew   0.7575     0.2941   2.575  0.01001 *
theme.Nwords    -0.7743     0.3571  -2.168  0.03014 *
paral           -1.0452     0.2958  -3.533  0.00041 ***
theme.Nvowels   -0.5801     0.1817  -3.194  0.00141 **
---

Correlation of Fixed Effects:
            (Intr) rcp.nm vrbshw thm.gv rcp.gv thm.Nw paral
recip.animt -0.600
verbshow    -0.093 -0.094
theme.gvnnw -0.038  0.038  0.044
recip.gvnnw -0.188 -0.014 -0.162 -0.134
theme.Nwrds -0.235  0.018  0.089 -0.052 -0.040
paral       -0.253 -0.012  0.030  0.203  0.254  0.034
theme.Nvwls -0.293  0.060 -0.048 -0.112  0.084 -0.698  0.027
>
# It is significant, but highly correlated with theme.Nwords --
# witness the -0.698 cell in the correlation matrix
#
# Notice that adding this predictor reduces the variance in our
# children:

fit.lmer:
Random effects:
 Groups Name        Variance Std.Dev.
 child  (Intercept) 0.28481  0.53368

fit.ur.lmer:
Random effects:
 Groups Name        Variance Std.Dev.
 child  (Intercept) 0.21738  0.46624

# we could try dropping theme.Nwords, which is less significant

fit.ur2.lmer <- lmer(NP_PP ~ recip.animate + verb + theme.given + recip.given
                     + paral + theme.Nvowels + (1 | child),
                     data = data, family = 'binomial', method = "Laplace")

> fit.ur2.lmer
Generalized linear mixed model fit using Laplace
Formula: NP_PP ~ recip.animate + verb + theme.given + recip.given +
             paral + theme.Nvowels + (1 | child)
   Data: data
 Family: binomial(logit link)
   AIC   BIC logLik deviance
 448.9 483.2 -216.4    432.9

Random effects:
 Groups Name        Variance Std.Dev.
 child  (Intercept) 0.1915   0.43761
number of obs: 542, groups: child, 7

Estimated scale (compare to 1 )  1.037307

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)
(Intercept)      1.6770     0.5682   2.952 0.003162 **
recip.animate    0.3255     0.3700   0.880 0.378916
verbshow        -0.4950     0.3790  -1.306 0.191588
theme.givennew  -0.9275     0.2956  -3.138 0.001700 **
recip.givennew   0.7359     0.2917   2.523 0.011629 *
paral           -1.0322     0.2934  -3.518 0.000435 ***
theme.Nvowels   -0.8860     0.1348  -6.573 4.92e-11 ***
---

Correlation of Fixed Effects:
            (Intr) rcp.nm vrbshw thm.gv rcp.gv paral
recip.animt -0.607
verbshow    -0.077 -0.094
theme.gvnnw -0.059  0.059  0.043
recip.gvnnw -0.194 -0.027 -0.158 -0.151
paral       -0.247 -0.013  0.019  0.203  0.263
theme.Nvwls -0.668  0.089  0.032 -0.200  0.074  0.063
>
# This seems to be our best model!

# It shows that complexity of the theme, measured in graphemic vowels,
# and parallelism are highly significant predictors of dative
# construction choice -- it also shows that harmonic alignment of
# given/new themes and recipients contributes independently to
# construction choice
#
# These effects are parallel to those seen in the multilevel
# model of adult spoken dative productions
#
# Ta da. We do have results.

#####################
# Why does a model which conditions its estimates on each child seem
# to do worse than the ordinary l.r. model?
# Because the children show enormous variation in their responses:

> mean(data$NP_PP)
[1] 0.2232472

> tapply(data$NP_PP, data$child, mean)
      abe      adam     naomi      nina     sarah      shem    trevor
0.3134328 0.1782609 0.1363636 0.2483660 0.3043478 0.5333333 0.0937500
>
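# (Aside: a quick picture of that between-child variation against the
# pooled mean -- a sketch using the proportions just computed:)

p <- tapply(data$NP_PP, data$child, mean)
barplot(sort(p), las = 2, ylab = "proportion of PP datives")
abline(h = mean(data$NP_PP), lty = 2)   # pooled mean across all children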