This terse document reproduces the results presented in the associated manuscript.

Asymptotic likelihood expression

First we use Stirling’s approximation \(\log(n!)\sim (n+1/2)\log(n)-n+\log(2\pi)/2\) to simplify equation 4 of the manuscript Following is a sage-compatible input [the \(\sqrt{2\pi}\) bit cancels]:

var('B n r')
pretty_print_default(True)
X=log(B-1) + (
+ ((B+n-r-2)*log(B+n-r-2)-(B+n-r-2)+log(B+n-r-2)/2)
- ((B+n  -1)*log(B+n  -1)-(B+n  -1)+log(B+n  -1)/2)
)
X.taylor(n,Infinity,2)

sage returns this:

\[ -(r+1)\log n -\frac{(2B-3)r-r^2+2B-2}{2n} -\frac{3(2B-3)r^2-6B^2-6(B^2-3B+2)r+12B-5}{12n^2} +\log(B-1)+\mathcal{O}(n^{-3}) \]

Note that if we use instead \(n~\sim n^ne^{-n}\sqrt{2\pi n}\left(1+\frac{1}{12n}\right)\), then only terms of \(\mathcal{O}(n^{-2})\) and above are changed; the first-order terms and indeed the zeroth order terms are unaffected. Anyway, after simplifying and taking an asymptotic expansion to order \(n^{-1}\), we find, to first order,

\[ \mathcal{S}=\log\mathcal{L}_{r,n}(a)=\log(B-1)-B\frac{r+1}{n} + K + \mathcal{O}\left(n^{-2}\right) \]

where \(K\) is a constant.

Definition of likelihood and log-likelihood in R

Below I give R idiom for the ikelihood expression given by equation 4 in the manuscript. Some inline comments are included.

f_single <- function(a,r,n,log=FALSE){  # not vectorised
    B <- 1/(1-a)
    if(log){
        out <- log(B-1) - lgamma(B+n) + lgamma(B+n-r-1)
    } else {  # not-log
        out <- (B-1)/prod(B+(n-r-1):(n-1))
    }
   return(out)
}

f_vec_a <- function(a,r,n, log=FALSE){  # vectorised in 'a' but not in 'r'
    sapply(a,function(a){f_single(a,r=r,n=n,log=log)})
}

f_vec_arn <- function(a,r,n,log=FALSE){
    ## vectorized in 'a'; treats 'r' and 'n' as
    ## vectors of independent observations

    M <- cbind(r,n)

    if(log){
        out <- 0
        for(i in seq_len(nrow(M))){out <- out + f_vec_a(a,r=M[i,1],n=M[i,2],log=TRUE)}
    } else {
        out <- 1
        for(i in seq_len(nrow(M))){out <- out * f_vec_a(a,r=M[i,1],n=M[i,2],log=FALSE)}
    }
    return(out)
}
fapprox <- function(a,r,n,log=FALSE){
    if(log){
        return(log(B-1)-B*(r+1)/n)
    } else {
        return(B-1 - (r+1)*exp(B)/n)
    }
}

Above, fapprox() gives the asymptotic expression (5) in the manuscript.

fdash <- function(a,r,n,log=TRUE){
    B <- 1/(1-a)
    out <- (1/(B-1) - psigamma(B+n) + psigamma(B+n-r-1))*B^2
    if(!log){out <- out * f_vec_arn(a,r,n,log=FALSE)}
    return(out)
}

Notation is \(\mathcal{L}\) for likelihood and \(\mathcal{S}=\log\mathcal{L}\) for log-likelihood (support). Above we see fdash() implements either \(\frac{\partial\mathcal{S}}{\partial a}\) or \(\frac{\partial\mathcal{L}}{\partial a}\) [depending on the value of argument log. We see that

\[ \frac{\partial\mathcal{S}}{\partial a}= B^2\left(\frac{1}{B-1}-\psi(B+n)+\psi(B+n-r-1)\right)\]

and \(\frac{\partial\mathcal{L}}{\partial a}\) is of course just \(\frac{\mathcal{L}\partial\mathcal{S}}{\partial a}\). Now, numerical verification of fdash():

a <- 0.34
d <- 1e-3
r <- 4
n <- 10
c(
  numerical  = (f_single(a+d/2,r,n,log=TRUE)-f_single(a-d/2,r,n,log=TRUE))/d,
  analytical = fdash(a,r,n)
)
##  numerical analytical 
##     3.0693     3.0693
c(
  numerical  = (f_single(a+d/2,r,n,log=FALSE)-f_single(a-d/2,r,n,log=FALSE))/d,
  analytical = fdash(a,r,n,log=FALSE)
)
##  numerical analytical 
## 3.7904e-05 3.7904e-05

Above we see very close agreemen between numerical and analytical results, for both log=TRUE and log=FALSE.

Maximum likelihood estimation

Use numerical optimization [optim()] to find the maximum likelihood point [R function MLE()] and, as a check, use calculus uniroot() to find the point of zero derivative; R function MLEa()] to find the maximum likelihood point:

MLE <- function(r,n,give=FALSE){
    d <- 1e-6
    out <- optimize(f_vec_arn,c(d,1-d),r=r,n=n,log=TRUE,maximum=TRUE)
    if(!give){out <- out$maximum}
    return(out)
}

MLEa <- function(r,n,give=FALSE,small=1e-4){
    out <- uniroot(f=function(a){fdash(a,r,n)},interval=c(small,1-small))
    if(!give){out <- out$root}
    return(out)
}

showdiff <- function(x,y){c(way1=x,way2=y,diff=x-y,logratio=log(x/y))}

rbind(
showdiff(MLE(1,9),MLEa(1,9)),
showdiff(MLE(3,9),MLEa(3,9)),
showdiff(MLE(3,7),MLEa(3,7))
)
##         way1    way2        diff    logratio
## [1,] 0.89457 0.89457  1.5450e-06  1.7270e-06
## [2,] 0.71078 0.71083 -4.8565e-05 -6.8323e-05
## [3,] 0.63966 0.63966 -2.3780e-06 -3.7175e-06

Above we see very close agreement between the two methods. We can get a feel for the accuracy of the asymptotic result numerically using similar methods:

rbind(
showdiff(MLE(10, 100), 100 / 111),
showdiff(MLE(10, 500), 500 / 511),
showdiff(MLE(10,1000),1000 /1011),
showdiff(MLE(10,5000),5000 /5011)
)
##         way1    way2       diff   logratio
## [1,] 0.90467 0.90090 0.00376741 0.00417310
## [2,] 0.98020 0.97847 0.00172527 0.00176167
## [3,] 0.99007 0.98912 0.00094552 0.00095547
## [4,] 0.99801 0.99780 0.00020880 0.00020924
rbind(
showdiff(MLEa(10, 100), 100 / 111),
showdiff(MLEa(10, 500), 500 / 511),
showdiff(MLEa(10,1000),1000 /1011),
showdiff(MLEa(10,5000),5000 /5011)
)
##         way1    way2       diff   logratio
## [1,] 0.90468 0.90090 0.00377468 0.00418115
## [2,] 0.98018 0.97847 0.00170876 0.00174483
## [3,] 0.99004 0.98912 0.00092123 0.00093093
## [4,] 0.99799 0.99780 0.00018383 0.00018422

Now another verification, this time comparing the R implementation against two different direct numerical implementations of equation 4 of the manuscript:

a <- 0.23423434
b <- 1-a
B <- 1/b
n <- 9
r <- 3
c(
way1 = b^3*a/((a+9*b) * (a+8*b) * (a+7*b) * (a+6*b)),
way2 = (B-1)/prod(B+(5:8)),
way3 = f_vec_arn(a,r,n,log=FALSE)
)
##     way1     way2     way3 
## 8.59e-05 8.59e-05 8.59e-05

Above we see close agreement.

Produce Maximum Likelihood diagram, Figure 1

out <- list()
for(n in 1:10){
  out[[n]]  <- rep(NA,n+1)
  for(r in 0:n){
    if(r==0){
      jj <- 1
    } else if(r==n){n
      jj <- 0
    } else {
      jj <- MLE(r,n)
    }
    out[[n]][r+1] <- jj
  }
}
out
## [[1]]
## [1] 1 0
## 
## [[2]]
## [1] 1.00000 0.58578 0.00000
## 
## [[3]]
## [1] 1.00000 0.71010 0.46791 0.00000
## 
## [[4]]
## [1] 1.00000 0.77599 0.58736 0.40866 0.00000
## 
## [[5]]
## [1] 1.00000 0.81727 0.66025 0.51735 0.37190 0.00000
## 
## [[6]]
## [1] 1.00000 0.84562 0.71055 0.58814 0.47121 0.34637 0.00000
## 
## [[7]]
## [1] 1.00000 0.86631 0.74764 0.63966 0.53826 0.43810 0.32740 0.00000
## 
## [[8]]
## [1] 1.00000 0.88211 0.77618 0.67924 0.58863 0.50135 0.41293 0.31255 0.00000
## 
## [[9]]
##  [1] 1.00000 0.89457 0.79886 0.71078 0.62844 0.54987 0.47270 0.39298 0.30054
## [10] 0.00000
## 
## [[10]]
##  [1] 1.00000 0.90463 0.81735 0.73658 0.66091 0.58896 0.51924 0.44969 0.37671
## [10] 0.29056 0.00000
plotterp <- function(...){
plot(NA,xlim=c(1,10),ylim=c(0,1),type="n",xlab="n",ylab=expression(hat(a)))
for(n in 1:10){
  for(r in 0:n){   points(n,out[[n]][r+1],pch=16) }
  for(r in 0:n){   text(n+0.2,out[[n]][r+1],r,cex=0.5,col='gray') }
}
}
plotterp()

pdf(file="dotprobs.pdf")
plotterp()
dev.off()
## png 
##   2

Visuals

Produce the rainbow-coloured likelihood plot

plotter <- function(...){
a <- seq(from=0,to=1,by=0.01)
n <- 8
jj <- f_vec_arn(a,3,n,log=FALSE)
plot(a,jj/max(jj,na.rm=TRUE),type='n',xlab=expression(a),ylab="likelihood")
grid()
rain <- rainbow(n+1)
for(r in seq(from=0,to=n)){
  y <- f_vec_a(a,r,n,log=FALSE)
  if(r==0){y[length(y)] <- 1}
  y <- y/max(y,na.rm=TRUE)
  if(r>0){y[length(y)] <- 0}
  points(a,y,type='l',lwd=4,col=rain[r+1])
}
abline(v=MLE(r=4,n=8))
text(0.07,0.95,"r=8",col=rain[9])
text(0.20,0.95,"r=7",col=rain[8])
text(0.26,0.91,"r=6",col=rain[7])
text(0.29,0.83,"r=5",col=rain[6])
text(0.33,0.76,"r=4",col=rain[5])
text(0.35,0.64,"r=3",col=rain[4])
text(0.39,0.53,"r=2",col=rain[3])
text(0.45,0.41,"r=1",col=rain[2])
text(0.63,0.22,"r=0",col=rain[1])
}

plotter()

pdf(file="ninelikes.pdf")
plotter()
dev.off()
## png 
##   2

Alternative production of rainbow figure, as a check

n <- 7
a <- seq(from=0,to=1,by=0.01)
fhyper3 <- function(r,n){
  out <- hyper3()
  out['a'] <- 1
  out['b'] <- r
  for(i in (n-r):n){
    out[c(a=1,b=i)] %<>% dec
  }
  return(out)
}

plot(NA,xlim=c(0,1),ylim=c(0,1),type='n')
M <- cbind(a=a,b=1-a)
for(r in 0:7){
  y <- loglik(M,fhyper3(r,n),log=FALSE)
  y <- y/max(y, na.rm=TRUE)
points(M[,1],y,type="l",lwd=8)
}

1 Formula 1: Checo’s career arc

readstring <- function(year){read.table(paste("formula1_",year,".txt",sep=""))}
getfoc <- function(year,comp="Perez"){  # get focal competitor
  M <- as.matrix(readstring(year))
  out <- suppressWarnings(as.numeric(M[comp,seq_len(ncol(M)-1)]))
  out[is.na(out)] <- nrow(M)
  return(out)
}
getnum <- function(year){nrow(readstring(year))} # number of competitors

perez <- lapply(2011:2023,getfoc)
print(perez)
## [[1]]
##  [1] 28 28 17 14  9 28 28 11  7 11 15 28 28 10  8 16 10 11 13
## 
## [[2]]
##  [1]  8  2 11 11 25 11  3  9 25  6 14 25  2 10 25 11 25 15 11 25
## 
## [[3]]
##  [1] 11  9 11  6  9 16 11 20  8  9 11 12  8 10 15  5  9  7  6
## 
## [[4]]
##  [1] 10 25  3  9  9 25 11  6 11 10 25  8  7  7 10 10 25 15  7
## 
## [[5]]
##  [1] 10 13 11  8 13  7 11  9  9 22  5  6  7 12  3  5  8 12  5
## 
## [[6]]
##  [1] 13 16 11  9  7  3 10  3 17  6 11 10  5  8  8  6  7  8 10  4  8
## 
## [[7]]
##  [1]  7  9  7  6  4 13  5 25  7  9  8 17  9  5  6  7  8  7  9  7
## 
## [[8]]
##  [1] 11 16 12  3  9 12 14 20  7 10  7 14  5  7 16 10  7  8 20 10  8
## 
## [[9]]
##  [1] 13 10  8  6 15 12 12 12 11 17 20 11  6  7 20  7  8  7 10  9  7
## 
## [[10]]
##  [1]  6  6  7 23 23  5 10 10  5  4  4  7  6  2 18  1 23
## 
## [[11]]
##  [1]  5 11  4  5  4  1  3  4  6 16 21 19  8  5  9  3  3  3  4  4 21 15
## 
## [[12]]
##  [1] 18  4  2  2  4  2  1  2 22  2 22  4  5  2  5  6  1  2  4  3  7  3
## 
## [[13]]
##  [1]  2  1  5  1  2 16  4  6  3  6  3  2  4  2  8 22 10  4 22  4  3  4
y <- unlist(lapply(seq_along(perez),function(i){mean(perez[[i]])}))
x <- 2011:2023
plot(x,y)

checo_like <- function(a){
  out <- a*0
  for(year in 2011:2023){ out <- out + f_vec_arn(a,getfoc(year)-1,getnum(year)-1,log=TRUE) }
  return(out)
}

a <- seq(from=0.45,to=0.62,by=0.01)
cL <- checo_like(a)
cL <- cL - max(cL)
plot(a,cL,type='b')
abline(h=c(0,-2))

f1_logistic <- function(vec){
  alpha <- vec[1]
  beta  <- vec[2]
  out <- 0
  for(year in 2011:2023){
     LO <- alpha + beta*(year-2011)
     strength <- exp(LO)/(1+exp(LO))
     out <- out + f_vec_arn(strength,getfoc(year)-1,getnum(year)-1,log=TRUE)
  }
  return(out)
}
a <- seq(from=-0.8,to=0.0,by=0.01)
b <- seq(from=0,to=0.2,by=0.01)
jj <- as.matrix(expand.grid(a,b))
L <- apply(jj,1,f1_logistic)
L <- L-max(L)
L <- matrix(L,length(a),length(b))
L <- pmax(L,-40)
showchec <- function(...){
contour(a,b,L,xlab=expression(alpha),ylab=expression(beta),levels=-c(2,5*(1:5)))
points(-0.27,0.0813,pch=16,cex=2)
}
showchec()

pdf(file="showchecolike.pdf")
showchec(a,b,L)
dev.off()
## png 
##   2
f1_logistic(c(0,0))
##       n 
## -7037.1
f1_logistic(c(0,0.0001))
##     n 
## -7037
f1_logistic(c(0.001,0))
##       n 
## -7037.1
o <- optim(c(-0.2,0.1),fn=f1_logistic, control=list(fnscale = -1),hessian=TRUE)
o
## $par
## [1] -0.270069  0.081324
## 
## $value
## [1] -7024.1
## 
## $counts
## function gradient 
##       41       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
## 
## $hessian
##          [,1]     [,2]
## [1,]  -194.85  -1204.2
## [2,] -1204.21 -10205.2
o$par
## [1] -0.270069  0.081324
o$hessian
##          [,1]     [,2]
## [1,]  -194.85  -1204.2
## [2,] -1204.21 -10205.2
eigen(o$hessian)
## eigen() decomposition
## $values
## [1]    -52.021 -10347.982
## 
## $vectors
##          [,1]    [,2]
## [1,] -0.99304 0.11778
## [2,]  0.11778 0.99304
best <- o$par
f1_logistic(best)
##       n 
## -7024.1
jj <- best
jj[1] <- jj[1]*1.01
f1_logistic(jj) - f1_logistic(best)
##           n 
## -0.00055258
jj <- best
jj[1] <- jj[1]*0.99
f1_logistic(jj) - f1_logistic(best)
##           n 
## -0.00086857
jj <- best
jj[2] <- jj[2]*1.00001
f1_logistic(jj) - f1_logistic(best)
##           n 
## -1.2234e-07
jj <- best
jj[2] <- jj[2]*0.99
f1_logistic(jj) - f1_logistic(best)
##          n 
## -0.0032498
o_free <- optim(c(-0.2,0.1),fn=f1_logistic, control=list(fnscale = -1),hessian=TRUE)
o_constrained <- optim(c(-0.2,0.1),fn=function(v){
v[2] <- 0
return(f1_logistic(v))}, control=list(fnscale = -1),hessian=TRUE)
o_free
## $par
## [1] -0.270069  0.081324
## 
## $value
## [1] -7024.1
## 
## $counts
## function gradient 
##       41       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
## 
## $hessian
##          [,1]     [,2]
## [1,]  -194.85  -1204.2
## [2,] -1204.21 -10205.2
o_constrained
## $par
## [1]  0.198232 -0.042386
## 
## $value
## [1] -7033.4
## 
## $counts
## function gradient 
##       45       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
## 
## $hessian
##        [,1] [,2]
## [1,] -196.2    0
## [2,]    0.0    0
pchisq(2*(o_free$value - o_constrained$value),df=1,lower.tail=FALSE)
## [1] 1.6637e-05

2 Athletics

O <- read.table("olympic_athletics_mens_100m.txt",header=TRUE)
head(O)
##   year heat rank n
## 1 2004   H1    6 8
## 2 2004   H2    7 8
## 3 2004   H3    5 9
## 4 2004   H4    2 9
## 5 2004   H5    1 8
## 6 2004   H6    6 8
jjf <- function(x){f_vec_arn(x,O$rank,O$n,log=TRUE)}
system.time(optolym <- optimize(jjf, c(0.45,0.5), maximum=TRUE))
##    user  system elapsed 
##   0.006   0.000   0.006
optolym
## $maximum
## [1] 0.47423
## 
## $objective
##       n 
## -417.63
jjf(0.5)
##       n 
## -417.79
jjf(optolym$maximum)
##       n 
## -417.63
(LR <- 2*(jjf(optolym$maximum)-jjf(0.5)))
##       n 
## 0.31774
pchisq(LR,df=1,lower.tail=FALSE)
##       n 
## 0.57297
a <- seq(from=0.3,to=0.65,len=45)
L <- f_vec_arn(a,O$rank,O$n,log=TRUE)
Lmax <- jjf(optolym$maximum)
Lmax
##       n 
## -417.63
L <- L-Lmax
(a_lower <- uniroot(function(x){jjf(x)+2-Lmax},interval=c(0.3,0.5))$root)
## [1] 0.38036
(a_upper <- uniroot(function(x){jjf(x)+2-Lmax},interval=c(0.5,0.6))$root)
## [1] 0.56303
plotolymp <- function(...){
plot(a,L,type='l',ylab="log-likelihood",lwd=2)
abline(h=c(0,-2))
ahat <- optolym$maximum
segments(x0=ahat,y0=-1.5,y1=0)
text(ahat,-0.91,expression(hat(a)==0.474),pos=2)
abline(v=0.5,lty=3)

segments(x0=a_lower,y0=-1.5,y1=-3.5)
segments(x0=a_upper,y0=-1.5,y1=-3.5)

text(x=a_lower,y=-3,"0.380",pos=4)
text(x=a_upper,y=-3,"0.563",pos=2)
}
plotolymp()

pdf(file="plotolymp.pdf")
plotolymp()
dev.off()
## png 
##   2

Education

rank <- c( 9, 7, 2, 3,2 ,8 ,5 ,4 ,9 )
class_size <- c(12, 17,23, 9,13,14,13,12,15)
course <- c("rings and modules","group theory","calculus","linear algebra",
"differential equations","topology","special relativity","fluid mechanics","Lie algebra")
category=c("pure","pure","applied","applied","applied","pure","applied","applied","pure")
data.frame(course,category,rank,class_size)
##                   course category rank class_size
## 1      rings and modules     pure    9         12
## 2           group theory     pure    7         17
## 3               calculus  applied    2         23
## 4         linear algebra  applied    3          9
## 5 differential equations  applied    2         13
## 6               topology     pure    8         14
## 7     special relativity  applied    5         13
## 8        fluid mechanics  applied    4         12
## 9            Lie algebra     pure    9         15
a <- seq(from=0.2,by=0.01,to=0.8)
wp <- category=='pure'
wa <- category=='applied'

Following lines show optimization of the marginal Bradley Terry strength for pure and applied mathematics:

Lpure <- f_vec_arn(a,rank[wp]-1,class_size[wp]-1,log=TRUE)
Lpure <- Lpure - max(Lpure,na.rm=TRUE)
plot(a,Lpure,ylab='pure strength')

optimize(function(a){f_vec_arn(a,rank[wp]-1,class_size[wp]-1,log=TRUE)},c(0.1,0.9),maximum=TRUE)
## $maximum
## [1] 0.54235
## 
## $objective
##      n 
## -76.48
optimize(function(a){f_vec_arn(a,rank[wa]-1,class_size[wa]-1,log=TRUE)},c(0.1,0.9),maximum=TRUE)
## $maximum
## [1] 0.82041
## 
## $objective
##       n 
## -35.749

[the evaluates above do not appear in the manuscript] Now we define a likelihood function for the two subjects jointly:

supp2_ed <- function(vec,place1,place2,runners1,runners2){
  out <- 0
  M1 <- cbind(place1,runners1)
  M2 <- cbind(place2,runners2)
  for(i in seq_len(nrow(M1))){
    out <- out + f_vec_arn(vec[1],r=M1[i,1]-1,n=M1[i,2]-1,log=TRUE)
  }
  for(i in seq_len(nrow(M2))){
    out <- out + f_vec_arn(vec[2],r=M2[i,1]-1,n=M2[i,2]-1,log=TRUE)
  }
  return(out)
}
x <- seq(from=0.1,to=0.99,by=0.01)
jj <- as.matrix(expand.grid(x,x))
L <- apply(jj,1,supp2_ed,rank[wp]-1,rank[wa]-1,class_size[wp]-1,class_size[wa]-1)
L <- L - max(L,na.rm=TRUE)
L <- matrix(L,length(x),length(x))
plotpureandapplied <- function(...){
par(pty='s')
contour(x,x,L,asp=1,xlim=range(x),ylim=range(x),levels=-(0:5),
xlab='pure strength',ylab='applied strength')
abline(0,1)
}
plotpureandapplied()

pdf(file="plotpureandapplied.pdf")
plotpureandapplied()
dev.off()
## png 
##   2

Following code shows joint maximization of likelihood:

jj1 <-
optim(
   c(.4,.8),
   function(vec){
      supp2_ed(vec,rank[wp]-1,rank[wa]-1,class_size[wp]-1,class_size[wa]-1)},
   control=list(fnscale = -1)
)
jj1
## $par
## [1] 0.56735 0.88686
## 
## $value
## [1] -86.945
## 
## $counts
## function gradient 
##       45       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

Following code shows constrained maximization of likelihood [constrained, that is, to follow the diagonal null line]:

jj2 <-
optimize(function(v){supp2_ed(c(v,v),rank[wp]-1,rank[wa]-1,class_size[wp]-1,class_size[wa]-1)},
c(0.1,0.9),maximum=TRUE)
jj2
## $maximum
## [1] 0.7129
## 
## $objective
## [1] -89.373

Extra support is thus \(-86.9449 + 89.3732=2.4283\) (to 4 d.p.) Now calculate the asymptotic \(p\)-value obtained from Wilks’s theorem:

pchisq(2*(jj1$value-jj2$objective),lower.tail=FALSE,df=1)
## [1] 0.02754