(takes maybe two or three hours to run without cache; each parkrunlikefunobs chunk takes maybe thirty minutes).
First we define BT() which specifies Bradley-Terry
strengths \(\alpha x^i\):
BT <- function(n,x){
out <- x^(0:(n-1))
if(x != 1){
out <- out*(1-x)/(1-x^n)
} else {
out <- out/n
}
names(out) <- str_pad(1:n, width=ceiling(log10(n)), pad = "0")
out
}
BT(4,0.5)
## 1 2 3 4
## 0.53333333 0.26666667 0.13333333 0.06666667
sum(BT(4,0.5))
## [1] 1
We will consider \(n=9\), \(x=0.5\), \(r=4\) and make repeated observations:
makeracetable <- function(no_of_races, no_of_competitors, x){
t(replicate(no_of_races,
as.numeric(rrace(BT(no_of_competitors, x)))))
}
makeracetable(no_of_races=10, no_of_competitors=5, x=0.7)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 3 1 2 4 5
## [2,] 1 4 3 2 5
## [3,] 3 1 2 5 4
## [4,] 3 4 1 2 5
## [5,] 4 2 1 3 5
## [6,] 1 2 3 4 5
## [7,] 1 3 2 5 4
## [8,] 1 2 5 3 4
## [9,] 2 1 4 3 5
## [10,] 5 2 1 4 3
get_one_competitor <- function(racetable, competitor){
jj <- apply(racetable, 1, function(x){which(x==competitor)})
tabulate(jj, ncol(racetable))
}
X <- makeracetable(1e4,11,0.7)
get_one_competitor(X,4)
## [1] 1042 1218 1268 1484 1526 1370 1069 640 290 85 8
get_one_competitor(X,11)
## [1] 87 118 136 177 238 353 515 789 1273 2067 4247
rbind(
get_one_competitor(X,1) ,
get_one_competitor(X,2) ,
get_one_competitor(X,9) ,
get_one_competitor(X,10)
)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] 3104 2443 1796 1245 785 402 157 55 10 3 0
## [2,] 2052 2134 1912 1524 1155 652 354 162 43 12 0
## [3,] 158 202 254 354 462 712 991 1324 1784 2173 1586
## [4,] 130 162 209 235 311 487 706 1076 1530 2413 2741
t(sapply(seq_len(11),function(i){get_one_competitor(X,i)}))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] 3104 2443 1796 1245 785 402 157 55 10 3 0
## [2,] 2052 2134 1912 1524 1155 652 354 162 43 12 0
## [3,] 1525 1553 1682 1634 1437 1027 683 323 99 36 1
## [4,] 1042 1218 1268 1484 1526 1370 1069 640 290 85 8
## [5,] 777 796 1069 1169 1391 1498 1332 1027 641 246 54
## [6,] 512 646 738 971 1194 1364 1498 1388 1024 516 149
## [7,] 373 424 537 663 871 1206 1473 1654 1478 927 394
## [8,] 240 304 399 544 630 929 1222 1562 1828 1522 820
## [9,] 158 202 254 354 462 712 991 1324 1784 2173 1586
## [10,] 130 162 209 235 311 487 706 1076 1530 2413 2741
## [11,] 87 118 136 177 238 353 515 789 1273 2067 4247
OK now to put above code in to functional form:
get_all_competitors <- function(no_of_races, no_of_competitors, x){
X <- makeracetable(no_of_races=no_of_races, no_of_competitors=no_of_competitors, x=x)
out <- t(sapply(seq_len(no_of_competitors),function(i){get_one_competitor(X,i)}))
dimnames(out) <- list(
true_rank = paste("r_", seq_len(no_of_competitors), sep=""),
observed_rank = paste("c_", seq_len(no_of_competitors), sep=""))
return(out/no_of_races)
}
Now use it:
get_all_competitors(100,6,0.5)
## observed_rank
## true_rank c_1 c_2 c_3 c_4 c_5 c_6
## r_1 0.49 0.33 0.15 0.02 0.01 0.00
## r_2 0.18 0.31 0.29 0.13 0.07 0.02
## r_3 0.20 0.20 0.30 0.20 0.07 0.03
## r_4 0.05 0.11 0.13 0.37 0.26 0.08
## r_5 0.05 0.04 0.06 0.18 0.39 0.28
## r_6 0.03 0.01 0.07 0.10 0.20 0.59
Above, we see from the first line that, with a sample of 100 races, the top competitor comes first with (estimated) probability about 0.55, second with probability abot 0.25, and so on, coming fifth with probability 0.01
get_all_competitors(100, 6, 0.99)
## observed_rank
## true_rank c_1 c_2 c_3 c_4 c_5 c_6
## r_1 0.22 0.08 0.20 0.12 0.17 0.21
## r_2 0.17 0.19 0.19 0.15 0.15 0.15
## r_3 0.19 0.17 0.14 0.20 0.16 0.14
## r_4 0.08 0.21 0.12 0.20 0.23 0.16
## r_5 0.15 0.17 0.19 0.17 0.15 0.17
## r_6 0.19 0.18 0.16 0.16 0.14 0.17
Above, all competitors are about the same. We see a more uniform spread of observations.
get_all_competitors(100, 6, 0.1)
## observed_rank
## true_rank c_1 c_2 c_3 c_4 c_5 c_6
## r_1 0.88 0.12 0.00 0.00 0.00 0.00
## r_2 0.12 0.78 0.09 0.01 0.00 0.00
## r_3 0.00 0.08 0.88 0.04 0.00 0.00
## r_4 0.00 0.02 0.03 0.82 0.13 0.00
## r_5 0.00 0.00 0.00 0.13 0.82 0.05
## r_6 0.00 0.00 0.00 0.00 0.05 0.95
Above, each competitor is ten times as good as the next one. We see underdispersed results. So what we need to do now is a “meaningful grid”. This is somewhat computationally intensive.
no_of_races <- 1000
no_of_competitors <- 9
true_x <- seq(from=0.1, by=0.1, to=0.9)
First, we combine a whole bunch of X matrices together:
allX <- function(no_of_competitors, no_of_races, true_x){
out <- NULL
for(i in seq_along(true_x)){
out <- rbind(out,
cbind(
true_x[i],
seq_len(no_of_competitors),
get_all_competitors(
no_of_races = no_of_races,
no_of_competitors = no_of_competitors,
true_x[i])
)
)
}
rownames(out) <- NULL
jj <- colnames(out)
jj[1:2] <- c("x","r")
colnames(out) <- jj
return(out)
}
allX9 <- allX(
no_of_competitors = 9,
no_of_races = 10000,
true_x = seq(from=0.1, to=0.9, by=0.1)
)
head(allX9)
## x r c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9
## [1,] 0.1 1 0.8996 0.0983 0.0020 0.0001 0.0000 0.0000 0.0000 0.0000 0
## [2,] 0.1 2 0.0918 0.8126 0.0938 0.0018 0.0000 0.0000 0.0000 0.0000 0
## [3,] 0.1 3 0.0073 0.0808 0.8122 0.0978 0.0019 0.0000 0.0000 0.0000 0
## [4,] 0.1 4 0.0011 0.0077 0.0845 0.8039 0.1003 0.0025 0.0000 0.0000 0
## [5,] 0.1 5 0.0002 0.0006 0.0069 0.0872 0.8100 0.0933 0.0018 0.0000 0
## [6,] 0.1 6 0.0000 0.0000 0.0006 0.0083 0.0784 0.8126 0.0984 0.0017 0
allX9[seq(from=round(nrow(allX9)/2 - 5), to=round(nrow(allX9)/2 + 5)),]
## x r c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9
## [1,] 0.4 9 0.0003 0.0010 0.0023 0.0039 0.0122 0.0309 0.0717 0.1991 0.6786
## [2,] 0.5 1 0.5071 0.3046 0.1326 0.0463 0.0086 0.0008 0.0000 0.0000 0.0000
## [3,] 0.5 2 0.2449 0.3138 0.2585 0.1275 0.0446 0.0096 0.0010 0.0001 0.0000
## [4,] 0.5 3 0.1247 0.1871 0.2793 0.2339 0.1255 0.0424 0.0069 0.0002 0.0000
## [5,] 0.5 4 0.0640 0.1003 0.1575 0.2688 0.2337 0.1271 0.0413 0.0065 0.0008
## [6,] 0.5 5 0.0304 0.0471 0.0887 0.1598 0.2788 0.2341 0.1219 0.0359 0.0033
## [7,] 0.5 6 0.0154 0.0253 0.0437 0.0829 0.1613 0.2748 0.2509 0.1199 0.0258
## [8,] 0.5 7 0.0072 0.0123 0.0244 0.0456 0.0825 0.1748 0.3041 0.2584 0.0907
## [9,] 0.5 8 0.0042 0.0061 0.0107 0.0237 0.0450 0.0878 0.1802 0.3633 0.2790
## [10,] 0.5 9 0.0021 0.0034 0.0046 0.0115 0.0200 0.0486 0.0937 0.2157 0.6004
## [11,] 0.6 1 0.4027 0.2881 0.1774 0.0829 0.0371 0.0103 0.0015 0.0000 0.0000
tail(allX9)
## x r c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9
## [76,] 0.9 4 0.1218 0.1170 0.1159 0.1256 0.1163 0.1179 0.1069 0.1012 0.0774
## [77,] 0.9 5 0.1030 0.1153 0.1091 0.1143 0.1161 0.1090 0.1184 0.1166 0.0982
## [78,] 0.9 6 0.0938 0.1043 0.1027 0.1053 0.1092 0.1150 0.1223 0.1271 0.1203
## [79,] 0.9 7 0.0837 0.0864 0.0967 0.0965 0.1066 0.1110 0.1240 0.1372 0.1579
## [80,] 0.9 8 0.0826 0.0805 0.0888 0.0915 0.0956 0.1149 0.1238 0.1419 0.1804
## [81,] 0.9 9 0.0751 0.0786 0.0751 0.0892 0.0979 0.1065 0.1194 0.1473 0.2109
Now some observations:
make_obs <- function(no_of_observations, no_of_competitors, r_true, x_true){
kk <- makeracetable(no_of_races = no_of_observations, no_of_competitors = no_of_competitors, x = x_true)
obs <- tabulate(apply(kk,1,function(o){which(o == r_true)}), nbins=no_of_competitors)
return(obs)
}
make_obs(no_of_observations=20, no_of_competitors=9, x_true = 0.7, r_true = 6)
## [1] 1 0 0 4 0 5 4 3 3
make_obs(no_of_observations=20, no_of_competitors=9, x_true = 0.7, r_true = 9)
## [1] 0 0 1 1 0 0 2 6 10
make_obs(no_of_observations=20, no_of_competitors=9, x_true = 0.1, r_true = 6)
## [1] 0 0 0 0 2 15 3 0 0
makemaxliketable <- function(n, nobs, no_of_competitors, allX, r_true, x_true){
maxlike <- function(obs){
support <- function(i){dmultinom(obs, prob=allX[i, -(1:2)], log=TRUE)}
p <- sapply(seq_len(nrow(allX)), support)
# print(allX[which.max(p),])
allX[which.max(p), 1:2]
}
evaluate <- matrix(NA,n,2)
for(i in seq_len(n)){
obs <- make_obs(no_of_observations=nobs, no_of_competitors=no_of_competitors, x_true=x_true, r_true=r_true)
evaluate[i,] <- maxlike(obs)
# print("---")
# print("obs")
# print(obs)
# print("---")
}
return(table(evaluate[,1], evaluate[,2]))
}
makemaxliketable(n=100, nobs=10, no_of_competitors=9, allX=allX9, r_true = 4, x_true = 0.2)
##
## 3 4
## 0.1 0 29
## 0.2 0 41
## 0.3 0 23
## 0.4 3 4
makemaxliketable(n=100, nobs=10, no_of_competitors=9, allX=allX9, r_true = 4, x_true = 0.5)
##
## 2 3 4 5
## 0.3 0 1 4 2
## 0.4 0 5 22 6
## 0.5 0 8 24 7
## 0.6 0 5 11 0
## 0.7 1 0 1 0
## 0.8 1 2 0 0
makemaxliketable(n=100, nobs=10, no_of_competitors=9, allX=allX9, r_true = 4, x_true = 0.7)
##
## 1 2 3 4 5 6
## 0.2 0 0 0 0 2 0
## 0.4 0 0 0 1 4 1
## 0.5 0 0 2 3 5 2
## 0.6 1 0 5 9 9 2
## 0.7 1 0 4 7 11 0
## 0.8 5 6 3 4 1 1
## 0.9 6 1 3 0 1 0
makemaxliketable(n=100, nobs=10, no_of_competitors=9, allX=allX9, r_true = 4, x_true = 0.9)
##
## 1 2 3 4 5 6 7 8 9
## 0.4 0 0 0 1 0 0 0 0 0
## 0.5 0 0 0 0 0 1 0 0 0
## 0.6 0 0 0 2 5 3 0 0 0
## 0.7 0 0 0 6 4 4 3 0 0
## 0.8 2 1 3 2 3 1 2 2 1
## 0.9 7 8 2 8 12 5 2 4 6
makemaxliketable(n=100, nobs=10, no_of_competitors=9, allX=allX9, r_true = 1, x_true = 0.7)
##
## 1 2 3 4
## 0.3 1 2 2 0
## 0.4 4 8 5 0
## 0.5 9 15 2 1
## 0.6 17 12 0 0
## 0.7 14 1 0 0
## 0.8 7 0 0 0
makemaxliketable(n=100, nobs=10, no_of_competitors=9, allX=allX9, r_true = 1, x_true = 0.2)
##
## 1
## 0.1 34
## 0.2 39
## 0.3 23
## 0.4 4
makemaxliketable(n=100, nobs=10, no_of_competitors=9, allX=allX9, r_true = 1, x_true = 0.2)
##
## 1
## 0.1 39
## 0.2 42
## 0.3 15
## 0.4 4
makemaxliketable(n=100, nobs=10, no_of_competitors=9, allX=allX9, r_true = 1, x_true = 0.5)
##
## 1 2 3
## 0.2 5 3 0
## 0.3 14 1 1
## 0.4 17 6 0
## 0.5 30 2 0
## 0.6 18 0 0
## 0.7 3 0 0
makemaxliketable(n=100, nobs=10, no_of_competitors=9, allX=allX9, r_true = 1, x_true = 0.9)
##
## 1 2 3 4 5 6 7 8 9
## 0.3 0 0 0 0 1 0 0 0 0
## 0.4 0 0 0 1 0 0 0 0 0
## 0.5 0 0 0 2 1 0 0 0 0
## 0.6 1 0 1 3 2 0 0 0 0
## 0.7 0 3 5 6 3 0 0 0 0
## 0.8 14 9 6 9 0 0 0 0 0
## 0.9 10 8 1 6 4 1 1 1 1