To cite the hyper2 package in publications, please use Hankin (2017). Here I consider a single basketball match from the perspective of the Bradley-Terry model. File NBA.txt contains a point-by-point analysis of a basketball match between the Cleveland Cavaliers and the Golden State Warriors on 13 June 2017, data taken from here. Some further documentation is given at inst/NBA.Rd in the package.

NBA_table  <- read.csv("NBA.txt",header=TRUE)
head(NBA_table)
##    time points possession Team Love James Thompson Irving Smith Jefferson Williams Shumpert Korver Green Durant
## 1 11.41      2          C    C TRUE  TRUE     TRUE   TRUE  TRUE     FALSE    FALSE    FALSE  FALSE  TRUE   TRUE
## 2 11.30      1          W    W TRUE  TRUE     TRUE   TRUE  TRUE     FALSE    FALSE    FALSE  FALSE  TRUE   TRUE
## 3 11.22      2          C    W TRUE  TRUE     TRUE   TRUE  TRUE     FALSE    FALSE    FALSE  FALSE  TRUE   TRUE
## 4 11.07      2          C    C TRUE  TRUE     TRUE   TRUE  TRUE     FALSE    FALSE    FALSE  FALSE  TRUE   TRUE
## 5 10.49      3          W    W TRUE  TRUE     TRUE   TRUE  TRUE     FALSE    FALSE    FALSE  FALSE  TRUE   TRUE
## 6 10.17      3          C    W TRUE  TRUE     TRUE   TRUE  TRUE     FALSE    FALSE    FALSE  FALSE  TRUE   TRUE
##   Pachiuila Curry Thompson.1  West Barnes Iguodala Livingston McCaw
## 1      TRUE  TRUE       TRUE FALSE  FALSE    FALSE      FALSE FALSE
## 2      TRUE  TRUE       TRUE FALSE  FALSE    FALSE      FALSE FALSE
## 3      TRUE  TRUE       TRUE FALSE  FALSE    FALSE      FALSE FALSE
## 4      TRUE  TRUE       TRUE FALSE  FALSE    FALSE      FALSE FALSE
## 5      TRUE  TRUE       TRUE FALSE  FALSE    FALSE      FALSE FALSE
## 6      TRUE  TRUE       TRUE FALSE  FALSE    FALSE      FALSE FALSE

Each row corresponds to a point scored. The first column is the time of the score, the second is the number of points scored, the third shows which team had possession at the start of play, and the fourth shows which team scored. The other columns show the players. Table entries show whether or not that particular player was on the pitch when the point was scored. Note that the two “ghost” players represent the effect of having possession.

First some basics:

(TT <- table(NBA_table[,3:4]))
##           Team
## possession  C  W
##          C 40 28
##          W 22 41
fisher.test(TT,alternative="greater")
## 
##  Fisher's Exact Test for Count Data
## 
## data:  TT
## p-value = 0.005
## alternative hypothesis: true odds ratio is greater than 1
## 95 percent confidence interval:
##  1.3827    Inf
## sample estimates:
## odds ratio 
##     2.6419

Thus the identity of the scoring team is not independent of the identity of the possessing team (as one might expect).
We can convert NBA_table to a likelihood function:

allplayers <- as.matrix(NBA_table[,5:23])
NBA <- hyper2()
NBA2 <- hyper2()

## players 1-9 Cleveland
## players 10-19 (sic) Warriors:
C_onpitch <- allplayers[,1:9]
W_onpitch <- allplayers[,10:19]

possession <- NBA_table[,3]
scored     <- NBA_table[,4]

for(i in seq_len(nrow(NBA_table))){
  if(scored[i] == 'W'){  # Warriors score a point
    onpitch_scored  <- names(which(W_onpitch[i,]))
    onpitch_noscore <- names(which(C_onpitch[i,]))
  } else if(scored[i] == "C"){ # Cleveland score a point
    onpitch_scored  <- names(which(C_onpitch[i,]))
    onpitch_noscore <- names(which(W_onpitch[i,]))
  } else {
    stop("scorint team must be either W or C")
  }
  if(possession[i] == 'W'){
    onpitch_scored1 <- c(onpitch_scored,"possession")
    onpitch_scored2 <- c(onpitch_scored,"W_possession")
  } else if (possession[i] == 'C'){
    onpitch_scored1 <- c(onpitch_scored,"possession")
    onpitch_scored2 <- c(onpitch_scored,"C_possession")
  } else {
    stop("possession must be either W or C")
  }

  NBA[onpitch_scored1] %<>% inc()
  NBA[c(onpitch_scored1,onpitch_noscore)] %<>% dec()

  NBA2[onpitch_scored2] %<>% inc()
  NBA2[c(onpitch_scored,onpitch_noscore)] %<>% dec()
}

NBA2_maxp <- maxp(NBA2)

NBA2_maxp %<>% ordertrans(c(colnames(NBA_table)[-(1:4)],"C_possession","W_possession"))
dotchart(NBA2_maxp,col=c(rep("black",9),rep("red",10),rep("blue",2)),pch=16)

NBA_maxp_precomputed  <- c(
                                        # precomputed; very very computer
                                        # intensive but far higher likelihood
                                        # than the result of maxp(NBA):
    

Barnes    = 0.165210241492775,    Curry      = 1.11314508611164e-06,
Durant    = 0.00538619616133955,  Green      = 0.0110738194130432,
Iguodala  = 1.0005765074368e-06,  Irving     = 0.0198810257932021,
James     = 1.00000582238398e-06, Jefferson  = 0.00990927936847305,
Korver    = 0.00843806151505123,  Livingston = 0.075108213128189,
Love      = 0.00175976502052894,  McCaw      = 0.0119305671842558,
Pachiuila = 0.0496281731046277,   possession = 0.479366441714251,
Shumpert  = 0.0666196978946451,   Smith      = 1.0008827880116e-06,
Thompson  = 0.00159564073991825,  Thompson.1 = 0.00135198197477646,
West      = 0.0927348818403838,   Williams   = 1.89904433611776e-06)



NBA_maxp <- maxp(NBA)
NBA_maxp %<>% ordertrans(c(colnames(NBA_table)[-(1:4)],"possession"))


dotchart(NBA_maxp,col=c(rep("black",9),rep("red",10),rep("blue",1)),pch=16)

1 Statistical analysis

samep.test(NBA2,c("C_possession","W_possession"))
## 
##  Constrained support maximization
## 
## data:  NBA2
## null hypothesis: C_possession = W_possession
## null estimate:
##       Barnes C_possession        Curry       Durant        Green     Iguodala       Irving        James    Jefferson 
##   1.2513e-01   2.9761e-01   1.0195e-06   1.0197e-06   1.0928e-06   1.0245e-06   1.0437e-06   1.0262e-06   1.1119e-06 
##       Korver   Livingston         Love        McCaw    Pachiuila     Shumpert        Smith     Thompson   Thompson.1 
##   1.7136e-06   3.8650e-04   1.0430e-06   3.0320e-02   1.1201e-06   1.3979e-01   1.0172e-06   1.0315e-06   1.0018e-06 
## W_possession         West     Williams 
##   2.9761e-01   1.0913e-01   1.0019e-06 
## (argmax, constrained optimization)
## Support for null:  847.59 + K
## 
## alternative hypothesis:  sum p_i=1 
## alternative estimate:
##       Barnes C_possession        Curry       Durant        Green     Iguodala       Irving        James    Jefferson 
##   4.1930e-02   3.0220e-01   5.9474e-03   1.0870e-02   1.4768e-03   1.0001e-06   1.9311e-02   4.7272e-03   4.5515e-02 
##       Korver   Livingston         Love        McCaw    Pachiuila     Shumpert        Smith     Thompson   Thompson.1 
##   1.6588e-02   1.7713e-02   2.2082e-02   8.3922e-03   4.3110e-02   2.4424e-02   2.4081e-05   3.4050e-02   7.8847e-03 
## W_possession         West     Williams 
##   2.8717e-01   5.4955e-02   5.1631e-02 
## (argmax, free optimization)
## Support for alternative:  125.85 + K
## 
## degrees of freedom: 1
## support difference = -721.74
## p-value: 1

We may thus assume that Cleveland having possession is the same strength as the Warriors having possession, and from now on we work with NBA rather than NBA2.

(C_allstrengths <- sort(NBA_maxp[01:9],decreasing=TRUE))
##   Williams   Shumpert  Jefferson     Korver   Thompson       Love     Irving      James      Smith 
## 2.6525e-01 5.7277e-02 4.6295e-02 4.1295e-02 2.3222e-02 2.0494e-02 7.4882e-03 3.0894e-06 1.9109e-06
(W_allstrengths <- sort(NBA_maxp[10:19],decreasing=TRUE))
##       West     Barnes  Pachiuila Livingston      McCaw     Durant   Iguodala Thompson.1      Curry      Green 
## 7.3701e-02 6.7657e-02 5.0463e-02 4.1710e-02 3.9470e-02 8.4888e-03 4.4725e-03 1.6967e-03 2.3753e-06 1.0001e-06
samep.test(NBA, names(C_allstrengths))
## 
##  Constrained support maximization
## 
## data:  NBA
## null hypothesis: Williams = Shumpert = Jefferson = Korver = Thompson = Love = Irving = James = Smith
## null estimate:
##     Barnes      Curry     Durant      Green   Iguodala     Irving      James  Jefferson     Korver Livingston 
## 4.7656e-02 1.0000e-06 1.0000e-06 1.0002e-06 1.0001e-06 1.0001e-06 1.0001e-06 1.0001e-06 1.0001e-06 3.8207e-04 
##       Love      McCaw  Pachiuila possession   Shumpert      Smith   Thompson Thompson.1       West   Williams 
## 1.0001e-06 4.3096e-06 1.1759e-03 9.5077e-01 1.0001e-06 1.0001e-06 1.0001e-06 1.0001e-06 1.0471e-06 1.0001e-06 
## (argmax, constrained optimization)
## Support for null:  -0.22163 + K
## 
## alternative hypothesis:  sum p_i=1 
## alternative estimate:
##     Barnes      Curry     Durant      Green   Iguodala     Irving      James  Jefferson     Korver Livingston 
## 6.7657e-02 2.3753e-06 8.4888e-03 1.0001e-06 4.4725e-03 7.4882e-03 3.0894e-06 4.6295e-02 4.1295e-02 4.1710e-02 
##       Love      McCaw  Pachiuila possession   Shumpert      Smith   Thompson Thompson.1       West   Williams 
## 2.0494e-02 3.9470e-02 5.0463e-02 2.5101e-01 5.7277e-02 1.9109e-06 2.3222e-02 1.6967e-03 7.3701e-02 2.6525e-01 
## (argmax, free optimization)
## Support for alternative:  -32.925 + K
## 
## degrees of freedom: 8
## support difference = -32.704
## p-value: 1
samep.test(NBA, names(W_allstrengths))
## 
##  Constrained support maximization
## 
## data:  NBA
## null hypothesis: West = Barnes = Pachiuila = Livingston = McCaw = Durant = Iguodala = Thompson.1 = Curry = Green
## null estimate:
##     Barnes      Curry     Durant      Green   Iguodala     Irving      James  Jefferson     Korver Livingston 
## 1.0005e-06 1.0005e-06 1.0005e-06 1.0005e-06 1.0005e-06 1.2594e-06 1.0002e-06 3.4445e-02 2.0600e-02 1.0005e-06 
##       Love      McCaw  Pachiuila possession   Shumpert      Smith   Thompson Thompson.1       West   Williams 
## 1.3273e-06 1.0005e-06 1.0005e-06 9.0686e-01 3.2884e-02 2.5081e-06 5.1962e-03 1.0005e-06 1.0005e-06 1.0087e-06 
## (argmax, constrained optimization)
## Support for null:  -2.1358 + K
## 
## alternative hypothesis:  sum p_i=1 
## alternative estimate:
##     Barnes      Curry     Durant      Green   Iguodala     Irving      James  Jefferson     Korver Livingston 
## 6.7657e-02 2.3753e-06 8.4888e-03 1.0001e-06 4.4725e-03 7.4882e-03 3.0894e-06 4.6295e-02 4.1295e-02 4.1710e-02 
##       Love      McCaw  Pachiuila possession   Shumpert      Smith   Thompson Thompson.1       West   Williams 
## 2.0494e-02 3.9470e-02 5.0463e-02 2.5101e-01 5.7277e-02 1.9109e-06 2.3222e-02 1.6967e-03 7.3701e-02 2.6525e-01 
## (argmax, free optimization)
## Support for alternative:  -32.925 + K
## 
## degrees of freedom: 9
## support difference = -30.789
## p-value: 1

Now, what is the probability of C scoring both with and without possession? We may condition on the estimated player and possession strengths, and assume that only the 5 strongest players on each team play.

C_strongestteam <- sum(C_allstrengths[1:5])
W_strongestteam <- sum(W_allstrengths[1:5])
possession <- 0.2155786 # taken from samep.test() above

# probability of C scoring without possession:
(C_strongestteam           )/(C_strongestteam + W_strongestteam + possession)
## [1] 0.47004
# probability of C scoring with possession:
(C_strongestteam+possession)/(C_strongestteam + W_strongestteam + possession)
## [1] 0.70388

Package dataset

Following lines create NBA.rda, residing in the data/ directory of the package.

NBA_maxp <- NBA_maxp_precomputed
save(NBA_table,NBA,NBA_maxp,file="NBA.rda")

References

Hankin, R. K. S. 2017. “Partial Rank Data with the hyper2 Package: Likelihood Functions for Generalized Bradley-Terry Models.” The R Journal 9 (2): 429–39.