To cite the hyper2 package in publications, please use Hankin (2017). We will start with a simple example. Consider two players \(p_1\) and \(p_2\) who play tennis say, \(a+b\) times with \(p_1\) winning \(a\) times and \(p_2\) winning \(b\) times. We use the following likelihood function:

\[\frac{(wp_1)^ap_2^b}{(wp_1+p_2)^{a+b}}\]

where \(w\) represents some form of competitive advantage present in the \(a+b\) trials but perhaps absent in subsequent trials [this might be home ground advantage in soccer or the first-mover advantage in chess]. We may represent the log-likelihood in hyper idiom as follows:

f <- function(w,a=5,b=5){
    H <- hyper3()
    H[c(p1=w)] <- a
    H[c(p2=1)] <- b
    H[c(p2=1,p1=w)] <- -(a+b)
    H
}

Thus

f(w=1.1)
## log( (p1=1.1)^5 * (p1=1.1, p2=1)^-10 * (p2=1)^5)

Here I consider how we may differentiate this support function with respect to \(w\):

grad_weight_single <- function(H, probs, wpv, wanted){
  jj <- H[wpv]
  alpha <- drop(powers(jj))
  powers(jj) <- -1
  jj[setNames(rep(1,length(wanted)),wanted)] <- +1
  pnames(jj) <- pnames(H)
  alpha*loglik(probs,jj,log=FALSE)
}

Function grad_weight_single() takes a hyper3 object H, a vector of non-negative probabilities p with unit sum, a named vector wpv corresponding to one of the brackets of H, and a character vector wanted that specifies which term(s) in bracket wpv have a weight that is to be differentiated with respect to. If the weight appears in multiple brackets we need to call grad_weight_single() once for each bracket the weight appears in, and add the results. For example:

w <- 1.1
dw <- 1e-4
H <- f(w)
probs <- c(p1=0.35, p2=0.65)

wpv <- c(p1=w)
wanted <- "p1"
d1 <- grad_weight_single(H, probs, wpv, wanted)

wpv <- c(p2=1,p1=w)
wanted <- "p1"
d2 <- grad_weight_single(H, probs, wpv, wanted)

LHS <- (d1+d2)                                          # differential from grad_weight_single()
RHS <- (loglik(probs,f(w+dw/2)) - loglik(probs,f(w-dw/2)))/dw  # differential via numerics
print(c(lhs=LHS,rhs=RHS,diff=RHS-LHS))
##        lhs        rhs       diff 
## 1.1638e+00 1.1638e+00 2.8051e-09

above we see very close agreement.

1 Slightly more complicated example

H <- hyper3()
f <- function(w){
  H <- hyper3()
  H[c(p1=1 , p2=w , p3=w )] <- -5
  H[c(p1=1)] <- 2
  H[c(p2=2)] <- 1
  H[c(p2=w,p3=w)] <- 2
  return(H)
}

w <- 1.4
dw <- 1e-6
(HH <- f(w))
## log( (p1=1)^2 * (p1=1, p2=1.4, p3=1.4)^-5 * (p2=1.4, p3=1.4)^2 *
## (p2=2)^1)
probs <- c(p1=0.1,p2=0.6,p3=0.3)
d1 <- grad_weight_single(HH, probs, c(p1=1 , p2=w , p3=w), "p2")
d2 <- grad_weight_single(HH, probs, c(p1=1 , p2=w , p3=w), "p3")
d3 <- grad_weight_single(HH, probs, c(       p2=w , p3=w), "p2")
d4 <- grad_weight_single(HH, probs, c(       p2=w , p3=w), "p3")
LHS <- d1+d2+d3+d4
RHS <- (loglik(probs,f(w=w+dw/2)) - loglik(probs,f(w=w-dw/2)))/dw
  c(RHS,LHS,LHS-RHS)
## [1] -1.8803e+00 -1.8803e+00  1.6193e-09

1.1 An example from chess

Two chess players, 1, 2, and 3, play chess. Our observations (chess games, say) comprise \(a+b+c\) games in which \(p_1\) played White, winning \(a\) games, drawing \(b\) and losing \(c\) [we write \(+a=b-c\)]; and \(d+e+f\) games in which \(p_2\) played White, with \(+d=e-f\). There is also a single game between \(p_3\) (white) who beat \(p_1\) (black). Consider the following likelihood function:

\[ \frac{(w p_1)^a\cdot(D(p_1+p_2))^b\cdot p_2^c}{(w p_1+D(p_1+p_2)+p_2)^{a+b+c}} \frac{(w p_2)^d\cdot(D(p_1+p_2))^e\cdot p_1^f}{(w p_2+D(p_1+p_2)+p_1)^{d+e+f}} \frac{ w p_3}{wp_3+p_1} \]

w <- 1.2
D <- 0.3

brackfunc <- function(w,D){
  list(
  b1 = c(p1=w),
  b2 = c(p1=D, p2=D),   
  b3 = c(p2=1),
  b4 = c(p1=w+D,p2=1+D),
  b5 = c(p2=w),
  b6 = c(p1=1),
  b7 = c(p1=1+D,p2=w+D),
  b8 = c(p3=w),
  b9 = c(p3=w,p1=1)
  )
}
  
`f2` <- function(w,D, a=3,b=12,c=1,d=5,e=6,f=6){
  H <- hyper3()       
  brackets <- brackfunc(w,D)
  H[brackets[[1]]] %<>% inc(a)  
  H[brackets[[2]]] %<>% inc(b)  
  H[brackets[[3]]] %<>% inc(c)  
  H[brackets[[4]]] %<>% dec(a+b+c)
  H[brackets[[5]]] %<>% inc(d)  
  H[brackets[[2]]] %<>% inc(e)   # sic, the draw brackets are the same
  H[brackets[[6]]] %<>% inc(f)  
  H[brackets[[7]]] %<>% dec(d+e+f)
  H[brackets[[8]]] %<>% inc
  H[brackets[[9]]] %<>% dec
  return(H)
}

  H <- f2(w=w, D=D)

\[ \frac{(w p_1)^a\cdot(D(p_1+p_2))^b\cdot p_2^c}{(w p_1+D(p_1+p_2)+p_2)^{a+b+c}} \frac{(w p_2)^d\cdot(D(p_1+p_2))^e\cdot p_1^f}{(w p_2+D(p_1+p_2)+p_1)^{d+e+f}} \frac{ w p_3}{wp_3+p_1} \]

Differentiate with respect to w:

probs <- c(p1=0.15 , p2=0.45, p3=0.4)
diff <- (
 grad_weight_single(H, probs, c(p1=w)         , "p1") +
 grad_weight_single(H, probs, c(p1=w+D,p2=1+D), "p1") +
 grad_weight_single(H, probs, c(p2=w)         , "p2") +
 grad_weight_single(H, probs, c(p1=1+D,p2=w+D), "p2") +
 grad_weight_single(H, probs, c(p3=w)         , "p3") +
 grad_weight_single(H, probs, c(p3=w,p1=1)    , "p3")
 )
 
LHS <- diff  # using grad_weight_single()
RHS <- (loglik(probs,f2(w=w+dw,D=D)) - loglik(probs,f2(w=w,D=D)))/dw  # differential via numerics
c(LHS,RHS,LHS-RHS)
## [1] -4.8910e+00 -4.8910e+00  3.8046e-07

We may even be a little clever and use a central difference:

RHS2 <- (loglik(probs,f2(w=w+dw/2,D=D)) - loglik(probs,f2(w=w-dw/2,D=D)))/dw  # differential via numerics
c(LHS,RHS2,LHS-RHS2)
## [1] -4.8910e+00 -4.8910e+00 -1.7443e-08

If we wish to differentiate with respect to D we need a slight generalization, specifically that the wanted argument is length 2:

diff <- (
 grad_weight_single(H, probs, c(p1=  D,p2=  D), c("p1","p2")) +
 grad_weight_single(H, probs, c(p1=w+D,p2=1+D), c("p1","p2")) +
 grad_weight_single(H, probs, c(p1=1+D,p2=w+D), c("p1","p2"))
 )
 
LHS <- diff  # using grad_weight_single()
dD <- 1e-4
RHS <- (loglik(probs,f2(w,D=D+dD/2)) - loglik(probs,f2(w=w,D=D-dD/2)))/dD  # d/dD via numerics
c(LHS,RHS,LHS-RHS)
## [1]  3.6424e+01  3.6424e+01 -5.4552e-07

Come to think of it, all the wanted arguments are the same length (two). A more exacting test would be:

\[ \frac{(p_1)^3\,(w p_2)^4}{(p_1+wp_2)^7}\cdot \frac{p_2^5\,(wp_1+wp_3)^2}{(wp_1+p_2+wp_3)^7} \]

m <- function(w){
  H <- hyper3()
  H[c(p1=1)] <- 3
  H[c(p2=w)] <- 4
  H[c(p1=1,p2=w)] <- -7
  H[c(p2=1)] <- 5
  H[c(p1=w,p3=w)] <- 2
  H[c(p1=w,p2=1,p3=w)] <- -7
  H
}
w <- 1.888
dw <- 1e-4
probs <- c(p1=0.4,p2=0.1, p3=0.5)
m(w)
## log( (p1=1)^3 * (p1=1, p2=1.888)^-7 * (p1=1.888, p2=1, p3=1.888)^-7 *
## (p1=1.888, p3=1.888)^2 * (p2=1)^5 * (p2=1.888)^4)
H <- m(w)
loglik(probs,H)
## [1] -20.273
diffD <- (
 grad_weight_single(H, probs, c(p2=w          ), "p2"        ) +
 grad_weight_single(H, probs, c(p1=1,p2=w     ), "p2"        ) +
 grad_weight_single(H, probs, c(p1=w,p3=w     ), c("p1","p3")) +
 grad_weight_single(H, probs, c(p1=w,p2=1,p3=w), c("p1","p3"))
 )

LHS <- (loglik(probs,m(w+dw/2)) -loglik(probs,m(w-dw/2)))/dw
RHS <- diffD
c(LHS,RHS,LHS-RHS)
## [1] -1.5124e+00 -1.5124e+00 -1.1705e-11

References

Hankin, R. K. S. 2017. “Partial Rank Data with the hyper2 Package: Likelihood Functions for Generalized Bradley-Terry Models.” The R Journal 9 (2): 429–39.