HW 9: Estimation

Problem 1: Exponential Distribution - Estimating the Rate Parameter

You observe the following waiting times (in minutes) for a bus: 2.5, 5.1, 1.8, 3.6. Assuming the waiting times follow an exponential distribution, estimate the rate parameter (\(\lambda\)) using MLE.

Solution

To construct the likelihood, we need the PDF of the exponential distribution. It is

\[ f(x) = \lambda \exp (-\lambda x), \qquad x \geq 0 \]

In R, this is the dexp function.

The likelihood is the product of the PDFs evaluated at the observed waiting times.

\[ \begin{align*} L(\lambda) &= \lambda \exp (-\lambda \cdot 2.5) \times \lambda \exp (-\lambda \cdot 5.1) \times \cdots \times \lambda \exp (-\lambda \cdot 3.6)\\ &= \lambda^4 \exp[ -\lambda (2.5 + 5.1 + 1.8 + 3.6)] \\ &= \lambda^N \exp[ -\lambda N \bar{X}] \quad (\text{in general}) \end{align*} \]

In R:

data <- c(2.5, 5.1, 1.8, 3.6)
## Direct coding
l1 <- function(lambda, data){
    n <- length(data)
    xbar <- mean(data)
    lambda^n * exp( -n * xbar * lambda)
} 

curve(l1(x,data), 0, 1.5, ylab = "Likelihood", xlab = expression(lambda))

## Using dexp function
l2 <- function(lambda) apply(outer(data, lambda, dexp), 2, prod)
curve(l2(x), 0, 1.5, ylab = "Likelihood", xlab = expression(lambda))

To find the maximum, we could use calculus or numerical methods.

Calculus

\[ \begin{align*} \ell(\lambda) = \log L(\lambda) &= \log \left[ \lambda^N \exp[ -\lambda N \bar{X}] \right] \\ & = N \log(\lambda) - \lambda N \bar{X}\\ \frac{\partial \ell}{\partial \lambda} &= \frac{N}{\lambda} - N \bar{X}\\ \frac{\partial \ell}{\partial \lambda} &\stackrel{\text{set}}{=} 0\\ \frac{N}{\lambda} - N \bar{X} & = 0\\ \hat{\lambda} &= \frac{1}{\bar{X}} \end{align*} \]
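Plugging the data into the closed-form answer gives a quick check against the numerical results below:

## Closed-form MLE
1/mean(data)  # 0.3076923, matching the numerical optimum below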

Numerical methods

## Negative log likelihood
negloglik <- function(lambda, data){ -apply(outer(data, lambda, dexp, log = TRUE), 2, sum) }

# General purpose optimizer
o1 <- optim(1, negloglik, data = data, method = "Brent", lower = 0, upper = 100)
o1$par
[1] 0.3076923
# MLE specific optimizer (from the stats4 package)
suppressPackageStartupMessages(require(stats4))
# mle's default optimizer also tries lambda <= 0, where the log density
# is NaN -- hence the warnings below
o2 <- mle(function(x){negloglik(x,data = data)}, list(x = 1))
Warning in FUN(X, Y, ...): NaNs produced
Warning in FUN(X, Y, ...): NaNs produced
Warning in FUN(X, Y, ...): NaNs produced
Warning in FUN(X, Y, ...): NaNs produced
Warning in FUN(X, Y, ...): NaNs produced
summary(o2)
Maximum likelihood estimation

Call:
mle(minuslogl = function(x) {
    negloglik(x, data = data)
}, start = list(x = 1))

Coefficients:
   Estimate Std. Error
x 0.3076925  0.1538446

-2 log L: 17.42924 
curve(l2(x), 0, 1.5, ylab = "Likelihood", xlab = expression(lambda))
abline(v=o1$par, col = "red", lwd = 3)

Problem 2: Normal Distribution - Estimating Mean and Variance

A sample of student heights (in inches) is: 65, 68, 72, 63, 70. Estimate the mean (\(\mu\)) and variance (\(\sigma^2\)) of the height distribution using MLE, assuming heights are normally distributed.

Solution

As in Problem 1, this can be solved with calculus or numerical methods. The course notes already show both derivations, so we skip those details here.

data <- c(65, 68, 72, 63, 70)
n <- length(data)
mu_hat <- mean(data)
sigma2_hat <- var(data) # var() divides by (n-1); the MLE divides by n, i.e., var(data)*(n-1)/n.  Since (n-1)/n goes to 1 as n gets large, the difference is often ignored
c(mu_hat = mu_hat, sigma2_hat = sigma2_hat, sigma2_hat_mle = sigma2_hat*(n-1)/n)
        mu_hat     sigma2_hat sigma2_hat_mle 
         67.60          13.30          10.64 
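As a numerical cross-check, we can maximize the normal log likelihood directly. This is a minimal sketch; the helper negloglik_norm is an addition, not part of the original solution.

## Numerical check: joint MLE of (mu, sigma^2) under the normal model
negloglik_norm <- function(par, data) -sum(dnorm(data, mean = par[1], sd = sqrt(par[2]), log = TRUE))
optim(c(60, 10), negloglik_norm, data = data, method = "L-BFGS-B", lower = c(-Inf, 1e-6))$par
# approximately c(67.60, 10.64), i.e., mu_hat and the MLE of sigma^2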
# Another command for the normal model
# Note that lm reports sigma (not sigma^2) using the (n-1) denominator:
# the "Residual standard error" below is sqrt(var(data)) = 3.647
lm1 <- lm(data ~ 1)
summary(lm1)

Call:
lm(formula = data ~ 1)

Residuals:
   1    2    3    4    5 
-2.6  0.4  4.4 -4.6  2.4 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   67.600      1.631   41.45 2.03e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.647 on 4 degrees of freedom

Problem 3: Poisson Distribution - Estimating the Average Number of Events

You count the number of cars passing a certain point on a road per minute for 5 minutes and get: 3, 5, 2, 4, 6. Estimate the average number of cars passing per minute (\(\lambda\)) using MLE, assuming a Poisson distribution.

Solution

Using Calculus: We construct the likelihood function (\(L(\lambda)\)), form the log likelihood function (\(\ell(\lambda)\)), calculate the derivative (\(\ell'(\lambda)\)), and finally find the root of the derivative.

\[ \begin{align*} L(\lambda) &= \prod_{i=1}^N \underbrace{f(x_i, \lambda)}_{\text{Poisson PMF}} \\ &= \prod_{i=1}^N \frac{\lambda^{x_i} e^{-\lambda}}{x_i!} \\ &= \frac{e^{-N\lambda}\lambda^{\sum_{i=1}^Nx_i}}{\prod x_i!} \\ &= \frac{e^{-N\lambda}\lambda^{N\bar{X}}}{\prod x_i!} \\ \ell(\lambda) &= -N\lambda + N\bar{X} \log(\lambda) - \sum \log(x_i!) \\ \frac{\partial \ell}{\partial \lambda} = \ell'(\lambda) &= -N + \frac{N\bar{X}}{\lambda} \\ \frac{\partial \ell}{\partial \lambda} & \stackrel{\text{set}}{=} 0\\ -N + \frac{N\bar{X}}{\lambda} &= 0 \\ \hat{\lambda} &= \bar{X} \end{align*} \]

data <- c(3, 5, 2, 4, 6)
c(lambda_hat = mean(data))
lambda_hat 
         4 

Using numerical methods

## Negative log likelihood (the original name "logliklihood" was misleading: the function returns the negative log likelihood)
negloglik <- function(lambda = 1, data) -apply(outer(lambda, data, function(a,b){ dpois(b, a, log = TRUE)}), 1, sum)
curve(negloglik(x,data),0,10, xlab = expression(lambda), ylab = "negative log likelihood")
abline(v=mean(data), col = "red")

# General purpose optimizer
optim(1, negloglik, data = data, method = "Brent", lower = 0, upper = 100)
$par
[1] 4

$value
[1] 9.303816

$counts
function gradient 
      NA       NA 

$convergence
[1] 0

$message
NULL
# Poisson regression command gives log(lambda), which is why we exp the result
g1 <- glm(data ~ 1, family = poisson)
summary(g1)

Call:
glm(formula = data ~ 1, family = poisson)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   1.3863     0.2236     6.2 5.66e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 2.5983  on 4  degrees of freedom
Residual deviance: 2.5983  on 4  degrees of freedom
AIC: 20.608

Number of Fisher Scoring iterations: 4
exp(coef(g1))
(Intercept) 
          4 
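As a quick check (a sketch reusing the objects defined above), all three approaches give the same estimate:

## Closed form, general purpose optimizer, and glm all agree
c(closed_form = mean(data),
  numerical = optim(1, negloglik, data = data, method = "Brent", lower = 0, upper = 100)$par,
  glm = unname(exp(coef(g1))))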

Problem 4: Uniform Distribution - Estimating the Upper Bound

You have the following measurements of the length (in cm) of a manufactured part: 1.2, 1.8, 1.5, 1.7, 1.3. Assuming the lengths are uniformly distributed between 0 and \(\theta\), find the MLE for \(\theta\).

Solution

Calculus:

The PDF is \[ f(x) = \left\{ \begin{array}{ll} \frac{1}{\theta} & 0 < x \leq \theta \\ 0 & \text{otherwise} \end{array} \right. \]

Consequently, the likelihood is

\[ L(\theta) = \left\{ \begin{array}{ll} \theta^{-N} & 0 < \min(x_i) \text{ and } \max(x_i) \leq \theta \\ 0 & \text{otherwise} \end{array} \right. \]

data <- c(1.2, 1.8, 1.5, 1.7, 1.3)
likelihood <- function(theta, data){
    ub <- max(data)
    n <- length(data)
    out <- 1/theta^n
    out[theta < ub] <- 0
    out
}
curve(likelihood(x, data),0,4)

Notice that the likelihood is not differentiable at the maximum. Also note that the function is non-zero only for values of \(\theta\) at least as large as the maximum observed value. The maximum likelihood solution is

\[ \hat{\theta} = \max\{x_1, \ldots, x_n\} \]

For the observed data, \(\hat{\theta}\) = 1.8.
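A grid search gives a quick numerical sanity check (a sketch; the grid resolution is arbitrary):

## The maximum sits exactly at max(data)
theta_grid <- seq(0.01, 4, by = 0.001)
theta_grid[which.max(likelihood(theta_grid, data))]  # approximately 1.8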

Problem 5

Model the pendrop data in three different ways:

pendrop_data <- readRDS(url("https://tgstewart.cloud/pendrop.RDS"))
  1. Weibull with Method of Moments. Write the value of the parameters.

  2. Weibull with MLE. Write the value of the parameters.

  3. With KDE (your choice of the kernel).

For each model, compute the probability that the measurement is less than 4 cm, and compare the results.

Plot the 3 models, and the pendrop histogram, in the same figure.

Solution

Method of moments

See the course notes on the method of moments.
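For reference, the Weibull moment formulas that drive the calculation below are

\[ E[Y] = \sigma \Gamma(1 + 1/a), \qquad \text{Var}(Y) = \sigma^2\left[\Gamma(1 + 2/a) - \Gamma(1 + 1/a)^2\right], \]

so the ratio \(E[Y]^2/\text{Var}(Y) = \left[\Gamma(1+2/a)/\Gamma(1+1/a)^2 - 1\right]^{-1}\) depends only on the shape \(a\). Matching it to \(\bar{y}^2/\hat{s}^2\) yields one equation in \(a\) (the function g below), and the scale then follows from \(\sigma = \bar{y}/\Gamma(1+1/a)\).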

y <- pendrop_data$v
ybar <- mean(y)
s2hat <- var(y)
## g(a) = 0 when the theoretical mean^2/variance ratio matches the sample ratio
g <- function(a) 1/(gamma(1+2/a)/(gamma(1+1/a)^2) - 1) - ybar^2/s2hat
curve(g(x),1,2)
abline(h=0)

ahat <- uniroot(g,c(1,2))$root 
sigmahat <- ybar/gamma(1+1/ahat)
hist(y, breaks=20,freq = FALSE, main = "Distance from bullseye", xlab="")
curve(dweibull(x, shape = ahat, scale = sigmahat), 0, max(y), add = TRUE, lwd = 3, col = "#ff2b2b")
box()

c(`P(Y < 4):` = pweibull(4, shape = ahat, scale = sigmahat))
P(Y < 4): 
0.5161926 

MLE

yy <- y
yy[y<=0] <- 0.0001 # Weibull must have y strictly greater than 0
negloglike <- function(a,sigma){-sum(dweibull(yy,shape = a, scale = sigma, log = TRUE))}
# Supplying box constraints (lower) makes mle switch to the L-BFGS-B optimizer
m1 <- mle(negloglike, lower = c(0,0), start = c(1,1))
m1

Call:
mle(minuslogl = negloglike, start = c(1, 1), lower = c(0, 0))

Coefficients:
       a    sigma 
1.582948 4.848562 
hist(y, breaks=20,freq = FALSE, main = "Distance from bullseye", xlab="")
curve(dweibull(x, shape = coef(m1)["a"], scale = coef(m1)["sigma"]), 0, max(y), add = TRUE, lwd = 3, col = "#ff2b2b")

c(`P(Y < 4):` = pweibull(4, shape = coef(m1)["a"], scale = coef(m1)["sigma"]))
P(Y < 4): 
 0.521674 
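As an optional cross-check (a sketch, assuming the MASS package is installed), fitdistr fits the same model by maximum likelihood:

## Cross-check with MASS (requires the MASS package)
MASS::fitdistr(yy, "weibull")$estimate  # should be close to coef(m1)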

KDE

See the course notes on kernel density estimation.

Gaussian kernel.

# Built in commands
d1 <- density(y, kernel = "g", bw = "SJ", adjust = 1.4)
hist(y, breaks=20,freq = FALSE, main = "Distance from bullseye", xlab="", xlim = range(y) + c(-1,+1))
lines(d1, lwd = 3, col = "#ff2b2b")
box()

# handmade commands: average the normal CDF/PDF centered at each data point
Fhat <- function(x,s) rowMeans(outer(x,y,function(a,b) pnorm(a,b,s)))
fhat <- function(x,s) rowMeans(outer(x,y,function(a,b) dnorm(a,b,s)))

hist(y, breaks=20,freq = FALSE, main = "Distance from bullseye", xlab="", xlim = range(y) + c(-1,+1))
curve(fhat(x,.8),add=TRUE,col = "red", lwd = 3)

c(`P(Y < 4):` = Fhat(4,.8))
P(Y < 4): 
0.5108567 
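The built-in density object gives a similar answer. Here is a minimal sketch that integrates d1 numerically using base R's approxfun and integrate:

## P(Y < 4) from the built-in estimate, by numerical integration
f1 <- approxfun(d1$x, d1$y)
integrate(f1, min(d1$x), 4)$value  # should be close to Fhat(4, .8)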

Here is another option using the Epanechnikov kernel.

# Built in commands
d1 <- density(y, kernel = "e")
hist(y, breaks=20,freq = FALSE, main = "Distance from bullseye", xlab="", xlim = range(y) + c(-1,+1))
lines(d1, lwd = 3, col = "#ff2b2b")
box()

# handmade commands
# v = (x-l)/s clipped to [-1,1], built from the clipped linear map U
U <- function(x,l,s) pmin(pmax(1/2/s*(x-l) + 1/2,0),1)
v <- function(x,l,s) 2*U(x,l,s)-1
# E: Epanechnikov kernel CDF, the integral of (3/4)(1-t^2) from -1 to v
E <- function(x,l,s) -1/4*v(x,l,s)^3 + 3/4*v(x,l,s) + 1/2
# e: Epanechnikov kernel density with half-width s, centered at l
e <- function(x,l,s) (-3/4/s*(x/s-l/s)^2+3/4/s)*(abs(x-l)<s)
# Average the kernel CDF/PDF over the data points
Fhat <- function(x,s) rowMeans(outer(x,y,function(a,b) E(a,b,s)))
fhat <- function(x,s) rowMeans(outer(x,y,function(a,b) e(a,b,s)))

hist(y, breaks=20,freq = FALSE, main = "Distance from bullseye", xlab="", xlim = range(y) + c(-1,+1))
# s = 1/d1$bw is the half-width passed to the handmade estimator
curve(fhat(x,1/d1$bw),add=TRUE,col = "red", lwd = 3)

c(`P(Y < 4):` = Fhat(4,1/d1$bw))
P(Y < 4): 
0.5120858
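Comparing the models: the estimates of \(P(Y < 4)\) are 0.516 (method of moments), 0.522 (MLE), 0.511 (Gaussian KDE), and 0.512 (Epanechnikov KDE), so all four agree to about two decimal places. Finally, a sketch of the requested combined figure, reusing the fits above:

## All three models and the pendrop histogram in one figure
hist(y, breaks = 20, freq = FALSE, main = "Distance from bullseye", xlab = "")
curve(dweibull(x, shape = ahat, scale = sigmahat), add = TRUE, lwd = 3, col = "blue")
curve(dweibull(x, shape = coef(m1)["a"], scale = coef(m1)["sigma"]), add = TRUE, lwd = 3, col = "red")
lines(density(y, kernel = "g", bw = "SJ", adjust = 1.4), lwd = 3, col = "darkgreen")
legend("topright", c("Weibull (MoM)", "Weibull (MLE)", "Gaussian KDE"),
       col = c("blue", "red", "darkgreen"), lwd = 3)
box()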