-
Notifications
You must be signed in to change notification settings - Fork 0
/
rec5.tex
199 lines (171 loc) · 7.25 KB
/
rec5.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
\documentclass[10pt, handout, xcolor=table]{beamer}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\newcommand*\themecol{\usebeamercolor[fg]{structure}}
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{footline}[frame number]
\newcommand{\overbar}[1]{\mkern 1.5mu\overline{\mkern-1.5mu#1\mkern-1.5mu}\mkern 1.5mu}
\usepackage{tikz}
\usetikzlibrary{shapes.geometric, arrows}
\tikzstyle{prob} = [rectangle, minimum width=3cm, text width = 4.5cm, minimum height=1cm, text centered, draw=black, fill= blue!20]
\tikzstyle{stat} = [rectangle, minimum width=3cm, text width = 4.5cm, minimum height=1cm, text centered, draw=black, fill= red!20]
\tikzstyle{arrow} = [thick,->,>=stealth]
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt}
\title{STAT 111\\
{\small Recitation 5}}
\author{Mo Huang}
\institute{Email: [email protected] \\
\vspace{0.25cm}
Office Hours: Wednesdays 3:00 - 4:00 pm, JMHH F96\\
\vspace{0.25cm}
Slides (adapted from Gemma Moran): \url{github.com/mohuangx/STAT111-Fall2018} }
\date{October 12, 2018}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}{Two-Standard-Deviation Rule}
\begin{itemize}
\setlength{\itemsep}{15pt}
\item<1-> From the chart:
$$P(Z < -1.96) = 0.025, \quad P(Z > 1.96) = 0.025.$$
\item<2-> Then:
$$P(-1.96 < Z < 1.96) = 0.95.$$
\item<3-> Approximate $1.96 \approx 2$ and ``unstandardize":
$$P\left( -2 < \frac{X-\mu}{\sigma} < 2\right) = 0.95$$
\item<4->[$\Rightarrow$]
$$P(\mu - 2\sigma < X < \mu + 2\sigma) = 0.95.$$
$$P(\mu - 2.576\sigma < X < \mu + 2.576\sigma) = 0.99.$$
\item<5-> {\themecol The probability that a normal random variable is within 2 (2.576) standard deviations of the mean is $95\% \, (99\%)$.}
\end{itemize}
\end{frame}
\begin{frame}{Central Limit Theorem}
\begin{itemize}
\setlength{\itemsep}{15pt}
\item[] {\themecol The Central Limit Theorem:}
\item Suppose $X_1, X_2, \dots, X_n$ are \emph{iid} with mean $\mu$ and variance $\sigma^2$.
\item Then, for large $n$
\begin{align*}
\themecol T_n \sim N(n\mu, n\sigma^2)\quad \text{and} \quad \bar{X} \sim N\left(\mu, \frac{\sigma^2}{n}\right)
\end{align*}
{\centering \emph{no matter the distribution of the individual} $X_i$}
\item Allows approximation of all distributions using the normal distribution if you know the mean and variance.
\item<2->[Note:] \small if $X_1, \dots, X_n$ are normally distributed, then this applies for \emph{all} $n$, not just large $n$.
\end{itemize}
\end{frame}
\begin{frame}{Central Limit Theorem: Example}
\begin{itemize}
\setlength{\itemsep}{15pt}
\item<1-> Let $X_1, X_2,\dots, X_n \stackrel{iid}{\sim} Binomial(1, \theta)$.
\item<2-> For each $X_i$, $Mean(X_i) = \theta$ and $Var(X_i) = \theta(1-\theta).$
\item<3-> The sum is: $T_n = X_1 + X_2 + \cdots + X_n$.
\item<4-> The proportion is:
$$P = \frac{X_1 + \cdots + X_n}{n}.$$
\item<6-> For large $n$,
\begin{align*}
T_n &\sim N\left(n\theta, n\theta[1-\theta]\right) \\[1.5 em]
P &\sim N\left( \theta, \frac{\theta(1-\theta)}{n}\right)
\end{align*}
\end{itemize}
\end{frame}
\begin{frame}{Central Limit Theorem: Problem}
\begin{itemize}
\item<1-> Suppose you are rolling a fair die 1000 times. Calculate the numbers $A$ and $B$ such that the average of the 1000 rolls is between $A$ and $B$ with probability approximately 0.95. You may assume the mean of one roll is 3.5 and the variance is 35/12.
\item<2->[] {\color{red}
\vspace*{-0.5cm}
\begin{align*}
&Mean(X_i) = 3.5, \quad Var(X_i) = 35/12 \\
&\overline{X} \sim N\bigg(\mu, \frac{\sigma^2}{n}\bigg) = N\bigg(3.5, \frac{35}{12000} \bigg) \quad \text{by CLT}\\
&A = -2\sigma + \mu = -2\sqrt{35/12000} + 3.5 \approx 3.392 \\
&B = 2\sigma + \mu = 2\sqrt{35/12000} + 3.5 \approx 3.608
\end{align*}}
\vspace*{-0.25cm}
\end{itemize}
\end{frame}
\begin{frame}{Statistics}
\begin{itemize}\itemsep3ex
\item<1-> We have finished the first half of the course on {\color{blue} probability}. Now, we move on to {\color{blue} statistics}.
\item<2-> {\color{blue} Statistics} is used to make inductive statements about some phenomenon (coin-flipping, dice rolling) \textbf{after} observing data.
\item<3-> Three main activities of statistics:
\begin{enumerate}\itemsep1ex
\item Estimating numerical values of a parameter or parameters.
\item Assessing accuracy of these estimates.
\item Testing hypotheses about the numerical values of parameters.
\end{enumerate}
\item<4-> Example: Suppose flip a coin 1,000 times and observe 700 heads.
\begin{enumerate}\itemsep1ex
\item How do I estimate the probability $\theta$ of achieving a head?
\item How accurate is my estimate of $\theta$?
\item Is this a fair coin ($\theta = 0.5$)?
\end{enumerate}
\end{itemize}
\end{frame}
\begin{frame}{Estimation of a parameter: Binomial parameter $\theta$}
\begin{itemize}\itemsep3ex
\item<1-> Recall a binomial random variable $X \sim Bin(n, \theta)$. How do we estimate the probability of success $\theta$?
\item<2-> An intuitive estimator for $\theta$ is $\color{blue} p = x/n$, the \textbf{observed} proportion of successes.
\item<3-> Consider the random variable $P$, the proportion of successes \textbf{prior} to performing the experiment.
\begin{itemize}
\item<4-> $Mean(P) = \theta$ so $p$ is ``shooting at the right target". $p$ is then referred to as an \textbf{unbiased} estimate of $\theta$.
\end{itemize}
\item<5-> Difference between estimate and estimator:
\begin{itemize}
\item \textbf{Estimate:} A function of the observed data used to estimate a given parameter. Ex: $p$.
\item \textbf{Estimator:} The random variable whose realization is the estimate. Ex: $P$.
\end{itemize}
\item<6-> To investigate the precision of an estimate, we need to consider the random variable estimator.
\end{itemize}
\end{frame}
\begin{frame}{Precision of an estimate: Binomial parameter $\theta$}
\begin{itemize}\itemsep3ex
\item<1-> Precision of $p$ as an estimate of $\theta$ depends on the variance of random variable $P$.
\item<2-> By the CLT, two standard deviation rule, and approximations (see pg. 40-41), we get the approximate {\color{blue} $95\%$ confidence interval} for $\theta$ as
\[
p \pm 2\sqrt{p(1-p)/n}
\]
\item<3-> We can further approximate the $95\%$ confidence interval with $p(1-p) \leq 1/4$ to get
\begin{equation}
p \pm \sqrt{1/n} \tag{66}
\end{equation}
\item<4-> Correspondingly, the $99\%$ confidence interval is
\[
p \pm 2.576\sqrt{p(1-p)/n} \approx p \pm 1.288\sqrt{1/n}
\]
\end{itemize}
\end{frame}
\begin{frame}{Example}
\begin{itemize}
\setlength{\itemsep}{10pt}
\item<1->[] In the 2017-2018 NBA season, Lebron James shot 531 free throws and made 388. We want to estimate the probability $\theta$ that Lebron James makes a free throw.
\item<2->[Q1:] What is the estimate for $\theta$?
\item<3->[] {\color{red}
\vspace*{-0.5cm}
\[
p = x/n = 388/531 = 0.7307
\]
}
\vspace*{-0.5cm}
\item<4->[Q2:] Calculate the $95\%$ confidence interval for $\theta$ using the approximate $95\%$ interval formula (66):
\item<5->[] {\color{red}
\vspace*{-0.5cm}
\[
p \pm \sqrt{1/n} = 0.7307 \pm \sqrt{1/531} = 0.7307 \pm 0.0434
\]
}
\vspace*{-0.5cm}
\item<6->[Q3:] What is the sample size if we want the width of the confidence interval to be 0.02?
\item<7->[] {\color{red}
\vspace*{-0.5cm}
\begin{align*}
\text{We want } \sqrt{1/n} &= 0.01 = 0.02/2.\\
1/n &= 0.01^2\\
n &= 1/0.01^2 = 10000
\end{align*}
}
\vspace*{-0.5cm}
\end{itemize}
\end{frame}
\end{document}