\documentclass[fontsize=6pt, paper=a4]{scrartcl}
%\usepackage{extsizes}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage[a4paper,top=0.0cm, bottom=0.3cm, left=0.0cm, right=0.7cm]{geometry}
\usepackage{multicol}
\usepackage{mathtools}
\mathtoolsset{showonlyrefs}
\usepackage{xfrac}
\begin{document}
\begin{multicols*}{4}
\tiny{
\section{CI single sample}
\subsection{Scenario 1: CI single small sample}
Let $x_1,x_2,...,x_n$ be iid (independent and identically distributed) $\sim N(\mu, \sigma^2)$, \underline{where both $\mu$ and $\sigma$ are unknown} and \underline{$n<30$}. Then a $100(1-\alpha)\%$ CI is given by:
\begin{equation}
(L,R)=\overline{x}\pm t_{(n-1),\frac{\alpha}{2}}*\frac{s}{\sqrt{n}}
\end{equation}
Conditions Required for a Valid Small-Sample Confidence Interval for $\mu$
1. A random sample is selected from the target population.
2. The population has a relative frequency distribution that is approximately normal.
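Worked example (hypothetical values): with $n=16$, $\overline{x}=50$, $s=8$, $\alpha=0.05$ and $t_{15,0.025}\approx 2.131$:
\begin{equation}
(L,R)=50\pm 2.131*\frac{8}{\sqrt{16}}=50\pm 4.26=(45.74,\; 54.26)
\end{equation}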
\subsection{Scenario 2: CI single small sample, $\sigma$ known}
Let $x_1,x_2,...,x_n$ be iid (independent and identically distributed) $\sim N(\mu, \sigma^2)$ where $\mu$ is unknown, \underline{$n<30$}, and \underline{$\sigma$ is known}. Then a $100(1-\alpha)\%$ CI is given by (conditions: same as 1.1):
\begin{equation}
(L,R)= \overline{x}\pm Z_{\frac{\alpha}{2}}*\frac{\sigma}{\sqrt{n}}
\end{equation}
\subsection{Scenario 3: CI single sample large}
Let $x_1,x_2,...,x_n$ be iid (independent and identically distributed) with $\mu$ and $\sigma$ unknown. Given \underline{$n \geq 30$}, we don't need to assume the population is normal (by the CLT: central limit theorem). Then a $100(1-\alpha)\%$ CI for $\mu$ is given by:
\begin{equation}
(L,R)=\overline{x}\pm Z_{\frac{\alpha}{2}}*\frac{s}{\sqrt{n}}
\end{equation}
Conditions Required for a Valid Large-Sample Confidence Interval for $\mu$
1. A random sample is selected from the target population.
2. The sample size n is large (i.e., $n \geq 30$). (Due to the Central Limit Theorem, this condition guarantees that the sampling distribution of $\overline{x}$ is approximately normal. Also, for large n, s will be a good estimator of $\sigma$.)
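Worked example (hypothetical values): with $n=100$, $\overline{x}=72$, $s=10$, $\alpha=0.05$ ($Z_{0.025}=1.96$):
\begin{equation}
(L,R)=72\pm 1.96*\frac{10}{\sqrt{100}}=72\pm 1.96=(70.04,\; 73.96)
\end{equation}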
\subsection{Scenario 4: CI proportion single sample large}
Let $x_1,x_2,...,x_n$ be iid (independent and identically distributed) Bernoulli r.v. (i.e. with $P(X_i=1)=p$ and $P(X_i=0)=1-p$), where $p$ is the parameter to estimate. If $p$ is unknown, then for $n$ large enough a $100(1-\alpha)\%$ CI for $p$ is given by:
\begin{align}
\widehat{p}=& \frac{\# \textrm{successes in sample}}{n} \\
(L,R)=& \widehat{p} \pm Z_{\frac{\alpha}{2}}*\sqrt{\frac{\widehat{p}(1-\widehat{p})}{n}}
\end{align}
Note: For $n$ to be large enough, the following condition must be satisfied:
\begin{equation}
n\widehat{p} \geq 15 \quad \textrm{and} \quad
n(1-\widehat{p}) \geq 15
\end{equation}
Conditions Required for a Valid Large-Sample Confidence Interval for p
1. A random sample is selected from the target population.
2. The sample size n is large. (This condition will be satisfied if both $n\widehat{p} \geq 15$ and $n\widehat{q} \geq 15$, where $\widehat{q}=1-\widehat{p}$. Note that $n\widehat{p}$ and $n\widehat{q}$ are simply the number of successes and number of failures, respectively, in the sample.)
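Worked example (hypothetical values): 120 successes in $n=200$ trials give $\widehat{p}=0.6$ (here $n\widehat{p}=120$ and $n(1-\widehat{p})=80$, both $\geq 15$). With $\alpha=0.05$:
\begin{equation}
(L,R)=0.6\pm 1.96*\sqrt{\frac{0.6*0.4}{200}}=0.6\pm 0.068=(0.532,\; 0.668)
\end{equation}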
\subsection{CI interpretation}
\textbf{Practical}: We are $x\%$ confident that $\mu$, the mean [specify context] in the population, is between $(x_1;x_2)$\\
\textbf{Theoretical}: To be more precise, if we were to repeat this study infinitely many times and each time construct a $x\%$ confidence interval using the same technique as above, $x\%$ of these intervals would include the true mean [specify context]
\section{Hypothesis ERRORS}
\textbf{Type 1 Error}: We reject $H_0$ (the null hypothesis) when it is in fact true. "A Type I error occurs if the researcher rejects the null hypothesis in favor of the alternative hypothesis when, in fact, $H_0$ is true. The probability of committing a Type I error is denoted by $\alpha$."
\textbf{Type 2 Error}: We fail to reject $H_0$ (keep it) when it is in fact false (i.e. we "accept" $H_0$ when $H_a$ is true). "A Type II error occurs if the researcher accepts the null hypothesis when, in fact, $H_0$ is false. The probability of committing a Type II error is denoted by $\beta$."
\section{Hypothesis Decision And conclusion}
\textbf{Decision}: Since $1.93 > 1.74$ we reject $H_0$ in favour of $H_a$, at the $\alpha=x$ level\\
Since $1.93 < 1.74$ we do not reject $H_0$ in favour of $H_a$, at the $\alpha=x$ level\\
\textbf{Conclusion}: We have evidence to conclude that the true mean in the population [context] is [bigger, smaller, not the same] compared to [context] at the $\alpha=x$ level\\ \\
We do not have enough evidence to reject the null hypothesis that the [context] true mean is $\mu_0$ at the $\alpha=x$ level
\section{Hypothesis Testing single sample}
\subsection{Scenario 1: hypothesis single small}
Suppose that $x_1, x_2,...,x_n$ is a random sample from a normal distribution with unknown $\mu$ and $\sigma$ and \underline{$n<30$}. Given $\alpha$ then:
\begin{align}
H_0:&\mu=\mu_0\\
T=&\frac{\overline{x}-\mu_0}{\sfrac{s}{\sqrt{n}}}
\end{align}
$T$ will have a t-distribution with $(n-1)$ degrees of freedom. The rejection region (RR) depends on $H_a$.
\begin{align}
&H_0:\mu=\mu_0 \quad H_a:\mu >\mu_0\\
&RR= \{ T> t_{(n-1), \alpha}\}\\ \\
&H_0:\mu=\mu_0 \quad H_a:\mu <\mu_0\\
&RR= \{ T< -t_{(n-1), \alpha}\}\\ \\
&H_0:\mu=\mu_0 \quad H_a:\mu \neq \mu_0\\
&RR= \{ T> t_{(n-1), \frac{\alpha}{2}} \quad \textrm{OR} \quad T<- t_{(n-1), \frac{\alpha}{2}}\} \\
&RR= \{ \left|T\right|> t_{(n-1), \frac{\alpha}{2}}\}
\end{align}
Conditions Required for a Valid Small-Sample Hypothesis Test for $\mu$
1. A random sample is selected from the target population.
2. The population from which the sample is selected has a distribution that is approximately normal.
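Worked example (hypothetical values): testing $H_0:\mu=50$ vs $H_a:\mu>50$ with $n=16$, $\overline{x}=52$, $s=8$, $\alpha=0.05$:
\begin{equation}
T=\frac{52-50}{\sfrac{8}{\sqrt{16}}}=1.0 < t_{15,0.05}=1.753
\end{equation}
so we do not reject $H_0$.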
\subsection{Scenario 2:hypothesis single large}
Suppose that $x_1, x_2,...,x_n$ is a random sample (iid) with unknown $\mu$ and $\sigma$ and \underline{$n \geq 30$}. Given $\alpha$ (conditions same as 1.3):
\begin{align}
H_0:&\mu=\mu_0\\
T=&\frac{\overline{x}-\mu_0}{\sfrac{s}{\sqrt{n}}}
\end{align}
Since $n \geq 30$, by the central limit theorem, $T$ is approximately standard normal:
\begin{align}
&H_0:\mu=\mu_0 \quad H_a:\mu >\mu_0\\
&RR= \{ T> Z_\alpha \}\\ \\
&H_0:\mu=\mu_0 \quad H_a:\mu <\mu_0\\
&RR= \{ T< -Z_\alpha \}\\ \\
&H_0:\mu=\mu_0 \quad H_a:\mu \neq \mu_0\\
&RR= \{ T> Z_{\frac{\alpha}{2}} \quad \textrm{OR} \quad T<- Z_{\frac{\alpha}{2}} \} \\
&RR= \{ \left|T\right|> Z_{\frac{\alpha}{2}}\}
\end{align}
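Worked example (hypothetical values): testing $H_0:\mu=100$ vs $H_a:\mu>100$ with $n=36$, $\overline{x}=103$, $s=9$, $\alpha=0.05$:
\begin{equation}
T=\frac{103-100}{\sfrac{9}{\sqrt{36}}}=2.0 > Z_{0.05}=1.645
\end{equation}
so we reject $H_0$.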
\subsection{Scenario 3: hypothesis single large proportions}
Let $x_1,x_2,...,x_n$ be a random sample (iid) of Bernoulli r.v. with unknown $p$ (probability of success), where $n$ is large enough [i.e. $n\widehat{p} \geq 15$ and $n(1-\widehat{p}) \geq 15$]. Given $\alpha$:
\begin{align}
&H_0:p=p_0\\
&\widehat{p}=\frac{\# \textrm{of successes in sample}}{n}\\
&T=\frac{\widehat{p}-p_0}{\sqrt{\frac{p_0(1-p_0)}{n}}}
\end{align}
\begin{align}
&H_0:p=p_0 \quad H_a:p >p_0\\
&RR= \{ T> Z_\alpha \}\\ \\
&H_0:p=p_0 \quad H_a:p <p_0\\
&RR= \{ T< -Z_\alpha \}\\ \\
&H_0:p=p_0 \quad H_a:p \neq p_0\\
&RR= \{ T> Z_{\frac{\alpha}{2}} \quad \textrm{OR} \quad T<- Z_{\frac{\alpha}{2}} \} \\
&RR= \{ \left|T\right|> Z_{\frac{\alpha}{2}}\}
\end{align}
Conditions Required for a Valid Large-Sample Hypothesis Test for p
1. A random sample is selected from a binomial population.
2. The sample size n is large. (This condition will be satisfied if both $np\geq 15$ and $nq \geq 15$.)
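Worked example (hypothetical values): testing $H_0:p=0.5$ vs $H_a:p\neq 0.5$ with 60 successes in $n=100$ trials, $\alpha=0.05$:
\begin{equation}
T=\frac{0.6-0.5}{\sqrt{\frac{0.5(1-0.5)}{100}}}=2.0 > Z_{0.025}=1.96
\end{equation}
so we reject $H_0$.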
\section{Two Sample Problems}
\subsection{Scenario 1: TWO INDP; SMALL}
Let $x_1,x_2,...,x_m$ be a random sample from a normal distribution with unknowns $\mu_1$ and $\sigma_1$.
Let $y_1,y_2,...,y_n$ be a random sample from a normal distribution with unknowns $\mu_2$ and $\sigma_2$, with \underline{$m<30$ and $n<30$}. We assume that both samples are \underline{normally} distributed and are \underline{independent} of one another.
Further suppose that $\sigma_1=\sigma_2$. A $100(1-\alpha)\%$ CI for $(\mu_1-\mu_2)$ is given by:
\begin{align}
&(L,R)=(\overline{x}_1-\overline{x}_2)\pm t_{(m+n-2), \frac{\alpha}{2}}*S_p*\sqrt{\frac{1}{m}+\frac{1}{n}}\\
&{S_p}=\sqrt{\frac{(m-1){s_1}^2+(n-1){s_2}^2}{m+n-2}}
\end{align}
\begin{align}
&H_0:\mu_1-\mu_2=0 \quad \textrm{OR} \quad H_0:\mu_1=\mu_2\\
&T=\frac{\overline{x}_1-\overline{x}_2}{s_p*\sqrt{\frac{1}{m}+\frac{1}{n}}}\\ \\
&H_a:\mu_1-\mu_2>0\\
&RR=\{T \geq t_{(m+n-2),\alpha}\} \\ \\
&H_a:\mu_1-\mu_2<0\\
&RR=\{T \leq -t_{(m+n-2),\alpha}\} \\ \\
&H_a:\mu_1-\mu_2\neq 0\\
&RR=\{T \geq t_{(m+n-2),\frac{\alpha}{2}} \quad\textrm{OR}\quad T \leq -t_{(m+n-2),\frac{\alpha}{2}}\}\\
&RR=\{\left|T\right|\geq t_{(m+n-2),\frac{\alpha}{2}}\}
\end{align}
Conditions Required for Valid Small-Sample Inferences about $\mu_1-\mu_2$
1. The two samples are randomly selected in an independent manner from the two target populations.
2. Both sampled populations have distributions that are approximately normal.
3. The population variances are equal (i.e., $\sigma_1^2=\sigma_2^2$).
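Worked example (hypothetical values): $m=n=10$, $\overline{x}_1=12$, $\overline{x}_2=10$, $s_1=2$, $s_2=3$, testing $H_a:\mu_1-\mu_2\neq 0$ at $\alpha=0.05$:
\begin{align}
&S_p=\sqrt{\frac{9*4+9*9}{18}}=\sqrt{6.5}\approx 2.55\\
&T=\frac{12-10}{2.55*\sqrt{\frac{1}{10}+\frac{1}{10}}}\approx 1.75 < t_{18,0.025}=2.101
\end{align}
so we do not reject $H_0$.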
\subsection{Scenario 2: TWO INDP LARGE}
Suppose $x_1,x_2,...,x_m$ with unknown $\mu_1$ and $\sigma_1$ and $y_1,y_2,...,y_n$ with unknown $\mu_2$ and $\sigma_2$. Furthermore, if \underline{$m \geq 30$ and $n \geq 30$} and the $x$'s are \underline{independent} of the $y$'s, a $100(1-\alpha)\%$ CI for $(\mu_1-\mu_2)$ is given by:
\begin{equation}
(L,R)=(\overline{x}_1-\overline{x}_2)\pm Z_{\frac{\alpha}{2}}*\sqrt{\frac{{s_1}^2}{m}+\frac{{s_2}^2}{n}}
\end{equation}
\begin{align}
&H_0:\mu_1-\mu_2=0\\
&T=\frac{\overline{x}_1-\overline{x}_2}{\sqrt{\frac{{s_1}^2}{m}+\frac{{s_2}^2}{n}}}\\
&H_0:\mu_1-\mu_2=0 \quad H_a:\mu_1-\mu_2>0\\
&RR= \{ T> Z_\alpha \}\\ \\
&H_0:\mu_1-\mu_2=0 \quad H_a:\mu_1-\mu_2<0\\
&RR= \{ T< -Z_\alpha \}\\ \\
&H_0:\mu_1-\mu_2=0 \quad H_a:\mu_1 -\mu_2\neq 0\\
&RR= \{ T> Z_{\frac{\alpha}{2}} \quad \textrm{OR} \quad T<- Z_{\frac{\alpha}{2}} \} \\
&RR= \{ \left|T\right|> Z_{\frac{\alpha}{2}}\}
\end{align}
Conditions Required for Valid Large-Sample Inferences about $\mu_1-\mu_2$
1. The two samples are randomly selected in an independent manner from the two
target populations.
2. The sample sizes, $m$ and $n$, are both large (i.e., $m \geq 30$ and $n \geq 30$). (By the central limit theorem, this condition guarantees that the sampling distribution of $(\overline{x}_1-\overline{x}_2)$ will be approximately normal, regardless of the shapes of the underlying probability distributions of the populations. Also, $s_1^2$ and $s_2^2$ will provide good approximations to $\sigma_1^2$ and $\sigma_2^2$.)
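Worked example (hypothetical values): $m=n=50$, $\overline{x}_1=20$, $\overline{x}_2=18$, $s_1=4$, $s_2=5$; a $95\%$ CI for $(\mu_1-\mu_2)$ is
\begin{equation}
(L,R)=2\pm 1.96*\sqrt{\frac{16}{50}+\frac{25}{50}}=2\pm 1.77=(0.23,\; 3.77)
\end{equation}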
\subsection{Scenario 3: TWO PAIR DEP SMALL}
Let $x_1,x_2,...,x_n$ and $y_1,y_2,...,y_n$ be paired or matched observations from distributions with unknown $\mu_1$ and $\mu_2$ respectively, and $n<30$. Let $D_i=x_i-y_i$ ($i=1,2,...,n$). Then $D_1, D_2,...,D_n$ is a random sample from a normal distribution, with unknown mean $\mu_d=\mu_1-\mu_2$ and variance ${\sigma_d}^2$ (variance of the population of differences). A $100(1-\alpha)\%$ CI for $\mu_d$ is given by:
\begin{equation}
(L,R)=\overline{D}\pm t_{(n-1), \frac{\alpha}{2}}*\frac{s_d}{\sqrt{n}}
\end{equation}
\begin{align}
&H_0:\mu_1=\mu_2 \quad \textrm{OR} \quad H_0:\mu_d=0\\
&T=\frac{\overline{D}}{\sfrac{s_d}{\sqrt{n}}}\\
&H_0:\mu_d=0 \quad H_a:\mu_d >0\\
&RR= \{ T> t_{(n-1), \alpha}\}\\ \\
&H_0:\mu_d=0 \quad H_a:\mu_d <0\\
&RR= \{ T< -t_{(n-1), \alpha}\}\\ \\
&H_0:\mu_d=0 \quad H_a:\mu_d \neq 0\\
&RR= \{ T> t_{(n-1), \frac{\alpha}{2}} \quad \textrm{OR} \quad T<- t_{(n-1), \frac{\alpha}{2}}\} \\
&RR= \{ \left|T\right|> t_{(n-1), \frac{\alpha}{2}}\}
\end{align}
Conditions Required for Valid Small-Sample Inferences about $\mu_d$
1. A random sample of differences is selected from the target population of
differences.
2. The population of differences has a distribution that is approximately normal.
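Worked example (hypothetical values): $n=9$ pairs with $\overline{D}=1.5$ and $s_d=1.8$, testing $H_a:\mu_d\neq 0$ at $\alpha=0.05$:
\begin{equation}
T=\frac{1.5}{\sfrac{1.8}{\sqrt{9}}}=2.5 > t_{8,0.025}=2.306
\end{equation}
so we reject $H_0$.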
\subsection{Scenario 4: TWO PAIR DEP LARGE}
Let $x_1,x_2,...,x_n$ and $y_1,y_2,...,y_n$ be paired or matched observations from distributions with unknown $\mu_1$ and $\mu_2$ respectively, and $n\geq30$. Let $D_i=x_i-y_i$ ($i=1,2,...,n$). Then $D_1, D_2,...,D_n$ is a random sample with unknown mean $\mu_d=\mu_1-\mu_2$ and SD ${\sigma_d}$ (normality not required). A $100(1-\alpha)\%$ CI for $\mu_d=\mu_1-\mu_2$ is given by:
\begin{equation}
(L,R)=\overline{D}\pm Z_{\frac{\alpha}{2}}*\frac{s_d}{\sqrt{n}}
\end{equation}
\begin{align}
&H_0:\mu_1=\mu_2 \quad \textrm{OR} \quad H_0:\mu_d=0\\
&T=\frac{\overline{D}}{\sfrac{s_d}{\sqrt{n}}}\\
&H_a:\mu_1>\mu_2 \quad H_a:\mu_d>0\\
&RR= \{ T> Z_\alpha \}\\ \\
&H_a:\mu_1<\mu_2 \quad H_a:\mu_d<0\\
&RR= \{ T< -Z_\alpha \}\\ \\
&H_a:\mu_1\neq \mu_2 \quad H_a:\mu_d\neq 0\\
&RR= \{ T> Z_{\frac{\alpha}{2}} \quad \textrm{OR} \quad T<- Z_{\frac{\alpha}{2}} \} \\
&RR= \{ \left|T\right|> Z_{\frac{\alpha}{2}}\}
\end{align}
Conditions Required for Valid Large-Sample Inferences about $\mu_d$
1. A random sample of differences is selected from the target population of
differences.
2. The sample size $n_d$ is large (i.e., $n_d \geq 30$). (By the CLT, the sampling distribution of $\overline{D}$ is then approximately normal.)
\subsection{Scenario 5: TWO PROPORTION LARGE INDP}
Let $x_1,x_2,...,x_m$ be a random sample of Bernoulli random variables with unknown probability of success $p_1$ and let $y_1,y_2,...,y_n$ be a random sample of Bernoulli r.v. with unknown probability of success $p_2$. Further, suppose that the $x_i$'s are independent of the $y_i$'s and that both sample sizes are \underline{large enough}: $m\widehat{p}_1 \geq 15$, $m(1-\widehat{p}_1) \geq 15$, $n\widehat{p}_2 \geq 15$ and $n(1-\widehat{p}_2) \geq 15$. A $100(1-\alpha)\%$ CI for $(p_1-p_2)$ is given by:
\begin{equation}
(L,R)=(\widehat{p}_1-\widehat{p}_2)\pm Z_{\frac{\alpha}{2}}\sqrt{\frac{\widehat{p}_1(1-\widehat{p}_1)}{m}+\frac{\widehat{p}_2(1-\widehat{p}_2)}{n}}
\end{equation}
\begin{align}
&H_0:p_1=p_2 \quad \textrm{OR} \quad H_0:p_1-p_2=0\\
&T=\frac{\widehat{p}_1-\widehat{p}_2}{\sqrt{\widehat{p}(1-\widehat{p})[\frac{1}{m}+\frac{1}{n}]}}\\
&\widehat{p}=\frac{X+Y}{m+n} \quad \textrm{($X$, $Y$ numbers of successes in the two samples)}\\
&H_0:p_1=p_2 \quad H_a:p_1 >p_2\\
&RR= \{ T> Z_\alpha \}\\ \\
&H_0:p_1=p_2 \quad H_a:p_1 <p_2\\
&RR= \{ T< -Z_\alpha \}\\ \\
&H_0:p_1=p_2 \quad H_a:p_1 \neq p_2\\
&RR= \{ T> Z_{\frac{\alpha}{2}} \quad \textrm{OR} \quad T<- Z_{\frac{\alpha}{2}} \} \\
&RR= \{ \left|T\right|> Z_{\frac{\alpha}{2}}\}
\end{align}
Conditions Required for Valid Large-Sample Inferences about $p_1-p_2$
1. The two samples are randomly selected in an independent manner from the two
target populations.
2. The sample sizes, $m$ and $n$, are both large, so the sampling distribution of ($\widehat{p}_1-\widehat{p}_2$) will be approximately normal. (This condition is satisfied when $m\widehat{p}_1$, $m(1-\widehat{p}_1)$, $n\widehat{p}_2$ and $n(1-\widehat{p}_2)$ are all $\geq 15$.)
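Worked example (hypothetical values): $X=60$ successes out of $m=100$ and $Y=45$ out of $n=100$, testing $H_a:p_1\neq p_2$ at $\alpha=0.05$:
\begin{align}
&\widehat{p}=\frac{60+45}{200}=0.525\\
&T=\frac{0.60-0.45}{\sqrt{0.525*0.475*[\frac{1}{100}+\frac{1}{100}]}}\approx 2.12 > 1.96
\end{align}
so we reject $H_0$.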
\section{P-Values}
The observed significance level, or p-value, for a specific statistical test is the probability (assuming $H_0$ is true) of observing a value of the test statistic that is at least as contradictory to the null hypothesis, and supportive of the alternative hypothesis, as the one actually computed from the sample data.
\begin{align}
&\underline{H_a:\mu > \mu_0}\\
&p=P(Z\geq t_{obs})\\
&p=P(t_{\nu}\geq t_{obs})\\
&\underline{H_a:\mu < \mu_0}\\
&p=P(Z\leq t_{obs})\\
&p=P(t_{\nu}\leq t_{obs})\\
&\underline{H_a:\mu \neq \mu_0}\\
&p=2*P(Z\geq |t_{obs}|)\\
&p=2*P(t_{\nu}\geq |t_{obs}|)
\end{align}
(Use $Z$ for large samples and $t_{\nu}$, $\nu=n-1$, for small samples.) If $p<\alpha$ we reject $H_0$. If $p>\alpha$ we do not reject $H_0$.
Interpretation: since the p-value is not small ($p>\alpha$ for any reasonable choice of $\alpha$), there is no evidence to reject $H_0$ for any reasonable value of $\alpha$.
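Worked example (hypothetical values): for a large-sample test of $H_a:\mu > \mu_0$ with $t_{obs}=2.0$,
\begin{equation}
p=P(Z\geq 2.0)\approx 0.023 < 0.05
\end{equation}
so we reject $H_0$ at $\alpha=0.05$.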
\section{Discrete Distributions}
\subsection{Bernoulli Distribution}
A random variable X is said to have a Bernoulli distribution with parameter p ($0\leq p \leq 1$) if $P(X=1)=p$ and $P(X=0)=1-p$.
\begin{align}
&E(x)=p\\
&VAR(x)=p(1-p)\\
&SD(x)=\sqrt{p(1-p)}
\end{align}
\subsection{Binomial Setup}
Characteristics of a Binomial Random Variable
1. The experiment consists of $n$ identical trials.
2. There are only two possible outcomes on each trial. We will denote one outcome by S (for Success) and the other by F (for Failure).
3. The probability of S remains the same from trial to trial. This probability is denoted by p, and the probability of F is denoted by $q = 1 - p$.
4. The trials are independent.
5. The binomial random variable $x$ is the number of S’s in n trials.
\begin{equation}
P(X=x)= {n\choose x}*p^x*q^{n-x}
\end{equation}
$p$ is the probability of success in one trial; $q=1-p$; $n$ is the number of trials; $x$ is the number of successes in $n$ trials.
\begin{align}
&E(x)=np\\
&VAR(x)=np(1-p)\\
&SD(x)=\sqrt{np(1-p)}
\end{align}
Interpretation of $E(x)$: we expect that on average [expected value] [context]
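Worked example (hypothetical values): $n=10$ trials with $p=0.3$:
\begin{align}
&P(X=2)={10\choose 2}*0.3^2*0.7^8\approx 0.233\\
&E(x)=10*0.3=3 \quad SD(x)=\sqrt{2.1}\approx 1.45
\end{align}
On average we expect 3 successes in 10 trials.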
}
\section{basic}
\subsection{types of stats}
\textbf{Descriptive statistics} utilizes numerical and graphical methods to look for patterns in a data set, to summarize the information revealed in a data set, and to present that information in a convenient form.
\textbf{Inferential statistics} utilizes sample data to make estimates, decisions, predictions, or other generalizations about a larger set of data.
\subsection{collection methods}
\textbf{A designed experiment} is a data collection method where the researcher exerts full control over the characteristics of the experimental units sampled. These experiments typically involve a group of experimental units that are assigned the treatment and an untreated (or control) group.
\textbf{An observational study} is a data collection method where the experimental units sampled are observed in their natural setting. No attempt is made to control the characteristics of the experimental units sampled. (Examples include opinion polls and surveys.)
\underline{sample mean}
\begin{equation}
\overline{x}=\sum_{i=1}^n x_i/n
\end{equation}
\subsection{Median}
Arrange the $n$ measurements from smallest to largest. 1. If $n$ is odd, M is the middle number (position $(n+1)/2$). 2. If $n$ is even, M is the mean of the middle two numbers (positions $n/2$ and $n/2+1$).
\subsection{Skewed data}
\textbf{right skewed}: $\textrm{median} < \textrm{mean}$. \textbf{left skewed}: $\textrm{mean} < \textrm{median}$. \textbf{symmetric}: $\textrm{mean}=\textrm{median}$
\subsection{mode}
The mode is the measurement that occurs most frequently in the data set.
\subsection{range}
The range of a quantitative data set is equal to the largest measurement minus the smallest measurement.
\subsection{sample variance}
\begin{align}
&s^2=\frac{\sum_{i=1}^n (x_i-\overline{x})^2}{n-1}\\
&s^2=\frac{\sum_{i=1}^n {x_i}^2-\frac{\bigg(\sum_{i=1}^n {x_i}\bigg)^2}{n}}{n-1}
\end{align}
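Worked example (hypothetical data $2,4,6$, so $\overline{x}=4$ and $\sum x_i^2=56$): both forms give
\begin{equation}
s^2=\frac{(2-4)^2+(4-4)^2+(6-4)^2}{2}=\frac{56-\sfrac{12^2}{3}}{2}=4
\end{equation}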
\subsection{Percentile}
For any set of n measurements (arranged in ascending or descending order), the pth percentile is a number such that $p\%$ of the measurements fall below that number and $(100 - p)\%$ fall above it.
\subsection{Quartiles}
The lower quartile ($Q_L$) is the 25th percentile of a data set. The middle quartile (M) is the median or 50th percentile. The upper quartile ($Q_U$) is the 75th percentile.\\ The interquartile range (IQR) is the distance between the lower and upper quartiles: $IQR=Q_U -Q_L$.\\ On a box plot, inner fences and outer fences are used; neither set of fences actually appears on the plot. Inner fences are located at a distance of 1.5(IQR) from the hinges. Emanating from the hinges of the box are vertical lines called the whiskers. The two whiskers extend to the most extreme observation inside the inner fences. \textbf{Outer fences are the same but at 3(IQR).}
\begin{align}
&(\textrm{lower inner fence})= \textrm{lower hinge}-1.5(IQR)\\
&(\textrm{upper inner fence})= \textrm{upper hinge}+1.5(IQR)
\end{align}
\subsection{Z score}
If $|z|>3$ the observation is an outlier; if $|z|>2$ it is a possible outlier.
\begin{equation}
z=\frac{x-\overline{x}}{s} \leftrightarrow \frac{x-\mu}{\sigma}
\end{equation}
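Worked example (hypothetical values): $x=130$, $\overline{x}=100$, $s=15$ gives $z=(130-100)/15=2$, a possible outlier.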
\section{probability}
\subsection{rules}
Probability Rules for Sample Points
Let $p_i$ represent the probability of sample point $i$. Then
1. All sample point probabilities must lie between 0 and 1 (i.e., $0 \leq p_i \leq 1$).
2. The probabilities of all the sample points within a sample space must sum to 1 (i.e., $\sum_i p_i = 1$).
\subsection{complement}
\begin{equation}
P(A)+P(A^c)=1
\end{equation}
\subsection{Additive Rule of Probability}
\begin{equation}
P(A\cup B) = P(A) + P(B) - P(A\cap B)
\end{equation}
\subsection{mutually exclusive}
Events A and B are mutually exclusive if $A\cap B$ contains no sample points, that is, if A and B have no sample points in common. For mutually exclusive events:
\begin{align}
&P(A\cap B)=0\\
&P(A\cup B)=P(A)+P(B)
\end{align}
\subsection{conditional probability}
\begin{align}
&P(A|B)=\frac{P(A\cap B)}{P(B)}=\frac{P(B|A)P(A)}{P(B)}\\
&P(A\cap B)=P(B|A)P(A) \leftrightarrow P(A|B)P(B)
\end{align}
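Worked example (hypothetical values): if $P(A)=0.4$, $P(B)=0.8$ and $P(B|A)=0.5$, then
\begin{equation}
P(A|B)=\frac{0.5*0.4}{0.8}=0.25
\end{equation}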
\subsection{Independent events}
\begin{align}
&P(A|B)=P(A)\\
&P(A\cap B)=P(A)P(B)
\end{align}
\subsection{multiplicative rule prob}
You have $k$ sets of elements, $n_1$ in the first set, $n_2$ in the second set, ..., and $n_k$ in the $k$th set. Suppose you wish to form a sample of $k$ elements by taking one element from each of the $k$ sets. Then the number of different samples that can be formed is the product $n_1 n_2 \cdots n_k$.
\subsection{combination rule}
Combinations rule: if you are drawing $n$ elements from a set of $N$ elements without regard to the order of the $n$ elements, then the number of different results is ${N \choose n}=\frac{N!}{n!(N-n)!}$.
\subsection{discrete RV distribution rule}
Requirements for the Probability Distribution of a Discrete Random Variable x
1. $p(x)\geq 0$ for all values of x.
2. $\sum p(x) = 1$, where the summation of $p(x)$ is over all possible values of x.
\subsection{Expected value discrete}
\begin{align}
&\mu=E(x)=\sum x P(x)\\
&VAR(x)=E((x-\mu)^2)=\sum (x-\mu)^2P(x)=\sum x^2P(x)-(E(x))^2\\
&STD(X)=\sqrt{VAR(x)}
\end{align}
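Worked example: for a fair six-sided die,
\begin{align}
&E(x)=\sum_{x=1}^{6} x*\frac{1}{6}=3.5\\
&VAR(x)=\sum x^2P(x)-(E(x))^2=\frac{91}{6}-12.25\approx 2.92
\end{align}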
\end{multicols*}
\end{document}