% ProbabilityModeling.tex
\section*{Probabilistic Modeling}
\subsection*{MLE}
Given a choice of conditional distribution $P(Y\mid X,\theta)$, take
$\theta^* =\argmax_\theta \prod_{i=1}^n {P_\theta}(y_i|x_i).$
\subsection*{Bayes optimality}
$\argmin_f\E_{x,y}[(y-f(x))^2]=\E[Y\mid X]$\\
$\argmin_f\E_{x,y}[1_{[y\neq f(x)]}]\\=\argmax_yp(Y=y\mid X=x)$
\subsection*{Bias-Variance-noise decomposition}
$\E_D\E_{x,y}[(\hat{f}_D(x)- y)^2]\\=
\E_x[(\E_D[\hat{f}_D(x)]-f^*(x))^2]\\
\hspace*{0.1mm}+\E_x[\operatorname{var}_D[\hat{f}_D(x)]]\\
\hspace*{0.1mm}+\E_{x,y}[(y-f^*(x))^2]$ where $f^*(x)=\E[Y\mid X=x]$.
\subsection*{Logistic regression}
Parametrize $P(y\mid x)$ by $\frac{1}{1+\exp(-y w^T x)}$.\\
MLE is $\argmax_w P(y_{1:n}\mid w,x_{1:n})\\
= \argmin_w - \sum_{i=1}^n \log P(y_i\mid w,x_i)\\
= \argmin_w \sum_{i=1}^n \log(1+\exp(-y_i w^T x_i))$
\subsection*{Gradient for logistic regression}
$\ell(w) = \log(1+\exp(-yw^Tx))$\\
$\nabla_w \ell(w) =\frac{-yx}{1+\exp(yw^Tx)}$
\subsection*{Multiclass Logistic Regression}
Parametrize $P(Y=i\mid x)$ by $\frac{\exp(w_i^Tx)}{\sum_j \exp(w_j^Tx)}$.
\subsection*{Kernelized logistic regression}
\mbox{$\min_\alpha\sum_i\log(1+\exp(-y_i\alpha^\top K_i)) + \lambda\alpha^\top K \alpha$}
$\hat{P}(y\mid x)=\frac{1}{1+\exp(-y\sum_i\alpha_ik(x_i,x))}$
\\