Organized algorithms and diagrams, added README and PDF

mossr · May 1, 2021 · 53e9425 · 53e9425
1 parent e2226f3
commit 53e9425
Show file tree

Hide file tree

Showing 32 changed files with 8,137 additions and 8,124 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,10 @@
+# Algorithms for efficient validation of black-box systems
+> Thesis for the degree of Master of Science with a Distinction in Research.
+>
+> Computer Science, Stanford University.
+>
+> Robert Moss, 2021.
+
+See [moss-mscs-thesis.pdf](./moss-mscs-thesis.pdf)
+
+To compile, run `latexmk`
diff --git a/algorithms/ce-mixture-fit.tex b/algorithms/ce-mixture-fit.tex
@@ -0,0 +1,10 @@
+\begin{algorithm}[ht] % pseudo-code float for the mixture-model Fit routine (label: alg:ce_mixture_fit)
+  \begin{algorithmic} % NOTE(review): the incoming value of \M is never read -- the first statement overwrites it with Mixture(\m)
+  \Function{Fit}{$\M, \m, \bfE$}
+    \State $\M \leftarrow \operatorname{Mixture}( \m )$
+    \State $\mathbf{\hat{\vec{\theta}}} \leftarrow \textproc{ExpectationMaximization}(\M, \bfE)$
+    \State \Return $\M(\;\cdot\;; \mathbf{\hat{\vec{\theta}}})$
+  \EndFunction
+  \end{algorithmic}
+  \caption{\label{alg:ce_mixture_fit} Fitting mixture models (used by CE-mixture).}
+\end{algorithm}
diff --git a/algorithms/ce-surrogate.jl.tex b/algorithms/ce-surrogate.jl.tex
@@ -0,0 +1,13 @@
+\begin{lstlisting}[language=JuliaLocal]
+function ce_surrogate(S, 𝐌; m, m_elite, k_max)
+    for k in 1:k_max
+        mₑ, m_elite = evaluation_schedule(k, k_max) # number of evaluations from a schedule
+        𝐗 = rand(𝐌, mₑ) # draw mₑ samples from 𝐌
+        𝐘 = map(S, eachcol(𝐗)) # evaluate samples 𝐗 using true objective S
+        𝐞 = 𝐗[:, sortperm(𝐘)[1:m_elite]] # select elite samples output from true objective
+        𝐄 = model_elite_set!(𝐗, 𝐘, 𝐌, 𝐞, m, m_elite) # find model-elites using a surrogate
+        𝐌 = fit(𝐌, 𝐄) # re-fit distribution 𝐌 using model-elite samples
+    end
+    return 𝐌
+end
+\end{lstlisting}
diff --git a/algorithms/ce-surrogate.tex b/algorithms/ce-surrogate.tex
@@ -1,13 +1,16 @@
-\begin{lstlisting}[language=JuliaLocal]
-function ce_surrogate(S, 𝐌; m, m_elite, k_max)
-    for k in 1:k_max
-        mₑ, m_elite = evaluation_schedule(k, k_max) # number of evaluations from a schedule
-        𝐗 = rand(𝐌, mₑ) # draw mₑ samples from 𝐌
-        𝐘 = map(S, eachcol(𝐗)) # evaluate samples 𝐗 using true objective S
-        𝐞 = 𝐗[:, sortperm(𝐘)[1:m_elite]] # select elite samples output from true objective
-        𝐄 = model_elite_set!(𝐗, 𝐘, 𝐌, 𝐞, m, m_elite) # find model-elites using a surrogate
-        𝐌 = fit(𝐌, 𝐄) # re-fit distribution 𝐌 using model-elite samples
-    end
-    return 𝐌
-end
-\end{lstlisting}
+\begin{algorithm}[ht] % pseudo-code float for the cross-entropy surrogate method (label: alg:ce_surrogate)
+  \begin{algorithmic} % NOTE(review): the loop assigns \vec{\theta}_{k^\prime} while the return reads \vec{\theta}_{k_\text{max}}; presumably k^\prime denotes the next iteration index -- confirm
+  \Function{CE-Surrogate}{$S$, $\M$, $m$, $m_\text{elite}$, $k_\text{max}$}
+    \For {$k \in [1,\ldots,k_\text{max}]$}
+        \State $m, m_\text{elite} \leftarrow \textproc{EvaluationSchedule}(k, k_\text{max})$ \algorithmiccomment{number of evaluations from a schedule}
+        \State $\mat{X} \sim \M(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X} \in \R^{|\M| \times m}$ \algorithmiccomment{draw $m$ samples from $\M$}
+        \State $\mat{Y} \leftarrow S(\vec{x})$ for $\vec{x} \in \mat{X}$ \algorithmiccomment{evaluate samples $\mat{X}$ using true objective $S$}
+        \State $\e \leftarrow$ store top $m_\text{elite}$ from $\mat{Y}$ \algorithmiccomment{select elite samples output from true objective}
+        \State $\bfE \leftarrow \textproc{ModelEliteSet}(\mat{X}, \mat{Y}, \M, \e, m, m_\text{elite})$ \algorithmiccomment{find model-elites using a surrogate model}
+        \State $\vec{\theta}_{k^\prime} \leftarrow \textproc{Fit}(\M(\;\cdot\;; \vec{\theta}_k), \bfE)$ \algorithmiccomment{re-fit distribution $\M$ using model-elite samples}
+    \EndFor
+    \State \Return $\M(\;\cdot\;; \vec{\theta}_{k_\text{max}})$
+  \EndFunction
+  \end{algorithmic}
+  \caption{\label{alg:ce_surrogate} Cross-entropy surrogate method.}
+\end{algorithm}
diff --git a/algorithms/cem-variants-functions.tex → algorithms/cem-variants-functions.jl.tex b/algorithms/cem-variants-functions.tex → algorithms/cem-variants-functions.jl.tex
diff --git a/algorithms/cem-variants-usage-basis.tex → algorithms/cem-variants-usage-basis.jl.tex b/algorithms/cem-variants-usage-basis.tex → algorithms/cem-variants-usage-basis.jl.tex
diff --git a/algorithms/cem-variants-usage.tex → algorithms/cem-variants-usage.jl.tex b/algorithms/cem-variants-usage.tex → algorithms/cem-variants-usage.jl.tex
diff --git a/algorithms/cross-entropy-method.tex b/algorithms/cross-entropy-method.tex
@@ -0,0 +1,14 @@
+\begin{algorithm}[ht] % pseudo-code float for the cross-entropy (CE) method (label: alg:cem)
+  \begin{algorithmic} % the parameter list is passed as the second argument of \Function, matching the other algorithm files
+  \Function{CrossEntropyMethod}{$S, g, m, m_\text{elite}, k_\text{max}$}
+    \For {$k \in [1,\ldots,k_\text{max}]$}
+        \State $\mat{X} \sim g(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X} \in \R^{|g|\times m}$\algorithmiccomment{draw $m$ samples from $g$}
+        \State $\mat{Y} \leftarrow S(\vec{x})$ for $\vec{x} \in \mat{X}$ \algorithmiccomment{evaluate samples $\mat{X}$ using objective $S$}
+        \State $\e \leftarrow$ store top $m_\text{elite}$ from $\mat{Y}$ \algorithmiccomment{select elite samples output from objective}
+        \State $\vec{\theta}_{k^\prime} \leftarrow \textproc{Fit}(g(\;\cdot\;; \vec{\theta}_k), \e)$ \algorithmiccomment{re-fit distribution $g$ using elite samples}
+    \EndFor
+    \State \Return $g(\;\cdot\;; \vec{\theta}_{k_\text{max}})$
+  \EndFunction
+  \end{algorithmic}
+  \caption{\label{alg:cem} Cross-entropy method.}
+\end{algorithm}
diff --git a/algorithms/evaluation-schedule.tex b/algorithms/evaluation-schedule.tex
@@ -0,0 +1,16 @@
+\begin{algorithm}[ht] % pseudo-code float for the per-iteration evaluation schedule (label: alg:evaluation_schedule)
+  \begin{algorithmic} % NOTE(review): p, m and m_\text{elite} are read below but are not arguments of the function -- presumably inherited from the calling algorithm; confirm
+  \Function{EvaluationSchedule}{$k, k_\text{max}$}
+    \State $G \sim \Geo(p)$
+    \State $N_\text{max} \leftarrow k_\text{max} \cdot m$
+    \State $m \leftarrow \round{N_\text{max} \cdot p_G(k)}$
+    \If{$k = k_\text{max}$}
+        \State $s \leftarrow \displaystyle\sum_{i=1}^{k_\text{max}-1} \round{N_\text{max} \cdot p_G(i)}$
+        \State $m \leftarrow \min(N_\text{max} - s, N_\text{max} - m)$
+    \EndIf
+    \State $m_\text{elite} \leftarrow \min(m_\text{elite}, m)$
+    \State \Return ($m, m_\text{elite}$) 
+  \EndFunction
+  \end{algorithmic}
+  \caption{\label{alg:evaluation_schedule} Evaluation schedule using a Geometric distribution.}
+\end{algorithm}
diff --git a/algorithms/model-elite-set.tex b/algorithms/model-elite-set.tex
@@ -0,0 +1,15 @@
+\begin{algorithm}[ht] % pseudo-code float for building the combined elite set (label: alg:model_elite_set)
+  \begin{algorithmic}
+  \Function{ModelEliteSet}{$\mat{X}, \mat{Y}, \M, \e, m, m_\text{elite}$}
+    % The surrogate is fit to the entire sampled population (\mat{X}, \mat{Y}), not only to the elite samples \e.
+    \State $\surrogate \leftarrow \textproc{GaussianProcess}(\mat{X}, \mat{Y}, \text{kernel}, \text{optimizer})$ \algorithmiccomment{fit a surrogate model to the samples} % presumably kernel = squared exponential and optimizer = Nelder-Mead -- confirm
+    \State $\mat{X}_\text{m} \sim \M(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X}_\text{m} \in \R^{|\M| \times {10m}}$ \algorithmiccomment{draw $10m$ samples from $\M$}
+    \State $\mathbf{\hat{\mat{Y}}}_\text{m} \leftarrow \surrogate(\vec{x}_\text{m})$ for $\vec{x}_\text{m} \in \mat{X}_\text{m}$ \algorithmiccomment{evaluate samples $\mat{X}_\text{m}$ using surrogate objective $\surrogate$}
+    \State $\e_\text{model} \leftarrow$ store top $10m_\text{elite}$ from $\mathbf{\hat{\mat{Y}}}_\text{m}$ \algorithmiccomment{select model-elite samples from surrogate objective}
+    \State $\e_\text{sub} \leftarrow \textproc{SubEliteSet}(\surrogate, \M, \e)$ \algorithmiccomment{generate sub-elite samples using surrogate $\surrogate$}
+    \State $\bfE \leftarrow \{ \e \} \cup \{ \e_\text{model} \} \cup \{ \e_\text{sub} \}$ \algorithmiccomment{combine all elite samples into an elite set}
+    \State \Return $\bfE$
+  \EndFunction
+  \end{algorithmic}
+  \caption{\label{alg:model_elite_set} Modeling elite set using a surrogate objective.}
+\end{algorithm}
diff --git a/...rithms/pomdp-stress-testing-interface.tex → ...hms/pomdp-stress-testing-interface.jl.tex b/...rithms/pomdp-stress-testing-interface.tex → ...hms/pomdp-stress-testing-interface.jl.tex
diff --git a/algorithms/sub-elite-set.tex b/algorithms/sub-elite-set.tex
@@ -0,0 +1,14 @@
+\begin{algorithm}[ht] % pseudo-code float for the subcomponent elite set (label: alg:sub_elite_set)
+  \begin{algorithmic} % set-builder below is written {element | condition}: one Gaussian per true-elite sample e_x
+  \Function{SubEliteSet}{$\surrogate, \M, \e$}
+    \State $\e_\text{sub} \leftarrow \emptyset$
+    \State $\m \leftarrow \{ \Normal(e_x, \M.\Sigma) \mid e_x \in \e \}$ \algorithmiccomment{create set of distributions centered at each true-elite sample}
+    \For {$\m_i \in \m$}
+        \State $\m_i \leftarrow \textproc{CrossEntropyMethod}(\surrogate, \m_i ; \theta_{\text{CE}})$ \algorithmiccomment{run CE-method over each new distribution}
+        \State $\e_\text{sub} \leftarrow \{\e_\text{sub}\} \cup \{\textproc{Best}(\m_i)\}$ \algorithmiccomment{append best result into the sub-elite set}
+    \EndFor
+    \State \Return $\e_\text{sub}$
+  \EndFunction
+  \end{algorithmic}
+  \caption{\label{alg:sub_elite_set} Subcomponent elite set.}
+\end{algorithm}
diff --git a/appendices/episodic_ast_appendix.tex b/appendices/episodic_ast_appendix.tex
@@ -7,7 +7,7 @@ \section{Tangency Kinks}
 
 \begin{figure}[!ht]
 \centering
-\resizebox{0.5\columnwidth}{!}{\input{diagrams/tangent-angle.tex}}
+\resizebox{0.5\columnwidth}{!}{\input{diagrams/episodic_ast/tangent-angle.tex}}
 \caption{Tangency kink failure event and miss distance.}
 \label{fig:tangency_kink}
 \end{figure}
@@ -24,7 +24,7 @@ \section{Disconnections}
 
 \begin{figure}[!ht]
 \centering
-\resizebox{0.6\columnwidth}{!}{\input{diagrams/disconnection.tex}}
+\resizebox{0.6\columnwidth}{!}{\input{diagrams/episodic_ast/disconnection.tex}}
 \caption{Disconnected failure event and miss distance.}
 \label{fig:disconnection}
 \end{figure}
@@ -40,7 +40,7 @@ \section{Course Directions}
 
 \begin{figure}[!ht]
 \centering
-\resizebox{0.6\columnwidth}{!}{\input{diagrams/course-direction.tex}}
+\resizebox{0.6\columnwidth}{!}{\input{diagrams/episodic_ast/course-direction.tex}}
 \caption{Course direction failure event and miss distance.}
 \label{fig:course_direction}
 \end{figure}

diff --git a/chapters/cem_variants.tex b/chapters/cem_variants.tex
@@ -120,20 +120,7 @@ \subsection{Cross-Entropy Method} \label{sec:cem_background_cem}
 Note that a variety of input distributions for $g$ are supported, but we focus on the multivariate Gaussian distribution and the Gaussian mixture model in this work.
 
 % CE-method
-\begin{algorithm}[ht]
-  \begin{algorithmic}
-  \Function{CrossEntropyMethod}{}($S, g, m, m_\text{elite}, k_\text{max}$)
-    \For {$k \in [1,\ldots,k_\text{max}]$}
-        \State $\mat{X} \sim g(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X} \in \R^{|g|\times m}$\algorithmiccomment{draw $m$ samples from $g$}
-        \State $\mat{Y} \leftarrow S(\vec{x})$ for $\vec{x} \in \mat{X}$ \algorithmiccomment{evaluate samples $\mat{X}$ using objective $S$}
-        \State $\e \leftarrow$ store top $m_\text{elite}$ from $\mat{Y}$ \algorithmiccomment{select elite samples output from objective}
-        \State $\vec{\theta}_{k^\prime} \leftarrow \textproc{Fit}(g(\;\cdot\;; \vec{\theta}_k), \e)$ \algorithmiccomment{re-fit distribution $g$ using elite samples}
-    \EndFor
-    \State \Return $g(\;\cdot\;; \vec{\theta}_{k_\text{max}})$
-  \EndFunction
-  \end{algorithmic}
-  \caption{\label{alg:cem} Cross-entropy method.}
-\end{algorithm}
+\input{algorithms/cross-entropy-method}
 
 
 \subsection{Mixture Models}
@@ -206,22 +193,7 @@ \subsection{Cross-Entropy Surrogate Method} \label{sec:cem_alg_ce_surrogate}
 \vspace{5mm} % NOTE. To complement the \newpage below.
 
 % CE-surrogate
-\begin{algorithm}[ht]
-  \begin{algorithmic}
-  \Function{CE-Surrogate}{$S$, $\M$, $m$, $m_\text{elite}$, $k_\text{max}$}
-    \For {$k \in [1,\ldots,k_\text{max}]$}
-        \State $m, m_\text{elite} \leftarrow \textproc{EvaluationSchedule}(k, k_\text{max})$ \algorithmiccomment{number of evaluations from a schedule}
-        \State $\mat{X} \sim \M(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X} \in \R^{|\M| \times m}$ \algorithmiccomment{draw $m$ samples from $\M$}
-        \State $\mat{Y} \leftarrow S(\vec{x})$ for $\vec{x} \in \mat{X}$ \algorithmiccomment{evaluate samples $\mat{X}$ using true objective $S$}
-        \State $\e \leftarrow$ store top $m_\text{elite}$ from $\mat{Y}$ \algorithmiccomment{select elite samples output from true objective}
-        \State $\bfE \leftarrow \textproc{ModelEliteSet}(\mat{X}, \mat{Y}, \M, \e, m, m_\text{elite})$ \algorithmiccomment{find model-elites using a surrogate model}
-        \State $\vec{\theta}_{k^\prime} \leftarrow \textproc{Fit}(\M(\;\cdot\;; \vec{\theta}_k), \bfE)$ \algorithmiccomment{re-fit distribution $\M$ using model-elite samples}
-    \EndFor
-    \State \Return $\M(\;\cdot\;; \vec{\theta}_{k_\text{max}})$
-  \EndFunction
-  \end{algorithmic}
-  \caption{\label{alg:ce_surrogate} Cross-entropy surrogate method.}
-\end{algorithm}
+\input{algorithms/ce-surrogate}
 
 \newpage % NOTE.
 
@@ -239,21 +211,7 @@ \subsection{Cross-Entropy Surrogate Method} \label{sec:cem_alg_ce_surrogate}
 Finally, the elite set $\bfE$ is built from the true-elites $\e$, the model-elites $\e_\text{model}$, and the subcomponent-elites $\e_\text{sub}$.
 The resulting concatenated elite set $\bfE$ is returned.
 
-\begin{algorithm}[ht]
-  \begin{algorithmic}
-  \Function{ModelEliteSet}{$\mat{X}, \mat{Y}, \M, \e, m, m_\text{elite}$}
-    % Fit to entire population!
-    \State $\surrogate \leftarrow \textproc{GaussianProcess}(\mat{X}, \mat{Y}, \text{kernel}, \text{optimizer})$ \algorithmiccomment{fit a surrogate model to the samples} % Squared exponential, NelderMead
-    \State $\mat{X}_\text{m} \sim \M(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X}_\text{m} \in \R^{|\M| \times {10m}}$ \algorithmiccomment{draw $10m$ samples from $\M$}
-    \State $\mathbf{\hat{\mat{Y}}}_\text{m} \leftarrow \surrogate(\vec{x}_\text{m})$ for $\vec{x}_\text{m} \in \mat{X}_\text{m}$ \algorithmiccomment{evaluate samples $\mat{X}_\text{m}$ using surrogate objective $\surrogate$}
-    \State $\e_\text{model} \leftarrow$ store top $10m_\text{elite}$ from $\mathbf{\hat{\mat{Y}}}_\text{m}$ \algorithmiccomment{select model-elite samples from surrogate objective}
-    \State $\e_\text{sub} \leftarrow \textproc{SubEliteSet}(\surrogate, \M, \e)$ \algorithmiccomment{generate sub-elite samples using surrogate $\surrogate$}
-    \State $\bfE \leftarrow \{ \e \} \cup \{ \e_\text{model} \} \cup \{ \e_\text{sub} \}$ \algorithmiccomment{combine all elite samples into an elite set}
-    \State \Return $\bfE$
-  \EndFunction
-  \end{algorithmic}
-  \caption{\label{alg:model_elite_set} Modeling elite set using a surrogate objective.}
-\end{algorithm}
+\input{algorithms/model-elite-set}
 
 To encourage exploration of promising areas of the design space, the algorithm \smallcaps{SubEliteSet} focuses on the already marked true-elites $\e$.
 Each elite $e_x \in \e$ is used as the mean of a new multivariate Gaussian distribution with covariance inherited from the distribution $\M$.
@@ -264,20 +222,7 @@ \subsection{Cross-Entropy Surrogate Method} \label{sec:cem_alg_ce_surrogate}
 Note that we use $\theta_\text{CE}$ to denote the parameters for the CE-method algorithm.
 In our case, we recommend using a small $k_\text{max}$ of around $2$ so the subcomponent-elites do not over-fit to the surrogate model but have enough CE-method iterations to tend towards optimal.
 
-\begin{algorithm}[ht]
-  \begin{algorithmic}
-  \Function{SubEliteSet}{$\surrogate, \M, \e$}
-    \State $\e_\text{sub} \leftarrow \emptyset$
-    \State $\m \leftarrow \{ e_x \in \e \mid \Normal(e_x, \M.\Sigma) \}$ \algorithmiccomment{create set of distributions centered at each true-elite sample}
-    \For {$\m_i \in \m$}
-        \State $\m_i \leftarrow \textproc{CrossEntropyMethod}(\surrogate, \m_i ; \theta_{\text{CE}})$ \algorithmiccomment{run CE-method over each new distribution}
-        \State $\e_\text{sub} \leftarrow \{\e_\text{sub}\} \cup \{\textproc{Best}(\m_i)\}$ \algorithmiccomment{append best result into the sub-elite set}
-    \EndFor
-    \State \Return $\e_\text{sub}$
-  \EndFunction
-  \end{algorithmic}
-  \caption{\label{alg:sub_elite_set} Subcomponent elite set.}
-\end{algorithm}
+\input{algorithms/sub-elite-set}
 
 
 \subsection{Cross-Entropy Mixture Method} \label{sec:cem_alg_ce_mixture}
@@ -291,17 +236,7 @@ \subsection{Cross-Entropy Mixture Method} \label{sec:cem_alg_ce_mixture}
 Results in \cref{sec:cem_results} aim to show this behavior.
 
 % CE-mixture (fit)
-\begin{algorithm}[ht]
-  \begin{algorithmic}
-  \Function{Fit}{$\M, \m, \bfE$}
-    \State $\M \leftarrow \operatorname{Mixture}( \m )$
-    \State $\mathbf{\hat{\vec{\theta}}} \leftarrow \textproc{ExpectationMaximization}(\M, \bfE)$
-    \State \Return $\M(\;\cdot\;; \mathbf{\hat{\vec{\theta}}})$
-  \EndFunction
-  \end{algorithmic}
-  \caption{\label{alg:ce_mixture_fit} Fitting mixture models (used by CE-mixture).}
-\end{algorithm}
-
+\input{algorithms/ce-mixture-fit}
 
 \subsection{Evaluation Scheduling} \label{sec:cem_alg_eval_schedule}
 Given the nature of the CE-method, we expect the covariance to shrink over time, thus resulting in a solution with higher confidence.
@@ -316,25 +251,8 @@ \subsection{Evaluation Scheduling} \label{sec:cem_alg_eval_schedule}
 Note the use of the integer rounding function (e.g., $\round{x}$), which we later have to compensate for the final iterations.
 Results in \cref{sec:cem_results} compare values of $p$ that control the redistribution of evaluations.
 
-
 % EvaluationSchedule
-\begin{algorithm}[ht]
-  \begin{algorithmic}
-  \Function{EvaluationSchedule}{$k, k_\text{max}$}
-    \State $G \sim \Geo(p)$
-    \State $N_\text{max} \leftarrow k_\text{max} \cdot m$
-    \State $m \leftarrow \round{N_\text{max} \cdot p_G(k)}$
-    \If{$k = k_\text{max}$}
-        \State $s \leftarrow \displaystyle\sum_{i=1}^{k_\text{max}-1} \round{N_\text{max} \cdot p_G(i)}$
-        \State $m \leftarrow \min(N_\text{max} - s, N_\text{max} - m)$
-    \EndIf
-    \State $m_\text{elite} \leftarrow \min(m_\text{elite}, m)$
-    \State \Return ($m, m_\text{elite}$) 
-  \EndFunction
-  \end{algorithmic}
-  \caption{\label{alg:evaluation_schedule} Evaluation schedule using a Geometric distribution.}
-\end{algorithm}
-
+\input{algorithms/evaluation-schedule}
 
 \section{Experiments} \label{sec:cem_experiments}
 In this section, we detail the experiments we ran to compare the CE-method variants and evaluation schedules.