diff --git a/chapters/cem_variants.tex b/chapters/cem_variants.tex index c956aa4..a82a2b6 100644 --- a/chapters/cem_variants.tex +++ b/chapters/cem_variants.tex @@ -232,12 +232,12 @@ \subsection{Cross-Entropy Surrogate Method} \label{sec:cem_alg_ce_surrogate} \begin{algorithmic} \Function{CE-Surrogate}{$S$, $\M$, $m$, $m_\text{elite}$, $k_\text{max}$} \For {$k \in [1,\ldots,k_\text{max}]$} - \State $m, m_\text{elite} \leftarrow \textproc{EvaluationSchedule}(k, k_\text{max})$ - \State $\mat{X} \sim \M(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X} \in \R^m$ - \State $\mat{Y} \leftarrow S(\vec{x})$ for $\vec{x} \in \mat{X}$ - \State $\e \leftarrow$ store top $m_\text{elite}$ from $\mat{Y}$ - \State $\bfE \leftarrow \textproc{ModelEliteSet}(\mat{X}, \mat{Y}, \M, \e, m, m_\text{elite})$ - \State $\vec{\theta}_{k^\prime} \leftarrow \textproc{Fit}(\M(\;\cdot\;; \vec{\theta}_k), \bfE)$ + \State $m, m_\text{elite} \leftarrow \textproc{EvaluationSchedule}(k, k_\text{max})$ \algorithmiccomment{choose the number of evaluations based on a schedule} + \State $\mat{X} \sim \M(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X} \in \R^{|\M| \times m}$ \algorithmiccomment{draw $m$ samples from $\M$} + \State $\mat{Y} \leftarrow S(\vec{x})$ for $\vec{x} \in \mat{X}$ \algorithmiccomment{evaluate samples $\mat{X}$ using the true objective $S$} + \State $\e \leftarrow$ store top $m_\text{elite}$ from $\mat{Y}$ \algorithmiccomment{select elite samples based on the true objective} + \State $\bfE \leftarrow \textproc{ModelEliteSet}(\mat{X}, \mat{Y}, \M, \e, m, m_\text{elite})$ \algorithmiccomment{find model-elites using a surrogate model} + \State $\vec{\theta}_{k^\prime} \leftarrow \textproc{Fit}(\M(\;\cdot\;; \vec{\theta}_k), \bfE)$ \algorithmiccomment{re-fit the distribution $\M$ using the model-elite samples} \EndFor \State \Return $\M(\;\cdot\;; \vec{\theta}_{k_\text{max}})$ \EndFunction @@ -264,12 +264,12 @@ \subsection{Cross-Entropy Surrogate Method} \label{sec:cem_alg_ce_surrogate} \begin{algorithmic} \Function{ModelEliteSet}{$\mat{X}, \mat{Y}, \M, \e, m, m_\text{elite}$} % Fit to entire population!
- \State $\surrogate \leftarrow \textproc{GaussianProcess}(\mat{X}, \mat{Y}, \text{kernel}, \text{optimizer})$ % Squared exponential, NelderMead - \State $\mat{X}_\text{m} \sim \M(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X}_\text{m} \in \R^{10m}$ - \State $\mathbf{\hat{\mat{Y}}}_\text{m} \leftarrow \surrogate(\vec{x}_\text{m})$ for $\vec{x}_\text{m} \in \mat{X}_\text{m}$ - \State $\e_\text{model} \leftarrow$ store top $10m_\text{elite}$ from $\mathbf{\hat{\mat{Y}}}_\text{m}$ - \State $\e_\text{sub} \leftarrow \textproc{SubEliteSet}(\surrogate, \M, \e)$ - \State $\bfE \leftarrow \{ \e \} \cup \{ \e_\text{model} \} \cup \{ \e_\text{sub} \}$ \algorithmiccomment{elite set} + \State $\surrogate \leftarrow \textproc{GaussianProcess}(\mat{X}, \mat{Y}, \text{kernel}, \text{optimizer})$ \algorithmiccomment{fit a surrogate model to the samples} % Squared exponential, NelderMead + \State $\mat{X}_\text{m} \sim \M(\;\cdot\;; \vec{\theta}_k)$ where $\mat{X}_\text{m} \in \R^{|\M| \times {10m}}$ \algorithmiccomment{draw $10m$ samples from $\M$} + \State $\mathbf{\hat{\mat{Y}}}_\text{m} \leftarrow \surrogate(\vec{x}_\text{m})$ for $\vec{x}_\text{m} \in \mat{X}_\text{m}$ \algorithmiccomment{evaluate samples $\mat{X}_\text{m}$ using the surrogate objective $\surrogate$} + \State $\e_\text{model} \leftarrow$ store top $10m_\text{elite}$ from $\mathbf{\hat{\mat{Y}}}_\text{m}$ \algorithmiccomment{select model-elite samples based on the surrogate objective} + \State $\e_\text{sub} \leftarrow \textproc{SubEliteSet}(\surrogate, \M, \e)$ \algorithmiccomment{generate sub-elite samples using the surrogate $\surrogate$} + \State $\bfE \leftarrow \e \cup \e_\text{model} \cup \e_\text{sub}$ \algorithmiccomment{combine all elite samples into an elite set} \State \Return $\bfE$ \EndFunction \end{algorithmic} @@ -289,10 +289,10 @@ \subsection{Cross-Entropy Surrogate Method} \label{sec:cem_alg_ce_surrogate} \begin{algorithmic} \Function{SubEliteSet}{$\surrogate, \M, \e$} \State $\e_\text{sub} \leftarrow \emptyset$ - \State $\m \leftarrow \{ e_x \in \e ; \Normal(e_x, \M.\Sigma) \}$ + \State $\m \leftarrow \{ \Normal(e_x, \M.\Sigma) \mid e_x \in \e \}$ \algorithmiccomment{create a set of distributions centered at each model-elite sample} \For {$\m_i \in \m$} - \State $\m_i \leftarrow \textproc{CrossEntropyMethod}(\surrogate, \m_i ; \theta_{\text{CE}})$ - \State $\e_\text{sub} \leftarrow \{\e_\text{sub}\} \cup \{\textproc{Best}(\m_i)\}$ + \State $\m_i \leftarrow \textproc{CrossEntropyMethod}(\surrogate, \m_i ; \theta_{\text{CE}})$ \algorithmiccomment{run the CE-method over each new distribution} + \State $\e_\text{sub} \leftarrow \e_\text{sub} \cup \{\textproc{Best}(\m_i)\}$ \algorithmiccomment{append the best result to the sub-elite set} \EndFor \State \Return $\e_\text{sub}$ \EndFunction @@ -394,8 +394,7 @@ \subsection{Test Objective Function Generation} The parameters that define the sierra function are collected into $\vec{\theta} = \langle \mathbf{\tilde{\vec{\mu}}}, \mathbf{\tilde{\mat{\Sigma}}}, \mat{G}, \mat{P}, \vec{s} \rangle$.
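+
+Before continuing, \textproc{ModelEliteSet} can be made concrete with a minimal Julia sketch (an illustration, not the implementation used in this work). It assumes GaussianProcesses.jl and Distributions.jl, samples stored as $d \times n$ matrices, and a minimization objective, so ``top'' means the smallest surrogate estimates; the sub-elite step is omitted for brevity.
+\begin{verbatim}
+using GaussianProcesses, Distributions
+
+function model_elite_set(X, Y, M, elites, m, m_elite)
+    # Fit a GP surrogate to the full population (squared-exponential
+    # kernel), then optimize its hyperparameters.
+    gp = GP(X, Y, MeanZero(), SE(0.0, 0.0))
+    optimize!(gp)
+    Xm = rand(M, 10 * m)               # draw 10m fresh samples from M
+    Yhat, _ = predict_f(gp, Xm)        # surrogate estimates of the objective
+    order = sortperm(Yhat)             # minimization: best estimates first
+    model_elites = Xm[:, order[1:10 * m_elite]]
+    return hcat(elites, model_elites)  # combined elite set (sub-elites omitted)
+end
+\end{verbatim}
+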
Using these parameters, we can define the mixture model used by the sierra function as: \begin{gather*} - \Sierra \sim \operatorname{Mixture}\left(\left\{ \vec{\theta} ~\Big|~ \Normal\left(\vec{g} + s\vec{p}_i + \mathbf{\tilde{\vec{\mu}}},\; \mathbf{\tilde{\mat{\Sigma}}} \cdot i^{\text{decay}}/\eta \right) \right\} \right)\\ - \text{for } (\vec{g}, \vec{p}_i, s) \in (\mat{G}, \mat{P}, \vec{s}) + \Sierra \sim \operatorname{Mixture}\left(\left\{ \Normal\left(\vec{g} + s\vec{p}_i + \mathbf{\tilde{\vec{\mu}}},\; \mathbf{\tilde{\mat{\Sigma}}} \cdot i^{\text{decay}}/\eta \right) ~\Big|~ (\vec{g}, \vec{p}_i, s) \in (\mat{G}, \mat{P}, \vec{s}) \right\} \right) \end{gather*} We add a final component to be our global minimum centered at $\mathbf{\tilde{\vec{\mu}}}$ and with a covariance scaled by $\sigma\eta$. Namely, the global minimum is $\vec{x}^* = \E[\Normal(\mathbf{\tilde{\vec{\mu}}}, \mathbf{\tilde{\mat{\Sigma}}}/(\sigma\eta))] = \mathbf{\tilde{\vec{\mu}}}$. We can now use this constant mixture model with $49$ components and define the sierra objective function $\mathcal{S}(\vec{x})$ to be the negative probability density of the mixture at input $\vec{x}$ with uniform weights: @@ -411,6 +410,19 @@ \subsection{Experimental Setup} \label{sec:cem_experiment_setup} The algorithmic category aims to compare features of each CE-method variant while holding common parameters constant (for a better comparison), whereas the scheduling category experiments with evaluation scheduling heuristics. + +Because the algorithms are stochastic, we run each experiment with 50 different random number generator seed values. +To evaluate the performance of the algorithms in their respective experiments, we define three metrics. +First, we define the average ``optimal'' value $\bar{b}_v$ to be the average of the best so-far objective function value (termed ``optimal'' in the context of each algorithm). Again, we emphasize that we average over the 50 seed values to gather meaningful statistics. +Another metric we monitor is the average distance to the true global optimum $\bar{b}_d = \norm{\vec{b}_{\vec{x}} - \vec{x}^*}$, where $\vec{b}_{\vec{x}}$ denotes the $\vec{x}$-value associated with the ``optimal''. +We make the distinction between these metrics to show both ``closeness'' in \textit{value} to the global minimum and ``closeness'' in the \textit{design space} to the global minimum. +Our final metric is the average runtime of each algorithm, noting that our goal is to off-load computationally expensive objective function calls to the surrogate model. + +For all of the experiments, we use a common setting of the following parameters for the sierra test function (shown in the top-right plot in \cref{fig:sierra}): +\begin{equation*} + (\mathbf{\tilde{\vec{\mu}}} =[0,0],\; \sigma=3,\; \delta=2,\; \eta=6,\; \text{decay} = 1) +\end{equation*} + \begin{figure*}[!t] \centering \subfloat[The cross-entropy method.]{% @@ -427,22 +439,11 @@ \subsection{Experimental Setup} \label{sec:cem_experiment_setup} } \end{figure*} -Because the algorithms are stochastic, we run each experiment with 50 different random number generator seed values. -To evaluate the performance of the algorithms in their respective experiments, we define three metrics. -First, we define the average ``optimal'' value $\bar{b}_v$ to be the average of the best so-far objective function value (termed ``optimal'' in the context of each algorithm).
Again, we emphasize that we average over the 50 seed values to gather meaningful statistics. -Another metric we monitor is the average distance to the true global optimal $\bar{b}_d = \norm{\vec{b}_{\vec{x}} - \vec{x}^*}$, where $\vec{b}_{\vec{x}}$ denotes the $\vec{x}$-value associated with the ``optimal''. -We make the distinction between these metrics to show both ``closeness'' in \textit{value} to the global minimum and ``closeness'' in the \textit{design space} to the global minimum. -Our final metric looks at the average runtime of each algorithm, noting that our goal is to off-load computationally expensive objective function calls to the surrogate model. - -For all of the experiments, we use a common setting of the following parameters for the sierra test function (shown in the top-right plot in \cref{fig:sierra}): -\begin{equation*} - (\mathbf{\tilde{\vec{\mu}}} =[0,0],\; \sigma=3,\; \delta=2,\; \eta=6,\; \text{decay} = 1) -\end{equation*} - \subsubsection{Algorithmic Experiments} \label{sec:cem_alg_experiments} We run three separate algorithmic experiments, each to test a specific feature. For our first algorithmic experiment (1A), we want to test each algorithm when the user-defined mean is centered at the global minimum and the covariance is arbitrarily wide enough to cover the design space. +\Cref{fig:k5} illustrates experiment (1A) for each algorithm. Let $\M$ be a distribution parameterized by $\vec{\theta} = (\vec{\mu}, \mat{\Sigma})$, and for experiment (1A) we set the following: % CE-mixture mean and covariance (1A) \begin{equation*} @@ -581,15 +582,7 @@ \subsection{Results and Analysis} \label{sec:cem_results} This scenario is illustrated in \cref{fig:example_1b}. We can see that both CE-surrogate and CE-mixture perform well in this case. -\begin{figure}[!h] - \centering - \resizebox{0.6\columnwidth}{!}{\input{figures/cem_variants/example1b.pgf}} - \caption{ - \label{fig:example_1b} - First iteration of the scenario in experiment (1B) where the initial distribution is far away form the global optimal. The red dots indicate the true-elites, the black dots with white outlines indicate the ``non-elites'' evaluated from the true objective function, and the white dots with black outlines indicate the samples evaluated using the surrogate model. - } -\end{figure} - +% % % % \begin{figure}[!ht] @@ -616,4 +609,15 @@ \section{Discussion} \label{sec:cem_discussion} Using this test function, we showed that the CE-surrogate algorithm achieves the best performance relative to the standard CE-method, each using the same number of true objective function evaluations. +\begin{figure}[!h] + \centering + \resizebox{0.6\columnwidth}{!}{\input{figures/cem_variants/example1b.pgf}} + \caption{ + \label{fig:example_1b} + First iteration of the scenario in experiment (1B) where the initial distribution is far away from the global optimum. The red dots indicate the true-elites, the black dots with white outlines indicate the ``non-elites'' evaluated with the true objective function, and the white dots with black outlines indicate the samples evaluated using the surrogate model. + } +\end{figure} + + + % \printbibliography \ No newline at end of file diff --git a/chapters/pomdpstresstesting.tex b/chapters/pomdpstresstesting.tex index 9763f10..802d01f 100644 --- a/chapters/pomdpstresstesting.tex +++ b/chapters/pomdpstresstesting.tex @@ -1,3 +1,6 @@ +This chapter discusses open source software tools built and used in the previous chapters.
+These tools were designed for general use beyond the applications in this work and were written in the scientific computing language Julia \cite{bezanson2017julia}. + \section{POMDPStressTesting.jl Summary} \href{https://github.com/sisl/POMDPStressTesting.jl}{POMDPStressTesting.jl} is a package that uses reinforcement learning and stochastic optimization to find likely failures in black-box systems through a technique called adaptive stress testing \cite{ast}. diff --git a/main.tex b/main.tex index 17006f0..0c9808d 100644 --- a/main.tex +++ b/main.tex @@ -56,7 +56,7 @@ \chapter{Introduction} \input{chapters/weakness_rec} % POMDPStressTesting.jl: Adaptive Stress Testing for Black-Box Systems - \chapter{Open Source Tooling}\label{cha:tooling} + \chapter{Open Source Tools for Validation}\label{cha:tooling} \input{chapters/pomdpstresstesting} \fi
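
The adaptive stress testing loop that POMDPStressTesting.jl summary above refers to can be sketched in a few lines of Julia. This is a schematic of the rollout pattern only, not the package's actual API; the arguments (step!, isevent, sample_disturbance, logpdf_x) are illustrative stand-ins for the package's black-box interface. A rollout samples environment disturbances, accumulates their log-likelihood, and rewards episodes that end in a failure event:

    # One adaptive stress testing rollout (schematic, simplified reward).
    function ast_rollout(step!, isevent, sample_disturbance, logpdf_x, horizon)
        loglikelihood = 0.0
        for t in 1:horizon
            x = sample_disturbance()           # propose a disturbance
            loglikelihood += logpdf_x(x)       # track trajectory likelihood
            step!(x)                           # advance the black-box simulator
            isevent() && return loglikelihood  # failure found: reward its likelihood
        end
        return -Inf                            # no failure this episode
    end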