diff.tex

%DIF 1a1
%DIF LATEXDIFF DIFFERENCE FILE
%DIF DEL thesisSubmitted.tex   Sat Aug 11 12:24:53 2018
%DIF ADD thesis.tex            Tue Aug 14 09:53:08 2018
% !TeX spellcheck = en_US %DIF > 
%DIF -------
% Documentation:
%
% The UoK class file extends the standard report style to follow the Registry
% guidelines for laying out a thesis. It sets the margins, interline spacing,
%DIF 5c6
%DIF < % the page, figure and table numbering style, and disallows page breaks at
%DIF -------
% the page, Figure and table numbering style, and disallows page breaks at %DIF > 
%DIF -------
% hyphens. The class file consists of setting one and an half line spacing text
% with a 4cm left margin, at least a 2.5cm right margin, approximately 2cm top
% and bottom margin, on A4 paper.
% 
% The class supports the following options, in addition to those of the
% standard report class.
%     mini - Toggles the thesis in to mini-thesis mode. This adds "mini" to the
%            title and appends a nocite(*) at the end for an automatic output of
%            your complete bibliography.
%     draftmark - Puts a `DRAFT' watermark on every page of the document along
%                 with the draft statement on the title page. Additionally, it
%                 is used as a switch for the UoKExtentions package.
%     draft - Puts the entire document into draft mode. Applies all the effects
%             of draftmark above, but also propagates to other packages used.
%     copyright - Adds a copyright page between the title page and the preface.
%DIF 21c22
%DIF < %     nofig - Disables output of the list of figures in the preface.
%DIF -------
%     nofig - Disables output of the list of Figures in the preface. %DIF > 
%DIF -------
%     notab - Disables output of the list of tables in the preface.
% All options passed to UoKthesis will be passed along to included packages:
%    natbib, draftwatermark, setspace, hyperref, lmodern
%
% The cover page and optional copyright page are implicitly added before the
% start of the preface section. Use the following commands to populate the 
% cover page/copyright page information:
%     \title{thesis title}
%     \author{author's name} 
%     \degree{Master of Science, Doctor of Philosophy, etc.} 
%     \subject{author's department}
%          - Computer Science if omitted 
%     \submitdate{month year in which submitted}
%          - dated by LaTeX if omitted 
%     \copyrightyear{year degree conferred (next year if submitted in Dec.)}
%          - assumes current year (or next year, in December) if omitted 
% 
% The preface environment allows for the use of sections that precede the main
% document, such as Abstract and Acknowledgements. These sections should be
% defined using \section{Preface Section Title}. The contents page (and list of
%DIF 42c43
%DIF < % figures and tables if in use) will be automatically inserted at the end of the
%DIF -------
% Figures and tables if in use) will be automatically inserted at the end of the %DIF > 
%DIF -------
% preface environment.
%
% The thesis style invokes the setspace package to set the commands:
%     \doublespace
%     \onehalfspace
%     \singlespace
% for spacing. By default one and an half spacing is used which resembles the
% UKC Typewriter requirement. Singlespace can be used for letterpress
% appearance. If you want to use true double space, for some reason, place the
% \doublespace command where you want to start using double spacing. Just call
% the appropriate spacing command at where you want to use them.
% 
%DIF 55c56
%DIF < % In the figure and table environments, single spacing is used. If you want to
%DIF -------
% In the Figure and table environments, single spacing is used. If you want to %DIF > 
%DIF -------
% use any other size rather than one and an half spacing, then do:
% 	\renewcommand{\baselinestretch}{1.6} (or whatever you want instead of 1.6)
% This command won't take effect unless it comes before the \begin{document} or
% is triggered by a font change (after something like \small \normalsize).
%
% The example below shows the 12pt thesis style being used. This seems to give
% acceptable looking results, but it may be omitted to get 10pt. Alternatively,
% the 11pt option can be used.
%
% This version differs from old_ukcthesis.sty in the following ways:
% 1. Removed the doublespace package (now uses setspace).
% 2. Merged the phantom section for correct PDF links into the bibliography
%    generating function. 
% 3. Added thesis type options (mini, draft).
% 4. Kent Harvard is used for referencing and citation, this is supported by the
%    natbib package.
% 5. PsFig macro removed.
% 6. Now comes as two files, UoKthesis.cls, which defines purely stylistic layout,
%    and UoKextentions.sty, which provides some additional functionality.

\documentclass[12pt]{UoKthesis}

%\renewcommand*\rmdefault{ptm}
%\renewcommand{\familydefault}{\rmdefault}
% Note: The UoKextentions package includes the xcolor package with the [usenames]
% options. If you need to add further options, these can be given to UoKextentions
% to be propagated through.
\usepackage{UoKextentions}
\usepackage{times}
%\usepackage{llncsdoc}
%\usepackage{verbatim}
\usepackage{url}
\usepackage{color}
\usepackage{adjustbox}
\usepackage{amsmath}
\usepackage{relsize}
\usepackage[final]{listings}
\usepackage[T1]{fontenc}
%\usepackage[math]{times}
\usepackage{mathptmx}
\usepackage{graphicx}
\usepackage{wrapfig}
\usepackage[scaled=.90]{helvet}
\usepackage{courier}
% \td{text}: typesets text in bold typewriter type.
% The font switches are the NFSS declarations \ttfamily and \bfseries rather
% than the old two-letter commands: with the standard classes' definitions,
% \tt expands to \normalfont\ttfamily, so the former {\bf {\tt ...}} nesting
% reset the weight and the bold was silently lost.
\newcommand{\td}[1]{{\ttfamily\bfseries #1}}
% \comment{text}: a review remark, set with \td and coloured red.
\newcommand{\comment}[1]{\textcolor{red}{\td{#1}}}
\usepackage{textcomp}
\usepackage{csquotes}
% Defaults applied to every listing in the thesis.
% \bf and \it are written out as \normalfont\bfseries and \normalfont\itshape,
% which is what they expand to in text mode under the standard report-class
% definitions (assumed to be what UoKthesis inherits), so the output is
% unchanged.
\lstset{
  % Fonts and styles
  basicstyle=\small\ttfamily,
  keywordstyle=\bfseries\ttfamily,
  stringstyle=\mdseries\ttfamily,
  commentstyle=\normalfont\itshape,
  emphstyle=\normalfont\bfseries,
  % Line numbers in the left margin
  numbers=left,
  stepnumber=1,
  numbersep=5pt,
  numberstyle=\ttfamily\tiny\color[gray]{0.3},
  % Layout
  frame=none,
  xleftmargin=2pt,
  columns=flexible,
  tabsize=2,
  breaklines=true,
  % Whitespace and quote rendering
  showspaces=false,
  showstringspaces=false,
  upquote=true,
  % Captions go below the listing
  captionpos=b,
  belowcaptionskip=\bigskipamount,
  % Escape to LaTeX between *' and '*
  escapeinside={*'}{'*},
  % Language selection; the extra line-comment delimiter must follow it
  language=haskell,
  morecomment=[l]\%,
}

% Kent Harvard Bibliography Style. WIP
\bibliographystyle{kentHarvard}

% Provides nice linking in PDFs
\usepackage{hyperref}

% Only needed if you want to produce an index. Example is shown at the bottom of this document.
\usepackage{makeidx}

% Useful packages
% \usepackage{epstopdf} % Converts EPS files to PDF using ghostscript
% \usepackage{fnbreak}  % Warns you if you have split footnotes
% \usepackage{mathpazo} % Typeset mathematics in the Palatino family of text fonts
% \usepackage{paralist} % Enumerate and itemize within paragraphs
% \usepackage{amsmath}  % AMS mathematical facilities
% \usepackage{rotating} % Rotating facilities for floats

%DIF 146a147
\usepackage{caption} %DIF > 
%DIF -------
\usepackage{subcaption}
\setcounter{secnumdepth}{3} % add more section types

%%%%% macros
% NOTE: these helpers use \def, which overwrites any existing definition of
% the same name without warning.
% \fixme{note}: framed box holding a bold small-caps "Fixme" label, a \quad
% of space, and the note text.
\def\fixme#1{\fbox{\textbf{\textsc{Fixme}}\quad#1}}
% \fixpic{note}: same layout as \fixme, labelled "Picture".
\def\fixpic#1{\fbox{\textbf{\textsc{Picture}}\quad#1}}
% \defnx{text}{key}: emphasises text and writes an index entry under key.
\def\defnx#1#2{\emph{#1}\index{#2}}
% \defn{term}: \defnx with the same string as both displayed text and index key.
\def\defn#1{\defnx{#1}{#1}}
% \floatpic{dimen}{file}: right-hand wrapfigure containing the graphic file.
% Both the wrapfigure and the image are half of dimen wide.
% NOTE(review): the reason for halving is not visible here -- confirm.
\def\floatpic#1#2{%
\begin{wrapfigure}{r}{\dimexpr #1 / 2 \relax}
\includegraphics[width=\dimexpr #1 / 2 \relax]{#2}
\end{wrapfigure}}
% \inlinepic{dimen}{file}: the graphic file centred in the text flow (not a
% float), at half of dimen wide.
\def\inlinepic#1#2{%
\begin{center}
\includegraphics[width=\dimexpr #1 / 2 \relax]{#2}
\end{center}}

%%%%% augment hyphenation
\hyphenation{wide-spread}

%%%%% document start
%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF
%DIF UNDERLINE PREAMBLE %DIF PREAMBLE
% ulem supplies \uwave and \sout; normalem keeps \emph as italics. %DIF PREAMBLE
\RequirePackage[normalem]{ulem} %DIF PREAMBLE
% RED and BLUE are defined here, but the macros below use lowercase red/blue. %DIF PREAMBLE
\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1} %DIF PREAMBLE
% Added text: blue with a wavy underline. Deleted text: red and struck out. %DIF PREAMBLE
\providecommand{\DIFaddtex}[1]{{\protect\color{blue}\uwave{#1}}} %DIF PREAMBLE
\providecommand{\DIFdeltex}[1]{{\protect\color{red}\sout{#1}}}                      %DIF PREAMBLE
%DIF SAFE PREAMBLE %DIF PREAMBLE
% Begin/end markers around added and deleted blocks; empty by default. %DIF PREAMBLE
\providecommand{\DIFaddbegin}{} %DIF PREAMBLE
\providecommand{\DIFaddend}{} %DIF PREAMBLE
\providecommand{\DIFdelbegin}{} %DIF PREAMBLE
\providecommand{\DIFdelend}{} %DIF PREAMBLE
%DIF FLOATSAFE PREAMBLE %DIF PREAMBLE
% FL variants used inside floats; they forward to \DIFadd / \DIFdel. %DIF PREAMBLE
\providecommand{\DIFaddFL}[1]{\DIFadd{#1}} %DIF PREAMBLE
\providecommand{\DIFdelFL}[1]{\DIFdel{#1}} %DIF PREAMBLE
\providecommand{\DIFaddbeginFL}{} %DIF PREAMBLE
\providecommand{\DIFaddendFL}{} %DIF PREAMBLE
\providecommand{\DIFdelbeginFL}{} %DIF PREAMBLE
\providecommand{\DIFdelendFL}{} %DIF PREAMBLE
%DIF HYPERREF PREAMBLE %DIF PREAMBLE
% \texorpdfstring gives PDF strings the plain added text and nothing for deleted text. %DIF PREAMBLE
\providecommand{\DIFadd}[1]{\texorpdfstring{\DIFaddtex{#1}}{#1}} %DIF PREAMBLE
\providecommand{\DIFdel}[1]{\texorpdfstring{\DIFdeltex{#1}}{}} %DIF PREAMBLE
% Scale factor applied to deleted graphics by \DIFdelincludegraphics. %DIF PREAMBLE
\newcommand{\DIFscaledelfig}{0.5}
%DIF HIGHLIGHTGRAPHICS PREAMBLE %DIF PREAMBLE
\RequirePackage{settobox} %DIF PREAMBLE
\RequirePackage{letltxmacro} %DIF PREAMBLE
% Scratch box and lengths used to measure a deleted graphic. %DIF PREAMBLE
\newsavebox{\DIFdelgraphicsbox} %DIF PREAMBLE
\newlength{\DIFdelgraphicswidth} %DIF PREAMBLE
\newlength{\DIFdelgraphicsheight} %DIF PREAMBLE
% store original definition of \includegraphics %DIF PREAMBLE
\LetLtxMacro{\DIFOincludegraphics}{\includegraphics} %DIF PREAMBLE
% Added graphic: the original image inside a blue \fbox. %DIF PREAMBLE
\newcommand{\DIFaddincludegraphics}[2][]{{\color{blue}\fbox{\DIFOincludegraphics[#1]{#2}}}} %DIF PREAMBLE
% Deleted graphic: the image is saved in a box and measured, then drawn scaled %DIF PREAMBLE
% by \DIFscaledelfig with a red frame and two red diagonals over it. %DIF PREAMBLE
\newcommand{\DIFdelincludegraphics}[2][]{% %DIF PREAMBLE
\sbox{\DIFdelgraphicsbox}{\DIFOincludegraphics[#1]{#2}}% %DIF PREAMBLE
\settoboxwidth{\DIFdelgraphicswidth}{\DIFdelgraphicsbox} %DIF PREAMBLE
\settoboxtotalheight{\DIFdelgraphicsheight}{\DIFdelgraphicsbox} %DIF PREAMBLE
\scalebox{\DIFscaledelfig}{% %DIF PREAMBLE
\parbox[b]{\DIFdelgraphicswidth}{\usebox{\DIFdelgraphicsbox}\\[-\baselineskip] \rule{\DIFdelgraphicswidth}{0em}}\llap{\resizebox{\DIFdelgraphicswidth}{\DIFdelgraphicsheight}{% %DIF PREAMBLE
\setlength{\unitlength}{\DIFdelgraphicswidth}% %DIF PREAMBLE
\begin{picture}(1,1)% %DIF PREAMBLE
\thicklines\linethickness{2pt} %DIF PREAMBLE
{\color[rgb]{1,0,0}\put(0,0){\framebox(1,1){}}}% %DIF PREAMBLE
{\color[rgb]{1,0,0}\put(0,0){\line( 1,1){1}}}% %DIF PREAMBLE
{\color[rgb]{1,0,0}\put(0,1){\line(1,-1){1}}}% %DIF PREAMBLE
\end{picture}% %DIF PREAMBLE
}\hspace*{3pt}}} %DIF PREAMBLE
} %DIF PREAMBLE
% Save the current begin/end markers under \DIFO... names, then redefine each %DIF PREAMBLE
% marker to call its saved original and switch \includegraphics: begin markers %DIF PREAMBLE
% select the highlighting variant, end markers restore the original command. %DIF PREAMBLE
\LetLtxMacro{\DIFOaddbegin}{\DIFaddbegin} %DIF PREAMBLE
\LetLtxMacro{\DIFOaddend}{\DIFaddend} %DIF PREAMBLE
\LetLtxMacro{\DIFOdelbegin}{\DIFdelbegin} %DIF PREAMBLE
\LetLtxMacro{\DIFOdelend}{\DIFdelend} %DIF PREAMBLE
\DeclareRobustCommand{\DIFaddbegin}{\DIFOaddbegin \let\includegraphics\DIFaddincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFaddend}{\DIFOaddend \let\includegraphics\DIFOincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFdelbegin}{\DIFOdelbegin \let\includegraphics\DIFdelincludegraphics} %DIF PREAMBLE
% \DIFdelend calls the saved deletion end marker, not the addition one. %DIF PREAMBLE
\DeclareRobustCommand{\DIFdelend}{\DIFOdelend \let\includegraphics\DIFOincludegraphics} %DIF PREAMBLE
% The same wrapping for the float-safe (FL) markers. %DIF PREAMBLE
\LetLtxMacro{\DIFOaddbeginFL}{\DIFaddbeginFL} %DIF PREAMBLE
\LetLtxMacro{\DIFOaddendFL}{\DIFaddendFL} %DIF PREAMBLE
\LetLtxMacro{\DIFOdelbeginFL}{\DIFdelbeginFL} %DIF PREAMBLE
\LetLtxMacro{\DIFOdelendFL}{\DIFdelendFL} %DIF PREAMBLE
\DeclareRobustCommand{\DIFaddbeginFL}{\DIFOaddbeginFL \let\includegraphics\DIFaddincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFaddendFL}{\DIFOaddendFL \let\includegraphics\DIFOincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFdelbeginFL}{\DIFOdelbeginFL \let\includegraphics\DIFdelincludegraphics} %DIF PREAMBLE
% \DIFdelendFL likewise calls the saved deletion end marker. %DIF PREAMBLE
\DeclareRobustCommand{\DIFdelendFL}{\DIFOdelendFL \let\includegraphics\DIFOincludegraphics} %DIF PREAMBLE
%DIF LISTINGS PREAMBLE %DIF PREAMBLE
% Listings language "codediff": text between *!---- and ----!* is coloured red, %DIF PREAMBLE
% text between *!++++ and ++++!* is coloured blue. %DIF PREAMBLE
\lstdefinelanguage{codediff}{ %DIF PREAMBLE
  moredelim=**[is][\color{red}]{*!----}{----!*}, %DIF PREAMBLE
  moredelim=**[is][\color{blue}]{*!++++}{++++!*} %DIF PREAMBLE
} %DIF PREAMBLE
% Listings style "codediff": selects the language above with a plain typewriter layout. %DIF PREAMBLE
\lstdefinestyle{codediff}{ %DIF PREAMBLE
	belowcaptionskip=.25\baselineskip, %DIF PREAMBLE
	language=codediff, %DIF PREAMBLE
	basicstyle=\ttfamily, %DIF PREAMBLE
	columns=fullflexible, %DIF PREAMBLE
	keepspaces=true, %DIF PREAMBLE
} %DIF PREAMBLE
%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF

\begin{document}

\title{Data-Driven Refactorings for Haskell}
\author{Stephen Adams}
\subject{Computer Science}
\degree{PhD}

\begin{preface}
\section{Abstract}

Refactoring is the process of changing the internal structure of a program without changing its external behaviour. The goal of performing refactorings is to increase code quality. However, refactoring by hand is time consuming and error-prone. This makes automated refactoring tools very useful.

Agile software development allows for software to evolve slowly over time. This evolution changes how a program processes, abstracts over, and views its data. This evolution, though necessary, comes with the cost of technical debt. As technical debt increases changes to a code base become more difficult. Refactoring is one of the primary ways to  reduce technical debt. 

There exist refactorings that specifically help software to evolve its data model, however these refactorings are specific to the object-oriented programming paradigm. Haskell is a strongly typed, pure functional programming language. Haskell's rich type system allows for complex and powerful data models and abstractions. This thesis reports on work done to design and automate refactorings that help Haskell programmers develop and evolve these abstractions.

This work also discusses the current design and implementation of HaRe (the \textit{Ha}skell \textit{Re}factorer). HaRe now supports the Glasgow Haskell Compiler's implementation of the Haskell 2010 standard and its extensions, and uses some of GHC's internal packages in its implementation. 

\section{Acknowledgements}
First I would like to thank my supervisor Professor Simon Thompson, for his guidance and patience throughout this process. Without his help this thesis would not have been possible.

I also need to thank my parents, Jeff and Diane, for their support and encouragement. You both have been so excited for me to take this opportunity. It can't have been easy to have me live an ocean away, and your support of me in starting this project has been amazing.

Special thanks go out to Kristin Lamberty, Elena Machkasova, and Nic McPhee, the professors of computer science at the University of Minnesota, Morris. All three of you introduced me to the world of computing and I wouldn't be here today without all of your help. You prepared me as much as anyone can be prepared for a PhD.

Alan Zimmerman is cited many times in this thesis but I don't think that is sufficient thanks for all the work he has done. Without you HaRe would not be where it is today. Your dedication to the Haskell open source community is much appreciated.

I would like to extend my thanks to Adriana and Gaya Perera for letting me stay with them for the final year of my degree. You very graciously opened your house to me when I needed it and allowed me to stay much longer than any of us were expecting.

Finally I must thank Rosemary for her love and support. You have supported me through some of the toughest times of my life. Without you this would not have been finished, and without you this accomplishment would mean nothing.

\end{preface}

\chapter{Introduction}\label{chp:intro}


\section{Functional Programming}
Functional programming is a programming paradigm that focuses on data values as described by expressions which are built from function applications and definitions~\citep{elementsOfFunc}.  Functions in this case are closely related to the idea of mathematical functions. What qualifies a programming language as functional is debatable but several concepts are often included in languages that are described as functional. 

First class functions mean that functions are allowed to be treated like any other data type. First class functions can be passed to or returned from other ``higher-order'' functions. Iteration is accomplished through recursion rather than via looping. There is also a heavy emphasis on functions remaining ``pure\DIFaddbegin \DIFadd{,}\DIFaddend '' that is without side effects\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{; }\DIFaddend though functional languages do provide ways to use IO or state\DIFaddbegin \DIFadd{, }\DIFaddend it is emphasised that functions should remain pure if at all possible. Haskell's type system allows for effectful computations to be contained within monad types.

\section{Haskell}
\label{haskell}
Haskell is a statically typed, lazily evaluated, pure functional language. Haskell \DIFdelbegin \DIFdel{is strongly and statically typed, and }\DIFdelend \DIFaddbegin \DIFadd{also }\DIFaddend supports Hindley-Milner type inference (\cite{hindley}\DIFdelbegin \DIFdel{,}\DIFdelend \DIFaddbegin \DIFadd{;}\DIFaddend \cite{milner}). Type inference means that \DIFdelbegin \DIFdel{a }\DIFdelend Haskell programs do not need \DIFdelbegin \DIFdel{every type }\DIFdelend \DIFaddbegin \DIFadd{the type of every top level binding }\DIFaddend to be explicitly \DIFdelbegin \DIFdel{listed in the source code}\DIFdelend \DIFaddbegin \DIFadd{provided by the programmer}\DIFaddend . Types will be~\textit{inferred} at compilation time so that every part of a Haskell program's type is known at that time. Haskell's type system also allows users to define \DIFaddbegin \DIFadd{and use }\DIFaddend their own types.

Lazy evaluation, also known as call-by-need~\citep{wadsworth}, means that Haskell expressions are not evaluated when they are passed as a parameter, but rather when that value is used. For example, in the Haskell function \texttt{f}, in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{lazyY}, the parameter y will never be evaluated. Lazy evaluation is more nuanced than call-by-name style parameter passing. Composite data types will be evaluated only as much as required to allow the computation to continue. This allows for both partial and infinite data types, for example \texttt{[1..]} is \DIFdelbegin \DIFdel{a }\DIFdelend \DIFaddbegin \DIFadd{the }\DIFaddend list containing all of the natural numbers. 

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{verbatim}%DIFDELCMD < 
%DIFDELCMD < f x y = case x > 0 of
%DIFDELCMD < True -> x - 1
%DIFDELCMD < False -> x + 1
%DIFDELCMD < \end{verbatim}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
f x y = case x > 0 of
   True -> x - 1
   False -> x + 1
\end{lstlisting}
\DIFaddendFL \caption{A simple function}
\label{lazyY}
\end{figure}

Haskell is also a pure language. Purity is the idea that functions cannot perform actions in addition to returning values. These additional actions are known as side-effects. Haskell allows for traditionally side-effect causing operations (IO, state, etc.) through the use of monads. Monads represent computations, which when run will have effects as well as producing a result. A principal topic of this thesis is refactoring to support patterns of computation related to Monads and other related structures.

\DIFaddbegin \subsection{User defined types in Haskell}

\DIFadd{All high level programming languages come with some predefined set of types and, typically, some way for users to define new types. In Haskell new types can be introduced with the }\texttt{\DIFadd{data}} \DIFadd{keyword. Figure~\ref{simpleDataTy} shows a simple data type, }\texttt{\DIFadd{Choice}}\DIFadd{. There are only two different values a }\texttt{\DIFadd{Choice}} \DIFadd{can be, }\texttt{\DIFadd{Yes}} \DIFadd{or }\texttt{\DIFadd{No}}\DIFadd{, these are the }\textit{\DIFadd{constructors}} \DIFadd{of }\texttt{\DIFadd{Choice}}\DIFadd{. 
}


\begin{figure}[t]
\begin{lstlisting}
data Choice = Yes | No
\end{lstlisting}
\caption{\DIFaddFL{A simple data type that models a yes/no choice.}}
\label{simpleDataTy}
\end{figure}

\DIFadd{Data types can also extend existing Haskell types. Returning to the }\texttt{\DIFadd{Choice}} \DIFadd{example maybe we want to be able to store some text along with a }\texttt{\DIFadd{Yes}} \DIFadd{to explain why this choice was made. This modified definition of }\texttt{\DIFadd{Choice}} \DIFadd{is shown in Figure~\ref{stringChoice}. Now a call to the }\texttt{\DIFadd{Yes}} \DIFadd{constructor needs to be passed some value of type }\texttt{\DIFadd{String}} \DIFadd{to construct a value of type }\texttt{\DIFadd{Choice}}\DIFadd{. 
}

  \begin{figure}[t]
\begin{lstlisting}
data Choice = Yes String | No
\end{lstlisting}
\caption{\texttt{\DIFaddFL{Yes}} \texttt{\DIFaddFL{Choice}}\DIFaddFL{s now contain a string as well.}}
\label{stringChoice}
\end{figure}

\DIFadd{Haskell allows you to take this one step further however. The creator of a type does not need to specify exactly which types will be stored within the new type, this means that types can be }\textit{\DIFadd{parameterized}}\DIFadd{. Figure~\ref{choiceParam} shows a version of }\texttt{\DIFadd{Choice}} \DIFadd{where the }\texttt{\DIFadd{Yes}} \DIFadd{choices can contain any type }\texttt{\DIFadd{a}}\DIFadd{. This is known as parametric polymorphism~\mbox{%DIFAUXCMD
\citep{haskellWikiPolymorphism}}\hspace{0pt}%DIFAUXCMD
. This type of polymorphism allows users to effectively reuse their types again and again without having to redefine them every time a new ``inner'' type is needed.
}

  \begin{figure}[t]
\begin{lstlisting}
data Choice a = Yes a | No
\end{lstlisting}
\caption{\texttt{\DIFaddFL{Yes}} \texttt{\DIFaddFL{Choice}}\DIFaddFL{s can now hold any other type.}}
\label{choiceParam}
\end{figure}

\subsection{Type classes}

\DIFadd{We have now seen how users of Haskell can define their own data types, and how types can be defined in a generic way, making the definitions applicable to a whole set of arguments. Haskell is a functional language, so a natural next question is whether, if we can define types that are generic in some ways, the same can be said of functions. The answer is yes; this is what type classes do for Haskell.
}

\DIFadd{A type class allows a user to define a set of functions and/or constant names and their types. Then definitions of these functions and constants can be given for specific types. Figure~\ref{choiceEq} shows the type class for }\texttt{\DIFadd{Eq}} \DIFadd{which describes what it means for types to have a notion of equality. Figure~\ref{choiceEq} also shows how the definition of }\texttt{\DIFadd{Choice}} \DIFadd{from Figure~\ref{choiceParam} implements the }\texttt{\DIFadd{Eq}} \DIFadd{type class. The ``}\texttt{\DIFadd{(Eq a) =>}}\DIFadd{'' clause in the instance declaration is a constraint on the type of }\texttt{\DIFadd{a}}\DIFadd{. This means that for a }\texttt{\DIFadd{Choice}} \DIFadd{to be ``}\texttt{\DIFadd{Eq}}\DIFadd{-able'' }\texttt{\DIFadd{a}} \DIFadd{must be as well.
}

\begin{figure}[t]
\begin{lstlisting}
class Eq a where
   (==) :: a -> a -> Bool
   (/=) :: a -> a -> Bool

instance (Eq a) => Eq (Choice a) where
   Yes x == Yes y = x == y
   No == No = True
   _ == _ = False

   c1 /= c2 = not (c1 == c2) 
\end{lstlisting}
\caption{\DIFaddFL{The }\texttt{\DIFaddFL{Eq}} \DIFaddFL{type class and }\texttt{\DIFaddFL{Choice}}\DIFaddFL{'s implementation of it.}}
\label{choiceEq}
\end{figure}

\DIFadd{Haskell gives its users very powerful tools to help construct their own types and define those types' properties. These tools allow for many powerful concepts to be defined abstractly making them reusable. One of the more well known abstract concepts of the Haskell language are monads which are the subject of the next section.
}

\subsection{Monads}

\DIFadd{Monads are a type class used to represent ``composable computation descriptions''~\mbox{%DIFAUXCMD
\citep{haskellWikiMonad}}\hspace{0pt}%DIFAUXCMD
. Haskell uses monads to augment pure computations with features that other languages would allow as side effects such as state or I/O. Haskell has a built in monad type class, whose declaration is shown in Figure~\ref{monadTC}. Instances of monad need to implement two functions: }\texttt{\DIFadd{return}} \DIFadd{which will create a monadic computation that will produce its parameter and }\texttt{\DIFadd{>>=}} \DIFadd{(which is pronounced bind). Bind's first argument is some monadic value with a result of type }\texttt{\DIFadd{a}} \DIFadd{and its second argument is a function that takes in a value of type }\texttt{\DIFadd{a}} \DIFadd{and returns another monadic value that computes a result of type }\texttt{\DIFadd{b}}\DIFadd{. Bind computes the result of type }\texttt{\DIFadd{a}} \DIFadd{from the monadic value it was given in its first argument and passes that value to the function it received as its second argument. This description of bind only applies when its first argument is capable of computing some result or when the monad instance does not need to perform any additional tasks. This is the great power of monads as an abstraction, because bind can be implemented in almost any way}\footnote{\DIFadd{All instances of monad should abide by the three monad laws~\mbox{%DIFAUXCMD
\citep{wadler1995Monads}}\hspace{0pt}%DIFAUXCMD
}} \DIFadd{it can act as a way to specify how a particular set of computations are performed. 
}

\DIFadd{The }\texttt{\DIFadd{Choice}} \DIFadd{type that was defined in the previous section is really just a copy of the }\texttt{\DIFadd{Maybe}} \DIFadd{type which is commonly used to represent computations that may fail or return a result. Instead of }\texttt{\DIFadd{Yes}} \DIFadd{and }\texttt{\DIFadd{No}}\DIFadd{, }\texttt{\DIFadd{Maybe}}\DIFadd{'s constructors are }\texttt{\DIFadd{Just}} \DIFadd{and }\texttt{\DIFadd{Nothing}} \DIFadd{respectively. }\texttt{\DIFadd{Maybe}} \DIFadd{is a monad and in its case bind is used to cause a chain of computations to return }\texttt{\DIFadd{Nothing}} \DIFadd{if one of the steps of the computation fails. This allows for functions that could potentially fail to return a result to be composed without having to check if their parameter is a }\texttt{\DIFadd{null}} \DIFadd{value. Figure~\ref{maybeChain} shows this in practice. Each of the }\texttt{\DIFadd{f\_}} \DIFadd{functions can be written with the assumption that their parameter exists; only the top level function, }\texttt{\DIFadd{g}}\DIFadd{, needs to check if the }\texttt{\DIFadd{chain}} \DIFadd{function has failed.
}

\begin{figure}[t]
	% The class methods are indented beneath the class head, as Haskell's
	% layout rule requires and as the Eq class figure already does.
	\begin{lstlisting}
	class Applicative m => Monad m where
	   return :: a -> m a
	   (>>=)  :: forall a b. m a -> (a -> m b) -> m b
	\end{lstlisting}
	\caption{\DIFaddFL{The }\texttt{\DIFaddFL{Monad}} \DIFaddFL{type class declaration}}
	\label{monadTC}
\end{figure}

\begin{figure}[t]
	\begin{lstlisting}
		chain :: a -> Maybe a
		chain a = f_1 a >>= f_2 >>= f_3 >>= f_4

		g a = case (chain a) of
			(Just b) -> putStrLn ("Got value: " ++ (show b))
			Nothing -> putStrLn "Something went wrong"
	\end{lstlisting}
	\caption{\DIFaddFL{A chain of functions composed using bind. Each }\texttt{\DIFaddFL{f\_}} \DIFaddFL{function is of type }\texttt{\DIFaddFL{a -> Maybe a}}}
	\label{maybeChain}
\end{figure}

\DIFadd{Monads are a powerful abstraction but it is also useful to further categorise monads that exhibit different properties. Haskell comes with type classes, that inherit from }\texttt{\DIFadd{Monad}}\DIFadd{, and that categorise additional features monads can have, such as failure (}\texttt{\DIFadd{MonadFail}}\DIFadd{) or failure and recovery (}\texttt{\DIFadd{MonadPlus}}\DIFadd{)~\mbox{%DIFAUXCMD
\citep{typeclassopedia}}\hspace{0pt}%DIFAUXCMD
. }\texttt{\DIFadd{MonadPlus}} \DIFadd{is a type class that represents the type of computations that may fail and also provide some way of choosing between possibly failed computations. 
}

\DIFadd{In addition to the type classes that further categorise monads, monads themselves inherit from other more general types: functors and applicative functors. Functors are the set of types that can be mapped over; in the case of lists the values can be transformed to another value by mapping some function over the list. Applicative functors are the set of types that can have sequential actions performed over them. Applicative functors will be discussed in more detail in Chapter~\ref{chp:applicative}. 
}

\DIFadd{The Haskell community has built a large system of types around monad for representing all different sorts of computations. This system of types provides a fertile environment for data-driven refactorings, the topic of this thesis.
}

\DIFaddend \section{Refactoring} 
Refactoring is the process of changing a program without changing its behaviour. This is done to improve its internal structure~\citep{fowler}. The term refactoring was \DIFdelbegin \DIFdel{coined in 1991 in}\DIFdelend \DIFaddbegin \DIFadd{first coined in}\DIFaddend ~\citep{programRestructuring} and these ideas expanded to the object-oriented paradigm in~\citep{refactOOFrameworks}. \DIFaddbegin \DIFadd{Martin Fowler's 1999 book ``Refactoring'' has become the canonical reference for object-oriented refactorings~\mbox{%DIFAUXCMD
\citep{fowler}}\hspace{0pt}%DIFAUXCMD
. }\DIFaddend People have been performing refactoring for much longer and it was called ``program restructuring'' in the literature (\cite{highSpeedRestructuring}\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{; }\DIFaddend \cite{performanceRestructuring}). 
\DIFdelbegin \DIFdel{Martin Fowler's 1999 book ``Refactoring'' has become the canonical reference for object-oriented refactorings~\mbox{%DIFAUXCMD
\citep{fowler}}\hspace{0pt}%DIFAUXCMD
.
}\DIFdelend 

Behaviour preservation is what separates refactoring from other types of program manipulation. This idea of \DIFdelbegin \DIFdel{functionality preservation means }\DIFdelend \DIFaddbegin \DIFadd{behaviour preservation is }\DIFaddend that refactoring will not introduce new bugs or eliminate old ones. To prevent semantic changes after refactoring, many refactorings have non-trivial preconditions~\citep{mens2002formalising}. For example, the \DIFdelbegin \DIFdel{renaming refactoring }\DIFdelend \DIFaddbegin \DIFadd{refactoring that renames an item (such as a function or variable) }\DIFaddend should check that the new name being introduced does not cause a name clash with a name already used in the source program.

Manual refactoring \DIFdelbegin \DIFdel{is }\DIFdelend \DIFaddbegin \DIFadd{can be }\DIFaddend tedious and error prone because changes to small portions of code may require system-wide changes. When deleting a parameter from a function, for example, every call site of that function also needs to be modified, and missing even a single call site will cause an error. Refactoring by hand depends on high testing coverage to ensure that functionality is preserved~\citep{fowler}. This means that tools that can automatically perform refactorings and ensure that preconditions are met are highly desirable.

\subsection{Functional refactoring}

Refactoring a functional language has a few key differences from refactoring an imperative language. The higher-order nature \DIFaddbegin \DIFadd{of }\DIFaddend functional languages means that any sub-expression of a function is a candidate for generalisation whereas in other languages the types of parameters and results \DIFdelbegin \DIFdel{is limited. }\DIFdelend \DIFaddbegin \DIFadd{are limited. This means that functional refactorings can target much more of the source program's abstract syntax tree. }\DIFaddend The semantics of functional languages also allow for more comprehensive checking of preconditions based on the static semantics of the language~\citep{refacTools}.

It is also not unusual for functional refactorings to be substantially different than their object-oriented (OO) counterparts. For example creating a \DIFdelbegin \DIFdel{case }\DIFdelend \DIFaddbegin \texttt{\DIFadd{case}} \DIFaddend statement from a multi-equation function definition in a functional language versus inlining \DIFdelbegin \DIFdel{a virtual method as a case }\DIFdelend \DIFaddbegin \DIFadd{virtual methods into a }\texttt{\DIFadd{case}} \DIFaddend statement in an OO language require substantially different program manipulations~\citep{huiqingThesis}. \DIFaddbegin \DIFadd{The first refactoring, the inlining of a multi-equation function, involves moving the pattern matches from each of the target function's equations to the left hand side patterns of the new }\texttt{\DIFadd{case}} \DIFadd{expression. The object oriented refactoring needs to look up all of the implementations of the virtual method, inline their bodies into the new target method, and build a case statement that switches depending on which concrete type the new method is invoked on. Both of these refactorings are consolidating some conditional logic into a single function,}\footnote{\DIFadd{If we consider methods to be functions that happen to be associated with a particular object.}} \DIFadd{but the syntax elements that need to be traversed and constructed to build the target program are very different. }\DIFaddend Additionally there can be refactorings with no OO counterpart, monadification the introduction of monadic types into otherwise pure code \DIFdelbegin \DIFdel{, }\DIFdelend for instance.

\section{Summary}

One of the most widely accepted best practices in software development is the concept of incremental change, an essential concept of both \DIFdelbegin \DIFdel{~\mbox{%DIFAUXCMD
\citep{agileManifesto} }\hspace{0pt}%DIFAUXCMD
and ~\mbox{%DIFAUXCMD
\citep{extremeProg}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{the extreme programming and agile software development philosophies (\mbox{%DIFAUXCMD
\cite{extremeProg}}\hspace{0pt}%DIFAUXCMD
; \mbox{%DIFAUXCMD
\cite{agileManifesto}}\hspace{0pt}%DIFAUXCMD
)}\DIFaddend . Refactoring remains a key step in this incremental development process, preventing technical debt from causing development to slow to a crawl.

Much of the refactoring literature focuses on changes that need to be made to the structure of programs. The structure of a program is very important and structural refactorings can maintain the ``separation of concerns'' by extracting functions from existing definitions or ensuring that a \DIFdelbegin \DIFdel{programs }\DIFdelend \DIFaddbegin \DIFadd{program's }\DIFaddend names reflect what the program currently does. This thesis argues that it is just as important to maintain the ways that a program structures and evaluates the data it computes.

This thesis has chosen to use the term ``data-driven'' to describe the type of refactorings that are prompted by the data that a program computes. These refactorings can be prompted by an insufficiently fine-grained data model. For example, the ``introduce type synonym'' refactoring \DIFdelbegin \DIFdel{separates }\DIFdelend \DIFaddbegin \DIFadd{creates a new type synonym that helps separate }\DIFaddend certain instances of a type that are used to represent different \DIFdelbegin \DIFdel{things}\DIFdelend \DIFaddbegin \DIFadd{concepts}\DIFaddend .

This thesis also takes advantage of Haskell's call-by-need evaluation strategy which allows for control flow to be abstracted by the user. The ``generalise monad to applicative'' refactoring (see Chapter~\ref{chp:applicative}) takes code that was formerly monadic and sequentially evaluated and makes it possible to evaluate it in parallel. The ``generalise maybe'' refactoring from \DIFdelbegin \DIFdel{section~\ref{genMaybe} , }\DIFdelend \DIFaddbegin \DIFadd{Section~\ref{genMaybe} }\DIFaddend takes a concrete effect and makes it an abstract one that can be instantiated in multiple ways. Both of these refactorings are applied either as the programmer's understanding of the data they are working with becomes more nuanced or to prepare the program's data model for enhancement. In this way the data ``drives'' these refactorings.

\section{Thesis Outline}

This thesis will proceed as follows:


\DIFaddbegin \textbf{\DIFadd{Chapter~\ref{chp:related}: Related work}}

\DIFadd{This thesis begins with a chapter that discusses some of the related work that this thesis builds on and is inspired by. 
}

%DIF > %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\DIFadd{This chapter covers other refactoring tools for other functional and object-oriented languages. This chapter also discusses multiple projects that are using refactoring in unique and interesting ways such as introducing parallelism, and non-traditional programming languages. Next the chapter describes work on developing syntactic sugar for applicative functors. Finally there is a brief discussion of type changing program transformation systems.
}


\DIFaddend \textbf{Chapter~\ref{chp:hare}: Background: Refactoring Haskell in HaRe}

This chapter covers the history of HaRe \DIFaddbegin \DIFadd{(the }\textbf{\DIFadd{Ha}}\DIFadd{skell }\textbf{\DIFadd{Re}}\DIFadd{factorer)}\DIFaddend , the technologies it depends on, and the current implementation of HaRe. Specifically there is an overview of the\DIFdelbegin \DIFdel{GHC API}\DIFdelend ~\citep{ghcApi}, the generic traversal library Scrap Your Boilerplate~\citep{syb}, and ghc-exactprint~\citep{exactprint}. This chapter also describes how the inner workings of HaRe are implemented and the functions that compose its API. Finally it discusses the design and development process of implementing refactorings for HaRe.

\textbf{Chapter~\ref{chp:ddRefs}: Data-Driven Refactorings}

Chapter~\ref{chp:ddRefs} introduces data-driven refactorings. The chapter begins with a discussion of this type of refactoring for object-oriented languages. The rest of the chapter describes data-driven refactorings for the Haskell programming language. First there is a description of the ``introduce a type synonym\DIFaddbegin \DIFadd{,}\DIFaddend '' the ``renaming'' refactoring of data-driven refactorings. The \DIFaddbegin \DIFadd{renaming refactoring is the most straightforward refactoring, it simply changes the name of a variable to one that better suits the real meaning of the variable. Similarly a type synonym can rename types to better reflect what certain instances of that type are being used for in a program, and this refactoring supports that transformation. The }\DIFaddend other two refactorings covered in this chapter are the ``generalising maybe'' and ``list to \DIFdelbegin \DIFdel{hughes }\DIFdelend \DIFaddbegin \DIFadd{Hughes }\DIFaddend list'' refactorings.

The ``generalising maybe'' refactoring \DIFaddbegin \DIFadd{(Section~\ref{genMaybe}) }\DIFaddend rewrites functions that use the concrete type of \texttt{Maybe} to \DIFdelbegin \DIFdel{instead use}\DIFdelend \DIFaddbegin \DIFadd{use, instead, }\DIFaddend the operations provided by the \DIFdelbegin \DIFdel{typeclasses it implements}\DIFdelend \DIFaddbegin \DIFadd{type classes it implements, }\DIFaddend \texttt{Monad} and/or \texttt{MonadPlus}. The final refactoring described by this chapter is the ``list to Hughes list'' refactoring \DIFaddbegin \DIFadd{(Section~\ref{listToDlist})}\DIFaddend . Hughes lists (also known as difference lists) are an alternative implementation of lists that supports $O(1)$ time appends. This refactoring takes functions that use the standard list implementation and rewrites them to use \DIFdelbegin \DIFdel{Hughe }\DIFdelend \DIFaddbegin \DIFadd{Hughes }\DIFaddend lists instead. The approach for this refactoring is applicable between any two types that are ``reversibly embeddable'', a concept that will be defined as well.

\textbf{Chapter~\DIFdelbegin \DIFdel{\ref{generalImp}}\DIFdelend \DIFaddbegin \DIFadd{\ref{chp:generalImp}}\DIFaddend : Implementing Data-Driven Refactorings in HaRe}

This chapter continues \DIFdelbegin \DIFdel{on }\DIFdelend from the refactoring designs presented in Chapter~\ref{chp:ddRefs} to describe HaRe's implementation of both the ``\DIFdelbegin \DIFdel{Maybe }\DIFdelend \DIFaddbegin \DIFadd{maybe }\DIFaddend to MonadPlus'' and the ``\DIFdelbegin \DIFdel{List }\DIFdelend \DIFaddbegin \DIFadd{list }\DIFaddend to Hughes list'' refactorings. There will also be a discussion of the API that supports the ``\DIFdelbegin \DIFdel{List }\DIFdelend \DIFaddbegin \DIFadd{list }\DIFaddend to Hughes list'' refactoring and can be used to define further ``reversibly embeddable'' type refactorings. Finally this chapter will describe the enhancements made to HaRe's API, specifically the addition of \DIFdelbegin \DIFdel{high level }\DIFdelend \DIFaddbegin \DIFadd{high-level }\DIFaddend transformation functions.

\textbf{Chapter~\ref{chp:applicative}: Generalising Monads to Applicative}

This chapter presents another generalisation refactoring. Applicative functors are a relatively new addition to the Haskell environment. Applicative functors are an interface for sequencing effectful computations. Currently the Haskell community predominantly uses the monadic interface for effects. This chapter will describe the design and implementation of a refactoring for taking a monadic \texttt{do} statement and transforming it to use the \texttt{Applicative} (the Haskell \DIFdelbegin \DIFdel{typeclass }\DIFdelend \DIFaddbegin \DIFadd{type class }\DIFaddend for Applicative functors) interface instead.

This chapter also describes how the Haskell community is currently using the \texttt{Applicative} interface, based on the results of a survey of Hackage\DIFaddbegin \DIFadd{, }\DIFaddend the Haskell package archive~\citep{hackage}. Finally this chapter concludes with a discussion of possible applications of the refactoring.

\textbf{Chapter~\ref{chp:monadification}: Introducing Monads} 

Chapter~\ref{chp:monadification} describes the monadification refactoring. Monadification is the process of introducing monads into pure code. Monads are the standard way for effects to be used in Haskell and so a refactoring to automatically add them is very useful to the community. There are many styles of monadification and this chapter describes several of them. Finally it discusses the implementation of the monadification refactoring in HaRe.

\DIFdelbegin \textbf{\DIFdel{Chapter~\ref{chp:related}: Related work}}
%DIFAUXCMD
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The final content chapter discusses the literature related to this thesis. This chapter covers other refactoring tools for other functional and object-oriented languages. This chapter also discusses multiple projects that are using refactoring in unique and interesting ways such as introducing parallelism, and non-traditional programming languages. Next the chapter describes work on developing syntactic sugar for applicative functors. Finally there is a brief discussion of type changing program transformation systems.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend \textbf{Chapter~\ref{chp:conc}: Conclusion}

The final chapter summarises the work done for this thesis and the contributions it has made. It concludes with a discussion of future work that could be performed on HaRe.

\section{Contributions of this Research}

The work in this thesis was carried out in HaRe\DIFdelbegin \DIFdel{(the }\textbf{\DIFdel{Ha}}%DIFAUXCMD
\DIFdel{skell }\textbf{\DIFdel{Re}}%DIFAUXCMD
\DIFdel{factorer)}\DIFdelend . This study focused on adding additional refactorings to HaRe of a new type. Rather than being motivated by the structural problems of a program\DIFaddbegin \DIFadd{, }\DIFaddend data-driven refactorings seek to resolve issues that are caused by the data types a program uses. The contributions of this research are:

\begin{itemize}
	\item Extending the HaRe API to better support data-driven refactorings. These refactorings are complex and require more information from the abstract syntax of GHC than prior refactorings in HaRe. The contributions to the API were focused on analysis of the types of nodes and creating a higher level interface for common small expression level transformations. These changes are described in \DIFdelbegin \textbf{\DIFdel{chapter~\ref{generalImp}}}%DIFAUXCMD
\DIFdelend \DIFaddbegin \DIFadd{Chapter~\ref{chp:generalImp}}\DIFaddend .
	\item The design and implementation of the ``generalise maybe'' and ``list to Hughes list'' refactoring, described in \DIFdelbegin \DIFdel{chapters}\DIFdelend \DIFaddbegin \DIFadd{Chapters}\DIFaddend ~\ref{chp:ddRefs} and ~\DIFdelbegin \DIFdel{\ref{generalImp}}\DIFdelend \DIFaddbegin \DIFadd{\ref{chp:generalImp}}\DIFaddend . The ``generalise maybe'' refactoring is a way of taking a concrete effect and turning it into an abstract one so that it can be instantiated in multiple ways. The ``list to Hughes list'' refactoring describes a way to rewrite  a program's data model to use a different type with a similar interface.
	\item The design and implementation of the ``\DIFdelbegin \DIFdel{Generalise Monads to Applicative}\DIFdelend \DIFaddbegin \DIFadd{generalise monads to applicative}\DIFaddend '' refactoring, \DIFdelbegin \DIFdel{chapter}\DIFdelend \DIFaddbegin \DIFadd{Chapter}\DIFaddend ~\ref{chp:applicative}. Effects in Haskell are typically handled using monads, a powerful abstraction that allows the type system to check the type of the effects that are being performed by a program. Since the introduction of monads the Haskell community has developed more fine-grained approaches to effect handling. This refactoring allows software systems to handle effects using the applicative as opposed to the monadic interface.
	\item The \DIFdelbegin \DIFdel{design and }\DIFdelend implementation of the ``monadification'' refactoring, see \DIFdelbegin \DIFdel{chapter}\DIFdelend \DIFaddbegin \DIFadd{Chapter}\DIFaddend ~\ref{chp:monadification}. Monadification is the process of making a program work over a monadic type rather than a pure one. This refactoring supports the \DIFdelbegin \DIFdel{evolution }\DIFdelend \DIFaddbegin \DIFadd{transformation }\DIFaddend of pure programs to effectful ones.
\end{itemize}


%DIF > %%%%%%%%%%%%%%%%%%%%%%%%%
\DIFaddbegin 


%DIF > %%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{\DIFadd{Related Work}}
\label{chp:related}

\DIFadd{This chapter reviews current work in refactoring and other areas of the literature and helps to situate the work described in this thesis into a broader landscape. This chapter begins, in Section~\ref{ideTools}, with a short description of refactoring tools, including the sophisticated modern IDEs that support object-oriented languages and the refactoring tools that support functional languages. 
}

\DIFadd{Next Sections~\ref{refacParallel} and~\ref{applicativeDo} will describe some of the work done to transform programs so that they are executed in parallel rather than sequentially. The original idea behind refactoring was to reduce the technical debt of a target program. Technical debt is the idea that when a system is being implemented there can be quick and messy ways to implement things, but this implementation will incur ``debt'' on the project. Much like financial debt some debt is needed to get a project off the ground but also like financial debt if debt is allowed to grow unchecked it can grind a project to a halt~\mbox{%DIFAUXCMD
\citep{techDebt}}\hspace{0pt}%DIFAUXCMD
. Refactoring was traditionally the process of ``paying back'' this debt; however, the potential scope of refactorings has been expanded to include, for example, introducing parallelism rather than just improving code quality. 
}

\DIFadd{Section~\ref{typeTrans} covers work done in the program transformation and refactoring fields. In particular earlier versions of monadification are discussed, such as ``Reuse by Program Transformation''~\mbox{%DIFAUXCMD
\citep{lammelReuse}}\hspace{0pt}%DIFAUXCMD
, and ``Monadification of Functional Programs''~\mbox{%DIFAUXCMD
\citep{monadification}}\hspace{0pt}%DIFAUXCMD
. Other work that is covered in this section includes work on refactoring and program transformation that is particularly focused on types.
}

\DIFadd{The final section of this chapter discusses how refactoring tools are implemented. This section pays special attention to refactoring tools that target languages other than Haskell, because the focus of Chapter~\ref{chp:hare} is on building refactoring tools for Haskell.
}

\section{Refactoring Tools in Modern IDEs}\label{ideTools}
\DIFadd{Refactoring tools have become a standard feature in integrated development environments. The four most popular IDEs for object-oriented languages, Eclipse}\footnote{\url{https://www.eclipse.org/}}\DIFadd{, NetBeans}\footnote{\url{https://netbeans.org/}}\DIFadd{, IntelliJ}\footnote{\url{https://www.jetbrains.com/idea/}}\DIFadd{, and Visual Studio}\footnote{\url{https://www.visualstudio.com/}} \DIFadd{all come with refactoring tools for their primary language~\mbox{%DIFAUXCMD
\citep{ides}}\hspace{0pt}%DIFAUXCMD
. These refactoring tools support some general refactorings (renaming, method extraction) and some that are specific to object-oriented languages (pushing/pulling methods up/down the object hierarchy). 
}

\DIFadd{Modern IDEs, however, were not designed with functional programming in mind. Eclipse, NetBeans, and IntelliJ were all built to support Java and Visual Studio supports multiple languages, most prominently C++ and C\#.}\footnote{\DIFadd{Visual Studio also includes support for F\# out of the box, but this could be considered the exception that proves the rule.}} \DIFadd{These tools allow community-developed plugins to expand the languages they can support but the plugins can fall out of date; for example, the Haskell Eclipse plugin stopped being supported in 2015.}\footnote{\DIFadd{See: }\url{https://wiki.haskell.org/IDEs\#EclipseFP_plugin_for_Eclipse_IDE}} \DIFadd{Though modern IDEs are heavily depended on in object-oriented development, functional programming languages have developed their own, mostly independent tooling ecosystems. This will be the focus of the next section, refactoring tools for functional programming languages.
}

\section{Refactoring Tools for functional languages}\label{funcTools}
\DIFadd{A reason that often used to be given to explain why functional languages are not in widespread use in industry is the lack of a robust tooling ecosystem~\mbox{%DIFAUXCMD
\citep{wadlerTools}}\hspace{0pt}%DIFAUXCMD
. This is no longer the case as functional language ecosystems have undergone a great deal of development in recent years and, maybe coincidentally, use in industry has gone up substantially in the last five years. This section will cover some existing refactoring and code smell tools for functional programming languages.
}

\subsection{HLint}

\DIFadd{HLint is a ``code smell'' tool for Haskell. Poorly designed code often produces ``smells,'' apparently superficial problems that indicate deeper design issues~\mbox{%DIFAUXCMD
\citep{fowler}}\hspace{0pt}%DIFAUXCMD
. One of the most common of these smells is duplicated code. Other hints include simplifying boolean expressions to remove unneeded calls to }\texttt{\DIFadd{not}} \DIFadd{(e.g. ``}\texttt{\DIFadd{not (a == b)}}\DIFadd{'' should become ``}\texttt{\DIFadd{(a /= b)}}\DIFadd{''), or replacing common types of folds with their Prelude-defined names (e.g. }\texttt{\DIFadd{sum}} \DIFadd{can replace }\texttt{\DIFadd{foldr (+) 0}}\DIFadd{). A code smell tool suggests changes to a code base such as alternative functions to use, how to simplify code, and redundancies~\mbox{%DIFAUXCMD
\citep{hlint}}\hspace{0pt}%DIFAUXCMD
.
}

\DIFadd{Code smell and refactoring tools are very closely related. Simplistically a code smell tool detects problems in a code base and a refactoring tool fixes them. If a tool can detect a problem why can't the same program fix it? HLint has a }\texttt{\DIFadd{-{}-refactor}} \DIFadd{flag that will automatically apply the suggestions. However there can be a problem in doing this, e.g. a single piece of code could have multiple smells; how would HLint choose which one to apply? Also once a transformation has been applied other hints may no longer be applicable. HLint's behaviour in these cases is not documented.
}

\DIFadd{One of the powerful features of HLint is its customizability. An HLint configuration file (called }\texttt{\DIFadd{hlint.yaml}}\DIFadd{) added to the root of a project   will be detected by HLint and it will suggest both the default hints as well as the custom hints from that file. Hints are very simple to write.
}

\begin{figure}[t]
\begin{lstlisting}
- hint: {lhs: x !! 0, rhs: head x}
\end{lstlisting}
\caption{\DIFaddFL{A simple hint from~\mbox{%DIFAUXCMD
\citep{hlint}}\hspace{0pt}%DIFAUXCMD
}}
\label{lstHint}
\end{figure}

\DIFadd{Figure~\ref{lstHint} contains the definition of a hint that detects when the list index operator is being used to look up the 0th element of a list and suggests using }\texttt{\DIFadd{head}} \DIFadd{instead. The }\texttt{\DIFadd{lhs}} \DIFadd{tag is the code HLint will search for. If code matching }\texttt{\DIFadd{lhs}} \DIFadd{is found, HLint will suggest the code be replaced with the }\texttt{\DIFadd{rhs}} \DIFadd{code. HLint assumes any single-character variable is a substitution parameter. Given the hint from Figure~\ref{lstHint} and the following code:
}

\begin{lstlisting}
f list = list !! 0
\end{lstlisting}

\DIFadd{HLint produces the following output.
}

\begin{lstlisting}
 Suggestion: Use head
Found:
  list !! 0
Why not:
  head list

1 hint
\end{lstlisting}

\DIFadd{A major limitation of HLint is that it is only aware of a single module at a time. HLint is not aware of what types or names are in scope and code smells that span multiple modules cannot be discovered.
}


\subsection{Haskell Tools Refact}

\DIFadd{HaRe is not the only refactoring tool for Haskell. In late 2016 Haskell Tools Refact was announced and is currently at version 0.7~\mbox{%DIFAUXCMD
\citep{haskellTools}}\hspace{0pt}%DIFAUXCMD
. The Haskell Tools project is a GHC based developer tool kit for writing transformations~\mbox{%DIFAUXCMD
\citep{haskellToolsGit}}\hspace{0pt}%DIFAUXCMD
. There are eight refactorings currently supported}\footnote{\DIFadd{May 2018}}\DIFadd{.
}

\begin{itemize}
\item \DIFadd{Rename
}\item \DIFadd{Generate type signature
}\item \DIFadd{Generate exports
}\item \DIFadd{Extract binding
}\item \DIFadd{Inline binding
}\item \DIFadd{Organize imports
}\item \DIFadd{Float out
}\item \DIFadd{Organize extensions
}\end{itemize}

\DIFadd{Haskell Tools has implemented  its own abstract syntax tree. The AST of Haskell Tools is generated using information from all of GHC's compiler stages. Each node represents the same language elements; it just includes additional information that is spread across the different stages of the GHC~\mbox{%DIFAUXCMD
\citep{haskellTools}}\hspace{0pt}%DIFAUXCMD
.
}

\DIFadd{The Haskell Tools refactorer is currently integrated into the Atom editor}\footnote{\url{https://atom.io/}} \DIFadd{with Sublime Text}\footnote{\url{https://www.sublimetext.com}} \DIFadd{support planned for the near future~\mbox{%DIFAUXCMD
\citep{haskellTools}}\hspace{0pt}%DIFAUXCMD
.}

\subsection{Wrangler}\label{wranglerOne}
\DIFadd{Wrangler is a refactoring and code inspection tool for Erlang~\mbox{%DIFAUXCMD
\citep{wrangler}}\hspace{0pt}%DIFAUXCMD
. Erlang is a functional programming language designed to be massively scalable and highly fault tolerant~\mbox{%DIFAUXCMD
\citep{erlang}}\hspace{0pt}%DIFAUXCMD
. It was originally developed in 1986 by Joe Armstrong, Robert Virding, and Mike Williams at the Computer Science Laboratory at Ericsson Telecom AB~\mbox{%DIFAUXCMD
\citep{erlangHistory}}\hspace{0pt}%DIFAUXCMD
. Erlang's core design tenets include lightweight processes that communicate through message passing. Erlang also boasts a ``let it fail'' error handling architecture: when a process fails the error is not handled by the process where it occurred, but is handled by a separate dedicated part of the program~\mbox{%DIFAUXCMD
\citep{armstrongThesis}}\hspace{0pt}%DIFAUXCMD
.
}

\DIFadd{Wrangler is accessible from the command line and has been integrated into both Emacs and Eclipse. It currently supports a large library of refactorings, code smells, as well as other program analysis tools such as clone detection and automatic API migration~\mbox{%DIFAUXCMD
\citep{wrangler}}\hspace{0pt}%DIFAUXCMD
. Additionally Wrangler supports a template-based API and a domain specific language which allow users to define their own refactorings and script composite refactorings~\mbox{%DIFAUXCMD
\citep{wranglerDomain}}\hspace{0pt}%DIFAUXCMD
.
}

\DIFadd{The template-based API of Wrangler allows users to define program analyses and transformations using Erlang concrete syntax. Wrangler templates consist of fragments of Erlang syntax that may contain meta-variables or meta-atoms that can stand for any language element. Meta-variables/atoms are variables or atoms that end with the ``}\texttt{\DIFadd{@}}\DIFadd{'' character; this meta-variable/atom binds a language element to its name so that that element can be referred to by name in the definition of the refactoring. Meta-variables/atoms that end with ``}\texttt{\DIFadd{@@}}\DIFadd{'' are list meta-variables/atoms that match a sequence of language elements as long as they are of the same sort~\mbox{%DIFAUXCMD
\citep{letsUser}}\hspace{0pt}%DIFAUXCMD
.}\footnote{\DIFadd{Things like the arguments to a function or a sequence of expressions in a function body are the same ``sort.''}}

\begin{figure}[t]
\begin{lstlisting}[language=erlang]
?T("erlang:spawn(Arg@)")

?T("erlang:spawn(Arg@@)")
\end{lstlisting}
\caption{\DIFaddFL{Some Wrangler templates}}
\label{templates}
\end{figure}

\DIFadd{The first template in Figure~\ref{templates} matches applications of }\texttt{\DIFadd{erlang:spawn}} \DIFadd{when it is called with one argument whereas the second template will match the same function with any number of arguments.
}

\DIFadd{Composite refactorings are refactorings that are made up of multiple refactorings run, in sequence, one after the other. It can be challenging to develop composite refactorings if they are not explicitly handled by the refactoring tool. The naive solution just chains refactorings together with the output from one refactoring in a composite refactoring becoming the input to the next refactoring. However, what if the second refactoring fails in a chain of four? Composite refactoring definitions, without tool support, become filled with error handling code to manage the situation when one of the component refactorings fail. Wrangler defines a domain specific language that helps describe the various facets of a composite refactoring.
}

\DIFadd{The Wrangler DSL supports the creation of a composite refactoring through a variety of features. First, Wrangler extends every primitive refactoring with a }\textit{\DIFadd{refactoring command generator}}\DIFadd{. A command generator allows the extended refactoring to accept not just concrete values but also structures that specify how the parameter should be generated; each parameter of a command generator accepts either a concrete value, a condition that checks if a value is satisfactory, or a generator for creating the parameter based on the previous parameters. A refactoring for renaming functions named with the format }\texttt{\DIFadd{camelCase}} \DIFadd{to }\texttt{\DIFadd{camel\_case}} \DIFadd{would accept three arguments: the target filename, the name of the target function, and the desired new name. This command generator's first parameter is a condition that always returns true because any file is a valid target for renaming. The second parameter is another condition that checks if the function name is in camel case format (e.g. ``}\texttt{\DIFadd{aFunName}}\DIFadd{''). The final parameter is generated by taking the second parameter and modifying it so that the name is in the corresponding ``snake case'' format (e.g. ``}\texttt{\DIFadd{a\_fun\_name}}\DIFadd{''). 
}

\DIFadd{The DSL also allows decision making to occur during the execution of a composite refactoring. Composite refactorings are transactional and can be either atomic or non-atomic. Atomic composite refactorings require each component refactoring to be successfully applied before continuing onto the next refactoring. If a single refactoring fails inside of an atomic composite refactoring, the entire refactoring fails and the program remains unchanged. When a single refactoring fails inside a non-atomic composite refactoring, by contrast, the entire refactoring does not fail but continues by trying the next refactoring in the sequence. The Wrangler DSL allows for refactorings to be described as atomic and non-atomic sections at each level.
}

\subsection{RefactorErl}\label{refactorErl}

\DIFadd{Another notable tool for the Erlang language is RefactorErl. RefactorErl started out as another refactoring tool for Erlang but has since expanded into a source code analysis and transformation tool~\mbox{%DIFAUXCMD
\citep{refactorErl}}\hspace{0pt}%DIFAUXCMD
. RefactorErl uses a ``semantic program graph'' to represent an Erlang source program; this graph is broken up into three layers~\mbox{%DIFAUXCMD
\citep{erlangStatic}}\hspace{0pt}%DIFAUXCMD
:
}

\begin{enumerate}
	\item \DIFadd{Lexical layer: This is where token, spacing, and comment information is kept about the source program.
	}\item \DIFadd{Syntactic layer: This layer keeps the abstract syntax tree of the source program.
	}\item \DIFadd{Semantic layer: This contains additional calculated semantic information about the source program, such as module and function references as well as variable bindings.
}\end{enumerate}

\DIFadd{Refactoring tools, in general, only require the information contained within these first two layers; the semantic layer helps RefactorErl implement its static analysis capabilities. The semantic program graph is constructed after a source program's abstract syntax has been obtained from the Erlang language front end. The semantic layer is built on top of the abstract syntax tree by several different static analysers that each add a different kind of information to the graph. For example, the function analyser adds a semantic function node when the first reference to or the definition of a function is found. Every reference to that function discovered after that points to the original node~\mbox{%DIFAUXCMD
\citep{erlangStatic}}\hspace{0pt}%DIFAUXCMD
. 
}

\DIFadd{The edges in the semantic layer are also labeled so the relationships between nodes can be captured as well. The function analyser, for instance, labels the edges going from the semantic function node to internal references to that function with a }\textit{\DIFadd{funIref}} \DIFadd{label, and the }\textit{\DIFadd{fundef}} \DIFadd{label connects the semantic node to the function definition it represents. The semantic layer is critical for the static analysis RefactorErl performs but it is also quite useful for the refactorings as well. Consider the ``move a function'' refactoring}\footnote{\DIFadd{See: }\url{http://pnyf.inf.elte.hu/trac/refactorerl/wiki/RefactoringSteps/MoveFunction}}\DIFadd{, which moves a function definition from one module to another, every reference in the target function needs to be checked to ensure that those definitions are available in the new scope of the function. Without a semantic layer a refactoring tool may have to traverse the syntax tree multiple times to locate all the references the target function contains and ensure they are available in the function's new location. With the semantic layer, on the other hand, finding all of the references can be done in two steps over the graph (one step from the reference to the semantic node, the next step following the semantic node's ``definition'' edge, e.g. }\textit{\DIFadd{fundef}}\DIFadd{, }\textit{\DIFadd{fielddef}}\DIFadd{, etc.).
}

\DIFadd{RefactorErl currently supports 24 different refactorings as well as a suite of static analysis and program comprehension tools. 
}

\subsection{ROTOR}

\DIFadd{A newcomer to the refactoring tools for functional language space is ROTOR,}\footnote{\textbf{\DIFadd{R}}\DIFadd{eliable }\textbf{\DIFadd{O}}\DIFadd{Caml-based }\textbf{\DIFadd{T}}\DIFadd{ool for }\textbf{\DIFadd{O}}\DIFadd{Caml }\textbf{\DIFadd{R}}\DIFadd{efactoring}} \DIFadd{the first refactoring tool to target OCaml~\mbox{%DIFAUXCMD
\citep{rotor}}\hspace{0pt}%DIFAUXCMD
. Language features of OCaml provide some unique challenges for a refactoring tool. In OCaml one module may be included in another so that, for example, when renaming the function }\texttt{\DIFadd{f}} \DIFadd{in module }\texttt{\DIFadd{A}} \DIFadd{but }\texttt{\DIFadd{A}} \DIFadd{is included in module }\texttt{\DIFadd{B}} \DIFadd{then both }\texttt{\DIFadd{A.f}} \DIFadd{and }\texttt{\DIFadd{B.f}} \DIFadd{will need to be renamed. ROTOR handles this by decomposing refactorings into a set of textual replacement operations that can depend on other transformations~\mbox{%DIFAUXCMD
\citep{rotor}}\hspace{0pt}%DIFAUXCMD
. From the renaming example renaming ``}\texttt{\DIFadd{A.f}}\DIFadd{'' would depend on the ``}\texttt{\DIFadd{B.f}}\DIFadd{'' renaming succeeding and vice versa.
}

\DIFadd{An opportunity of the ROTOR project is that it has a partner in industry, Jane Street Capital. ROTOR is using the core library,}\footnote{\url{https://github.com/janestreet/core}} \DIFadd{an ``industrial strength'' version of the OCaml standard library, as a test bed for testing the refactoring tool.
}

\section{Refactoring to introduce parallelism}\label{refacParallel}

\DIFadd{The reasons to refactor source code have also expanded beyond code quality. This section will describe two different projects that have developed refactorings to change the execution of a program from single- to multi-threaded. Functional programming languages are well suited to parallel execution due to immutability by default and, in some languages (such as Erlang), first-class concurrency features. This section will first describe ``ParaForming'', which uses refactoring to introduce parallel abstractions into Haskell code; then it will describe work done to refactor Erlang code to introduce algorithmic skeletons.
}

\subsection{ParaForming}  
\DIFadd{ParaForming is an approach to construct parallel programs from an existing program using software refactoring~\mbox{%DIFAUXCMD
\citep{paraforming}}\hspace{0pt}%DIFAUXCMD
. The ParaForming work targets Glasgow parallel Haskell: ``GpH'', an extension to Haskell, and is implemented in HaRe. Parallelism is added to programs in GpH using strategies (see Figure~\ref{strategy}). 
}

\begin{figure}[t]
\begin{lstlisting}
type Strategy a = a -> Eval a
\end{lstlisting}
\caption{\DIFaddFL{The strategy type}}
\label{strategy}
\end{figure}   

\DIFadd{A strategy takes its argument and determines how it will be evaluated inside of the }\texttt{\DIFadd{Eval}} \DIFadd{monad. The }\texttt{\DIFadd{rpar}} \DIFadd{strategy introduces parallelism by ``sparking'' its argument. Sparks are tasks that are collected into a pool which is managed by the runtime. The spark pool is a source of work that GHC can pull from when there are idle processors. Sparks may be evaluated in parallel or not at all depending on the availability of spare cores.
}

\DIFadd{The simplest parallel refactoring is to introduce data parallelism. This refactoring is applied to an expression that works over a list and evaluates each member of that list in a spark. A sequential function that sums the Euler totient function is in Figure~\ref{eulerSeq} and the refactored program is in Figure~\ref{eulerPar1}.
}

\begin{figure}[t]
\begin{lstlisting}
sumEulerSeq :: Int -> Int
sumEulerSeq n = sum (map euler (mkList n))
\end{lstlisting}
\caption{\DIFaddFL{A sequential calculation that sums the Euler totient function}}
\label{eulerSeq}
\end{figure} 


\begin{figure}[t]
\begin{lstlisting}
sumEulerPar1 :: Int -> Int
sumEulerPar1 n = sum (map euler (mkList n) `using` parList rdeepseq)
\end{lstlisting}
\caption{\DIFaddFL{A refactored version of the function from Figure~\ref{eulerSeq}}}
\label{eulerPar1}
\end{figure} 

\DIFadd{This refactoring evaluates the calculation of }\texttt{\DIFadd{map euler (mkList n)}} \DIFadd{using}\footnote{\DIFadd{The (}\texttt{\DIFadd{using :: a -> Strategy a -> a}}\DIFadd{) function just evaluates some expression with the given strategy.}} \DIFadd{the }\texttt{\DIFadd{parList rdeepseq}} \DIFadd{strategy. The }\texttt{\DIFadd{parList}}\footnote{\texttt{\DIFadd{parList :: Strategy a -> Strategy }[\DIFadd{a}]}} \DIFadd{function evaluates each element of a list in parallel according to a given strategy and }\texttt{\DIFadd{rdeepseq}} \DIFadd{is the strategy that fully evaluates its argument. 
  }

\DIFadd{The refactored program in Figure~\ref{eulerPar1} is highly parallel but not very efficient because the parallelism is too fine grained. Another refactoring can help in this case: instead of sparking every element of a list, another strategy can be introduced, one that separates the list into ``chunks'' and executes each of the chunks in parallel. This refactoring adds an additional argument to the function that determines the size of the chunks the list will be split into, as seen in Figure~\ref{eulerChunk}.
  }

\begin{figure}[t]
\begin{lstlisting}
sumEulerChunk :: Int -> Int -> Int
sumEulerChunk c n = sum (map euler (mkList n) `using` parListChunk c rdeepseq)
\end{lstlisting}
\caption{\DIFaddFL{A ``chunked'' version of the function from Figure~\ref{eulerSeq}}}
\label{eulerChunk}
\end{figure}  

\DIFadd{These two refactorings are both a way of introducing data parallelism with varying degrees of granularity. The other form of parallelism is known as task parallelism. Where data parallelism is focused on computing different parts of a data structure in parallel (the elements of a list in the previous case), task parallelism instead focuses on having different ``tasks'' executed in parallel. The work done in~\mbox{%DIFAUXCMD
\cite{paraforming} }\hspace{0pt}%DIFAUXCMD
outlines a refactoring that can make recursive calls happen in parallel.
}

\subsection{Cost-Directed Parallel Refactoring}

\DIFadd{The previous section touched on one of the big challenges of parallel programming, determining the correct level of parallelism to achieve maximum performance. \mbox{%DIFAUXCMD
\cite{parallelErl} }\hspace{0pt}%DIFAUXCMD
describes a methodology to introduce algorithmic skeletons into Erlang programs using the Erlang refactoring tool Wrangler. In addition to introducing a skeleton this work provides cost models that estimate the performance of the program after adding each skeleton. This estimate helps a programmer to make an informed decision about which parallelisation strategy is the best for a particular program. 
}

\DIFadd{An algorithmic skeleton is a common parallel pattern. A skeleton is implemented as a higher-order function that takes in a sequential function and any parameters that the skeleton requires. \mbox{%DIFAUXCMD
\cite{parallelErl} }\hspace{0pt}%DIFAUXCMD
discusses the four most common and useful skeletons. For example, the map skeleton works by breaking up the target data into pieces that can be operated on in parallel. Finally, the results from the parallel computations are combined back into a single result. One of the examples presented in~\mbox{%DIFAUXCMD
\cite{parallelErl} }\hspace{0pt}%DIFAUXCMD
is an image processing system that denoises images. Denoising a section of an image can be done independently from processing the other sections of the same image. Introducing the map skeleton would break the image into pieces to be denoised in parallel; the output sections can then be stitched back together again.
}

\DIFadd{Skeletons are simple to understand in theory but it can be difficult to know which to apply in practice. This is when the cost models of each skeleton become useful to help make an informed decision about which skeleton should cause the greatest speed up and this information can be used to guide the refactoring. In~\mbox{%DIFAUXCMD
\cite{parallelErl} }\hspace{0pt}%DIFAUXCMD
an initial benchmark of the program can be used to estimate the speed up that different skeletons could provide.
}

\DIFadd{Parallelisation can be tedious and difficult to do, which makes it a good candidate for tool assistance. A refactoring tool can guide a programmer through the process of parallelisation. Much like how the data-driven refactorings require multiple small changes before the entire process can be considered ``finished'', changing a program to run in parallel is also a sequence of several smaller changes.
}

\section{ApplicativeDo}\label{applicativeDo}

\DIFadd{Haskell is a pure functional programming language. Purity in this context means that Haskell is side-effect free. This causes some confusion because in most other languages side-effects are allowed, if not the primary way that programs produce their ``result.'' Haskell instead models side-effect causing computations using Monads, which take effects of computation that are typically implicit and make them an explicit result of the program instead. Beyond modeling side effect causing operations, monads allow for computations to be supplemented with additional features~\mbox{%DIFAUXCMD
\citep{haskellWikiMonad}}\hspace{0pt}%DIFAUXCMD
.
}

\DIFadd{Haskell supports the writing of monadic code through the ``}\texttt{\DIFadd{do}}\DIFadd{'' syntax sugar, an example of this syntax can be seen in Figure~\ref{doF}. Monads and their }\texttt{\DIFadd{do}} \DIFadd{syntax have become a commonly accepted pattern for handling effects within the Haskell community. 
}

\DIFadd{Applicative functors are another type class that describe computations performed within some context, but are less powerful than monads. They were first described by~\mbox{%DIFAUXCMD
\cite{mcbrideIdioms}}\hspace{0pt}%DIFAUXCMD
, and a fairly recent change to GHC made the }\texttt{\DIFadd{Applicative}} \DIFadd{type class}\footnote{\texttt{\DIFadd{Applicative}} \DIFadd{is what the GHC calls the type class that implements applicative functors.}} \DIFadd{a superclass of }\texttt{\DIFadd{Monad}} \DIFadd{which means that every instance of }\texttt{\DIFadd{Monad}} \DIFadd{now must also implement the }\texttt{\DIFadd{Applicative}} \DIFadd{interface as well.   
}

\DIFadd{Compiler changes can't force a community to change its practices and }\texttt{\DIFadd{Applicative}} \DIFadd{remains under-utilised compared to }\texttt{\DIFadd{Monad}}\DIFadd{. This under-utilisation of applicative functors in Haskell has not gone unnoticed; in \mbox{%DIFAUXCMD
\cite{applicativeDo}}\hspace{0pt}%DIFAUXCMD
, an implementation of a language extension for GHC was introduced that changes the way Haskell interprets }\texttt{\DIFadd{do}} \DIFadd{statements so that applicative functors can be supported by the same }\texttt{\DIFadd{do}} \DIFadd{syntactic sugar. }\texttt{\DIFadd{Applicative}}\DIFadd{s offer a key advantage over monads: when applicative functors are composed, the results they calculate remain independent of each other. This means that values under applicative functors can be evaluated in parallel. Using the familiar }\texttt{\DIFadd{do}} \DIFadd{syntactic sugar to also support }\texttt{\DIFadd{Applicative}}\DIFadd{s means that programs can become concurrent ``for free''; this is the main motivation behind the applicative-do work~\mbox{%DIFAUXCMD
\citep{applicativeDo}}\hspace{0pt}%DIFAUXCMD
.
}

\DIFadd{Figure~\ref{doF} shows a simple function constructed using Haskell's do notation. The standard way that this function would be desugared is shown in Figure~\ref{fDesugar}. Finally Figure~\ref{fApDoDesugar} shows how the same function would be desugared when the }\texttt{\DIFadd{ApplicativeDo}} \DIFadd{language extension is turned on.
}

\begin{figure}[t]
\begin{lstlisting}
f = do
	x1 <- A
	x2 <- B x1
	x3 <- C
	return (x2,x3)
\end{lstlisting}
\caption{\DIFaddFL{A simple monadic function constructed using a }\texttt{\DIFaddFL{do}} \DIFaddFL{statement.}}
\label{doF}
\end{figure}

\begin{figure}[t]
\begin{lstlisting}
f = A >>=
	(\x1 -> B x1 >>=
		(\x2 -> C >>=
			(\x3 -> return (x2,x3))))
\end{lstlisting}
\caption{\DIFaddFL{The desugared version of }\texttt{\DIFaddFL{f}} \DIFaddFL{from Figure~\ref{doF}.}}
\label{fDesugar}
\end{figure}

\begin{figure}[t]
\begin{lstlisting}
f = (\x2 x3 -> (x2, x3))
        <$> (A >>= (\x1 -> B x1))
        <*> C                
\end{lstlisting}
\caption{\DIFaddFL{How }\texttt{\DIFaddFL{f}} \DIFaddFL{will be desugared when applicative do is turned on.}}
\label{fApDoDesugar}
\end{figure}

\DIFadd{The }\textit{\DIFadd{ApplicativeDo}} \DIFadd{algorithm will attempt to insert as many applies into the expression as possible. When the implementation of the }\texttt{\DIFadd{Applicative}} \DIFadd{instance evaluates the two arguments of apply in parallel, better performance can be achieved by adding more applies. 
}

\DIFadd{There could be multiple ways to desugar a particular function. The }\textit{\DIFadd{ApplicativeDo}} \DIFadd{algorithm first assumes that every expression has an identical time cost, and from this assumption the algorithm heuristically determines the desugaring with the shortest execution time.
}


\section{Program transformations}\label{typeTrans}

\DIFadd{Refactoring is a type of program transformation but it does not constitute the whole field. A major difference between refactoring and other types of program transformations is that a refactoring must take into account the human readability of its output. Program transformations typically focus on just the algorithm whereas refactorings must take into account the broader effects a transformation has on a codebase and the context that programs exist in. Additionally the target program of a refactoring needs to be readable, maintainable, and keep proper layout and user comments. Other types of program transformation don't typically have these concerns. This section will describe some of the program transformation work most relevant to this thesis. First it will describe the type and transform system developed by~\mbox{%DIFAUXCMD
\cite{typeAndTransformSemantics}}\hspace{0pt}%DIFAUXCMD
. Next there will be a discussion of the previous methods of monadification found in the literature. 
}

\subsection{Type and transform systems}

\DIFadd{The type-and-transform system described in~\mbox{%DIFAUXCMD
\citep{typeAndTransformSemantics} }\hspace{0pt}%DIFAUXCMD
is a system for semantics-preserving and type-changing program transformations over the typed lambda calculus with let polymorphism. The type-and-transform system is limited to isomorphic types: there must be a way to convert between the two types and back again, as described in Figure~\ref{transformIso}.
}

\begin{figure}[t]
\begin{lstlisting}
rep :: A -> R
abs :: R -> A

rep . abs = id
abs . rep = id
\end{lstlisting}
\caption{\DIFaddFL{The properties that must hold for the type-and-transform system to work over types }\texttt{\DIFaddFL{A}} \DIFaddFL{and }\texttt{\DIFaddFL{R}}}
\label{transformIso}
\end{figure} 

\DIFadd{The type-and-transform system supports type-changing rewrites through typed rewrite rules that insert conversions between the source and target types as appropriate. To handle the fact that there are multiple ways to retype a program each rewrite rule is weighted to maximize the use of the target type, introduce the target type as soon as possible in the program, and delay the conversion back to the source type as late as possible.
}

\DIFadd{This work emphasises formalisation and its correctness, and the work is done in the context of the lambda calculus rather than a full programming language. There is a Haskell implementation of their system, but it is only a prototype. The authors state that they want to expand this work to support Haskell; however, this has not been published yet.
}

\subsection{Automatic Monadification}\label{erwigMonad}

\DIFadd{Monadification is not a new problem and various solutions have been presented in the literature. In~\mbox{%DIFAUXCMD
\citep{lammelReuse} }\hspace{0pt}%DIFAUXCMD
monadification is performed in two steps. First the program is transformed into A-normal form}\footnote{\DIFadd{This is also known as sequencing}}\DIFadd{, which flattens applications into let expressions. The first line of Figure~\ref{anormal} shows a normal expression and line 3 of the same Figure shows that expression in A-normal form.
}

\begin{figure}[t]
\begin{lstlisting}
f (g x) (h y)

let x1 = g x in
	let x2 = h y in
		f x1 x2
\end{lstlisting}
\caption{\DIFaddFL{A-normal form conversion}}
\label{anormal}
\end{figure}

\DIFadd{Once the program has been converted into A-normal form, a let expression of the form: 
}

\DIFadd{$ \mathbf{let}\ x = t_1\ \mathbf{in}\ t_2 $ 
}

\DIFadd{is transformed into:
}

\DIFadd{$ t_1 \mathbin{>\!\!>\!\!=} \lambda x.\, t_2 $
}

\DIFadd{If the right hand side of the lambda is not already a monadic type then }\texttt{\DIFadd{return}} \DIFadd{will be introduced, e.g.\ $ t_1 \mathbin{>\!\!>\!\!=} \lambda x.\, \mathbf{return}\ t_2 $. The full transformation is given by inference rules in~\mbox{%DIFAUXCMD
\citep{lammelReuse}}\hspace{0pt}%DIFAUXCMD
.
}

\DIFadd{Monadification is developed further by~\mbox{%DIFAUXCMD
\cite{monadification}}\hspace{0pt}%DIFAUXCMD
. This work provides an algorithm for restricted call-by-value monadification as opposed to the semantics style inference rules defined in~\mbox{%DIFAUXCMD
\citep{lammelReuse}}\hspace{0pt}%DIFAUXCMD
. This work targets the lambda calculus extended with case and let expressions. The algorithm from~\mbox{%DIFAUXCMD
\citep{monadification} }\hspace{0pt}%DIFAUXCMD
is very similar to the one implemented in HaRe. It has the same precondition that every call to a monadified function must be fully saturated, which means that every call site of a target function needs all of its parameters to be named variables, and it produces the same style of monadification as the implementation provided in HaRe. A prototype implementation of this method was produced as a part of~\mbox{%DIFAUXCMD
\citep{monadification}}\hspace{0pt}%DIFAUXCMD
.
}

\DIFadd{The idea of providing refactoring as a monadification was first discussed in~\mbox{%DIFAUXCMD
\cite{monadSurvey}}\hspace{0pt}%DIFAUXCMD
. This source provides a comprehensive discussion of the different styles of monadification. There are several different, equally ``correct,'' ways to monadify a function. A refactoring tool, perhaps more than other types of program transformations, needs to consider what its users' intentions and desires are for applying the transformation so that the most useful output can be produced. This is particularly important for refactoring tools because their output needs to be directly usable by developers. This makes the discussion of monadification style very relevant, since the monadified functions need to be easily usable. This issue of style is continued in Chapter~\ref{chp:monadification}. 
}

\subsection{Data Type Transformations}\label{dtt}

\DIFadd{Data types are obviously the focus of data-driven refactorings and a considerable amount of work has been focused on them in the program transformation and refactoring literature. 
}

\DIFadd{In~\mbox{%DIFAUXCMD
\cite{dataTypeFramework} }\hspace{0pt}%DIFAUXCMD
the authors present a set of transformation primitives that help modify data types, either through writing scripts or via a GUI in an interactive mode. This framework can be used to define refactorings but its creators implemented operators that can be used for behaviour changing transformations as well, which take the framework's capabilities beyond refactoring. This framework, for example, supports the insertion and deletion of constructor components~\mbox{%DIFAUXCMD
\citep{dataTypeFramework}}\hspace{0pt}%DIFAUXCMD
. 
}

\DIFadd{This framework created a set of transformation operators that work over Haskell but the design of the framework is general enough to be implemented in other languages that also support algebraic data types.
}

\subsection{Generic Refactorings}

\DIFadd{The previous subsection discussed a transformation system for data types and one of the main contributions of that framework is its language agnostic design~\mbox{%DIFAUXCMD
\citep{datatypeTransformation}}\hspace{0pt}%DIFAUXCMD
. This section describes another language agnostic system for program transformation which was reported in ``Towards Generic Refactoring''~\mbox{%DIFAUXCMD
\citep{genericRefactoring}}\hspace{0pt}%DIFAUXCMD
. This work asks the question what types of refactorings can be applied to many different programming languages and even markup languages such as XML. 
}

\DIFadd{This paper provides an implementation of the refactoring framework in Haskell but also provides an interface that can be instantiated for many different languages. This is possible because, despite syntax differences between different programming languages, certain refactoring actions have the same purpose regardless of the target language though the refactorings will need to be pointed towards the correct syntax elements for the target language. A good example of this is the ``extraction'' refactoring. It's better known as ``extract a function'' or ``extract a method'' depending if the target language is object-oriented or functional. In any language the refactoring will introduce a name for a previously anonymous section of code. The syntax that the refactoring targets and what the extracted piece of code is, is what makes up the differences between languages. In Haskell the refactoring would target expressions and the extracted code is a function, in Java statements would be made into a new method~\mbox{%DIFAUXCMD
\citep{genericRefactoring}}\hspace{0pt}%DIFAUXCMD
. 
}

\DIFadd{The canonical reference for refactorings,~\mbox{%DIFAUXCMD
\cite{fowler}}\hspace{0pt}%DIFAUXCMD
, is written using Java and though the author stresses that the refactoring catalogue is useful to other languages, the descriptions of the refactorings are very specific to object-oriented programming.~\mbox{%DIFAUXCMD
\cite{genericRefactoring} }\hspace{0pt}%DIFAUXCMD
helps identify that certain transformations can be applied regardless of languages. This thesis will expand on this question in Section~\ref{ooRefs} by examining the data driven refactoring chapters from~\mbox{%DIFAUXCMD
\cite{fowler} }\hspace{0pt}%DIFAUXCMD
that do not have obvious functional equivalents.
}

\section{Engineering refactoring tools}\label{engineeringRefac}

\DIFadd{Sections~\ref{ideTools} and~\ref{funcTools} briefly discussed some refactoring tools and the features they provide but did not say much about how they are built. This section will look at how refactoring tools are actually built, with a specific focus on how these tools gain access to a representation of the target source code and how they work with that representation. This section will also focus on languages other than Haskell. Refactoring Haskell is, of course, the focus of this thesis, and the general implementation of HaRe will be the focus of Chapter~\ref{chp:hare}.
}

\DIFadd{This section will look in greater detail at implementing refactoring tools for Erlang and Clojure.
}

\subsection{Implementing Wrangler}

\DIFadd{Wrangler was first mentioned in Section~\ref{wranglerOne}, here we describe in a bit more detail how Wrangler is implemented, with a particular focus on the internal representation of the Erlang source code that Wrangler uses and how that representation is transformed. 
}

\DIFadd{Erlang comes with its own ``front end.'' A language front end provides access to some of the same tools that language compiler and runtime systems are built out of. Refactoring tools are mostly interested in a language's lexer, parser, the data type the language is represented as internally (this usually is an abstract syntax tree), and a pretty printer for that data type. Many languages do not store all of the information a refactoring tool requires in its internal representation and Erlang is no different. Wrangler is dependent on the SyntaxTools library which includes more semantic data about the target program~\mbox{%DIFAUXCMD
\citep{wranglerUpdated}}\hspace{0pt}%DIFAUXCMD
. In addition to the information SyntaxTools adds to the AST of an Erlang program the library also makes it possible to add additional information to the tree as well~\mbox{%DIFAUXCMD
\citep{syntaxTools}}\hspace{0pt}%DIFAUXCMD
. Wrangler uses this feature to further annotate the AST with both syntactic and semantic information~\mbox{%DIFAUXCMD
\citep{wranglerUpdated}}\hspace{0pt}%DIFAUXCMD
.  
}

\DIFadd{Once a refactoring tool has parsed the target program into its internal representation the transformations can begin. However the internal representation of a program for even moderately sized programs can be quite sizeable. Traversing and modifying these structures can introduce large amounts of what's called ``boilerplate'' code into a project. In this case, the boilerplate is highly repetitive code that simply walks through a structure, it is difficult to maintain and hides the relatively small amount of ``real'' code that is actually performing the transformations~\mbox{%DIFAUXCMD
\citep{syb}}\hspace{0pt}%DIFAUXCMD
. Generic programming is a technique used to eliminate this type of code. Rather than use a third-party generic programming library like HaRe does (see Section~\ref{genProg}), Wrangler has implemented its own versions of the generic traversals it requires.}\footnote{\DIFadd{See: }\url{https://github.com/RefactoringTools/wrangler/blob/master/src/api_ast_traverse.erl}} \DIFadd{This is feasible in Erlang because its weak dynamic type system means that traversals can be defined in a single function whereas Haskell needs separate functions defined to handle each type of the abstract syntax tree. 
}

\subsection{Refactoring LISPs}

\DIFadd{The LISP family of languages make interesting target languages for refactorings. Lisps' support for macros, and the fact that Lisp code is structured as lists, mean that the internal representation of a Lisp program can be manipulated with the list-processing functionality that comes built into the language. 
}

\DIFadd{Another unique aspect to building refactoring tools for a Lisp is the support that the Emacs text editor can provide. Implementations of the Emacs text editor normally ship with their own dialect of Lisp referred to as Emacs Lisp~\mbox{%DIFAUXCMD
\citep{emacs}}\hspace{0pt}%DIFAUXCMD
. Refactoring tools are commonly integrated into Emacs; for example, both HaRe and Wrangler provide Emacs extensions in Emacs Lisp. In those cases all the Lisp code is used for is to define the user interface and a wrapper around the command line calls to the refactoring tool. If the target language of the refactoring is a Lisp then Emacs Lisp can be used to define refactorings directly. Consider the Clojure refactoring tool ``clj-refactor.el''~\mbox{%DIFAUXCMD
\citep{clojureRefac}}\hspace{0pt}%DIFAUXCMD
. Clojure is a Lisp that can be compiled to either Java bytecode for execution on the JVM, or to Javascript for front end web programming~\mbox{%DIFAUXCMD
\citep{clojure}}\hspace{0pt}%DIFAUXCMD
. The Clojure refactoring tool uses a separate analyser to generate an AST independent of which dialect is being targeted. Once that representation is created Emacs Lisp can actually be used to perform the source code transformation instead of Clojure.    
}

\section{Summary}

\DIFadd{This chapter has discussed the literature that relates to and helped build the work that is described in the rest of this thesis. It began with a discussion of other refactoring tools, for both functional and object-oriented languages. Next in Section~\ref{refacParallel} was a description of work that uses refactoring to introduce parallelism into programs. Refactoring's original goal was to improve code quality and reduce technical debt, but with this work refactoring has been expanded to include introducing parallelism. This thesis again pushes the scope that refactorings can focus on towards data type evolution. Section~\ref{applicativeDo} describes an alternative interpretation for the Haskell }\texttt{\DIFadd{do}} \DIFadd{syntax. The work described in Chapter~\ref{chp:applicative} has the same goal as this work, the introduction of applicative rather than monadic operations. The approach taken in Chapter~\ref{chp:applicative} uses refactoring to explicitly introduce applicative operations rather than having the compiler do this implicitly. The program transformation field also has looked at how data types can change and some of this work was described in Section~\ref{typeTrans}. 
}

\DIFadd{The final section discussed how refactoring tools were implemented in Erlang and Clojure. Both Wrangler and clj-refactor deal with similar issues when it comes to getting an abstract syntax tree of their target programs. Third party libraries help both of these projects gain access to the abstract syntax tree of the target program with enough information in them to perform transformations while preserving formatting. Other refactoring tools perform much of the analysis required to perform transformations themselves such as RefactorErl and haskell-tools.
}

\DIFadd{By considering how tools are built we can begin to understand the general toolchain that must be built to implement a program transformation tool. In general program transformation tools work in three main phases, Figure~\ref{generalToolchain} shows the general toolchain that a tool will need to build. Table~\ref{transformPhases} shows the types of libraries and technologies that are used in each of the phases. 
}

\begin{figure}[t]
	\begin{center}
		\includegraphics[scale=.4]{graphVis/RelatedWork/abstractTooling.png}
	\end{center}
	\caption{\DIFaddFL{A general toolchain for program transformation.}}
	\label{generalToolchain}
\end{figure}

\begin{table}
\centering
\begin{tabular}[t]{|c|c|c|}
\hline
\DIFaddFL{Building Abstract Representation }& \DIFaddFL{Analysis and Transformation }& \DIFaddFL{Outputting Results }\\
\hline
\DIFaddFL{Parser }& \DIFaddFL{Generic Programming Library }& \DIFaddFL{Pretty-printer }\\
\DIFaddFL{Lexer }& \DIFaddFL{Custom Analysis }& \DIFaddFL{Token-Stream }\\
\DIFaddFL{Renamer }& \DIFaddFL{Traversal Schemes }& \DIFaddFL{IDE Functionality }\\
\DIFaddFL{Typechecker }& \DIFaddFL{IDE Functionality }& \\
\DIFaddFL{IDE Functionality }& & \\
\hline
\end{tabular}
\caption{\DIFaddFL{The components used during the different phases of program transformation.}}
\label{transformPhases}
\end{table}

\DIFadd{Before any transformation can happen the source program must be read into a representation that the tool can work with. Many tools use the existing compiler toolchain's lexer and parser to build an abstract syntax tree; this is what HaRe and Wrangler do. Additional compiler components, such as the renamer or typechecker, can be used to decorate the syntax tree with additional information. If a tool is implemented in a specific IDE, the IDE may provide functionality to work with the target language. The notable example of this would be clj-refactor, which is built into Emacs. Since Emacs supports its own dialect of Lisp and Lisps place a strong emphasis on metaprogramming, clj-refactor uses built-in Emacs functions to move through and transform Clojure source files~\mbox{%DIFAUXCMD
\citep{clojureRefac}}\hspace{0pt}%DIFAUXCMD
. Emacs uniquely supports Lisp but other IDEs also provide ways to interact with the abstract structure of the source code files being edited. Intellij IDEA, for example, provides ``}\texttt{\DIFadd{PSIFile}}\DIFadd{'' a generic interface for representing the contents of a file as a hierarchy~\mbox{%DIFAUXCMD
\citep{ideaPsi}}\hspace{0pt}%DIFAUXCMD
. For a source code file the }\texttt{\DIFadd{PSIFile}} \DIFadd{would represent the abstract syntax of that language, but this model also allows for other types of languages, such as XML, to be modeled with the same interface.
}

\DIFadd{The second phase of program transformation is the actual analysis and transformation of the target program's abstract representation. This phase is where the program works with and modifies the structure that was constructed during the first phase. In complex statically typed languages such as Haskell third party generic programming libraries are required to efficiently traverse and transform the abstract syntax of the source language. Other tools may perform additional analysis of their own and add that information to the abstract program structure. RefactorErl does this with the semantic graph  that is built on top of the syntax tree. Wrangler also does this by augmenting the syntax tree with additional information after the standard Erlang parser has finished. Finally, an IDE may contribute its own analysis and transformation functionality, once again Emacs' support for Emacs Lisp gives the editor powerful tools for transforming other Lisps. The powerful object oriented focused IDEs offer basic rewriting capabilities but also powerful plugin environments so tools can depend on analysis and transformation tools built by community members.
}

\DIFadd{The final phase a program transformation tool needs to perform is outputting the modified source. If the output needs to be seen by the user this process is handled by a ``pretty-printer.'' A pretty printer takes some abstract representation of source code, typically an abstract syntax tree, and turns it into the code literal that it represents. Pretty printing can prove quite challenging depending on the abstract structure the tool is using. Parsers often omit some of the elements of a source file; code comments and whitespace are often omitted from abstract syntax because they aren't needed by the compiler. Tools that want to preserve these aspects of a user's program need to either store the formatting and comments in the abstract representation somehow, or reconstruct them directly from the token stream. Finally, an IDE can provide powerful abstractions that can perform the pretty printing automatically. Intellij will automatically display changes made to a section of a PSI file, effectively solving this issue for tool builders that target Intellij.
}

\DIFadd{The implementation of program transformation tooling, in general, follows the three stage pipeline of:
}

\begin{enumerate}
	\item \DIFadd{Constructing an abstract representation of the source program.
	}\item \DIFadd{Analysis and transformation of that representation.
	}\item \DIFadd{Outputting the transformed source code.
}\end{enumerate}

\DIFadd{The design of these three phases is highly dependent on the source language the tool targets. In constructing the first phase of a transformation tool it is common to reuse the lexer and parser of the target language's runtime if possible, which makes the following phases dependent on the abstract syntax that is used by the runtime system. What information is contained in the abstract syntax informs the steps that a tool must take in the next phase. If information that is required for the transformation isn't included in the abstract syntax it will need to be computed at this point. Additionally, depending on the complexity of the abstract syntax tree, a generic programming library may need to be introduced or traversal and transform schemes implemented to work with the abstract syntax. The final phase of a program transformation tool, printing, is again determined by the information that is available from the abstract representation. The original format of the source file may have to be stored separately if the abstract representation does not contain enough information to preserve the user's comments and formatting.
}

\DIFadd{This generic transformation tool pipeline is how many program transformation tools are constructed and HaRe is no exception. This chapter has described other transformation and refactoring tools. The remainder of this thesis will focus on the Haskell refactoring tool HaRe. The next chapter will begin by describing how HaRe is constructed and how refactorings are implemented in it.
}

\DIFaddend \chapter{Background: Refactoring Haskell in HaRe}
\label{chp:hare}


Work on HaRe \DIFdelbegin \DIFdel{started in early 2003 }\DIFdelend \DIFaddbegin \DIFadd{was initiated }\DIFaddend at the University of Kent. HaRe was \DIFdelbegin \DIFdel{originally created }\DIFdelend \DIFaddbegin \DIFadd{started }\DIFaddend by Huiqing Li, Claus Reinke, and Simon Thompson \DIFaddbegin \DIFadd{in early 2003}\DIFaddend ~\citep{refacWebsite}. The first implementation of HaRe supported the Haskell 98 standard\DIFaddbegin \DIFadd{. This first implementation of HaRe and its first catalogue of refactorings were some of the main contributions of Huiqing Li's PhD thesis}\DIFaddend ~\citep{huiqingThesis}.  \DIFaddbegin \DIFadd{Chris Brown continued to expand on HaRe with his PhD thesis ``Tool Support for Refactoring Haskell Programs''~\mbox{%DIFAUXCMD
\citep{brownThesis}}\hspace{0pt}%DIFAUXCMD
. In it he expanded the number of refactorings HaRe supports, HaRe's API, and built new code analysis tools, such as duplicate code elimination and program slicing transformations, using the infrastructure of HaRe.
}

	\DIFaddend The Haskell ecosystem has evolved a great deal since then. Haskell 2010 is \DIFdelbegin \DIFdel{the current }\DIFdelend \DIFaddbegin \DIFadd{now the }\DIFaddend formal language standard but the Glasgow Haskell Compiler (GHC) has become \DIFdelbegin \DIFdel{the }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend de facto Haskell standard~\citep{refacTools}. GHC supports the entire 2010 standard but also includes language extensions that \DIFaddbegin \DIFadd{can }\DIFaddend do everything from changing the type system to adding new syntax features~\citep{langExts}. 

	\DIFdelbegin \DIFdel{This chapter begins, in section}\DIFdelend \DIFaddbegin \DIFadd{Chris Brown began working on updating HaRe so that it could support all of GHC Haskell and not just Haskell 98. Since then development of HaRe has been led by Alan Zimmerman; much of HaRe's current infrastructure is his work~\mbox{%DIFAUXCMD
\citep{hareCurrent}}\hspace{0pt}%DIFAUXCMD
. He also contributes to several of HaRe's dependencies including GHC-Mod~\mbox{%DIFAUXCMD
\citep{ghcMod}}\hspace{0pt}%DIFAUXCMD
, the current implementation of Strafunski-StrategyLib~\mbox{%DIFAUXCMD
\citep{strafunskiGit}}\hspace{0pt}%DIFAUXCMD
, and he started ghc-exactprint~\mbox{%DIFAUXCMD
\citep{exactprint}}\hspace{0pt}%DIFAUXCMD
.
	}

	\DIFadd{The contributions of this thesis are built on top of all of the work that has gone into making HaRe what it is today. This chapter will describe the implementation of HaRe beginning, in Section}\DIFaddend ~\ref{hareOrig}, with a brief discussion of the original implementation of HaRe\DIFdelbegin \DIFdel{. This section includes a description of programmatica}\DIFdelend \DIFaddbegin \DIFadd{, and its dependencies such as Programatica}\DIFaddend ~\citep{programatica}, \DIFdelbegin \DIFdel{HaRe's first language backend}\DIFdelend \DIFaddbegin \DIFadd{a language frontend for Haskell 98}\DIFaddend , and Strafunski-StrategyLib~\citep{strafunski} \DIFdelbegin \DIFdel{, the }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend generic programming library\DIFdelbegin \DIFdel{that assisted HaRe in transforming the abstract syntax tree of source programs}\DIFdelend . Next the chapter describes the current \DIFdelbegin \DIFdel{libraries that HaRe depends on. }\DIFdelend \DIFaddbegin \DIFadd{dependencies of HaRe beginning with }\DIFaddend Section~\ref{ghcApi}\DIFaddbegin \DIFadd{. This section }\DIFaddend describes the GHC API, \DIFdelbegin \DIFdel{instead of needed a separate project as a language backend GHCprovides an API to access the language implementation. The }\DIFdelend \DIFaddbegin \DIFadd{the language frontend that is included with GHC. HaRe now uses the GHC API for access to the internal representation of Haskell rather than a third-party library like Programatica. In addition to Strafunski-StrategyLib, HaRe's newer code also uses another }\DIFaddend generic programming library\DIFdelbegin \DIFdel{that HaRe uses has changed as well}\DIFdelend \DIFaddbegin \DIFadd{, }\textit{\DIFadd{Scrap Your Boilerplate}} \DIFadd{(SYB)~\mbox{%DIFAUXCMD
\citep{syb}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . Section~\ref{genProg} describes how HaRe uses \DIFdelbegin \textit{\DIFdel{Scrap Your Boilerplate}}%DIFAUXCMD
\DIFdel{~\mbox{%DIFAUXCMD
\citep{syb} }\hspace{0pt}%DIFAUXCMD
to perform }\DIFdelend \DIFaddbegin \DIFadd{SYB to perform abstract syntax }\DIFaddend tree traversals and transformations. Section~\ref{ghcExactprint} describes\DIFaddbegin \DIFadd{, }\textit{\DIFadd{ghc-exactprint}}\DIFadd{, }\DIFaddend the pretty-printer HaRe uses \DIFdelbegin \textit{\DIFdel{ghc-exactprint}}%DIFAUXCMD
\DIFdelend \DIFaddbegin \DIFadd{to output the modified program}\DIFaddend ~\citep{exactprint}. Finally the chapter \DIFdelbegin \DIFdel{will conclude }\DIFdelend \DIFaddbegin \DIFadd{concludes }\DIFaddend with a description of the current implementation of HaRe in \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{hareCurr}. \DIFaddbegin \DIFadd{For the purposes of this thesis the ``current'' implementation of HaRe is, as of this writing, version 0.8.4.1}\footnote{\url{https://github.com/RefactoringTools/HaRe/tree/4055ef45f0de3c966fd7841986ab0ed2ee814055}}\DIFadd{.
}\DIFaddend 

\section{The original implementation of HaRe}
\label{hareOrig}

Implementing an automated refactoring system has several dependencies\DIFdelbegin \DIFdel{including, }\DIFdelend \DIFaddbegin \DIFadd{, including }\DIFaddend a frontend for the language that is targeted, a generic programming library or some means of making generic traversals of abstract syntax trees, and a pretty printer. The language frontend is required for the refactorer to analyse and modify source code, the generic programming library assists in traversing the complex abstract syntax tree of a \DIFdelbegin \DIFdel{real world }\DIFdelend \DIFaddbegin \DIFadd{real-world }\DIFaddend programming language, and the pretty printer outputs as source the modified AST in a form recognizable by the author of the original program. The original implementation of HaRe fulfilled these dependencies with two libraries, Programatica\DIFdelbegin \DIFdel{and }\DIFdelend \DIFaddbegin \DIFadd{, which provided the language frontend and pretty printer, and }\DIFaddend Strafunski-StrategyLib \DIFaddbegin \DIFadd{a generic programming library}\DIFaddend . 

\subsection{Programatica and Strafunski}\label{prog&Strafunski}

Programatica was a project at the OGI School of Science and Engineering to build tool support for validating Haskell programs~\citep{programaticaTools}. The Programatica team open sourced their frontend so that other tools could also use it~\citep{refacWebsite}. At the time there was no API to access the internals of GHC\DIFaddbegin \DIFadd{, }\DIFaddend so the HaRe team chose to use Programatica over other available front ends because it was the simplest front end that supported the full Haskell 98 standard along with a number of its extensions~\citep{huiqingThesis}. Programatica's Haskell front end is broken up into multiple components including a lexer, a parser, an abstract syntax tree data type, a module system, a type checker, and a pretty printer. Programatica's frontend allows for the implementers of HaRe to focus on refactoring only rather than having to build all of these components as well.

Programatica's abstract syntax contains 20 data types with 110 data constructors in total. Working with the \DIFdelbegin \DIFdel{syntax }\DIFdelend \DIFaddbegin \DIFadd{abstract syntax tree }\DIFaddend directly would introduce a large amount of ``boilerplate'' code into HaRe that would make maintenance and reusability \DIFdelbegin \DIFdel{of much }\DIFdelend more difficult~\citep{huiqingThesis}. Instead HaRe used Strafunski-StrategyLib, a combinator library for generic programming, to traverse the abstract syntax tree (AST) of the source code~\citep{strafunski}. 

Commonly \DIFdelbegin \DIFdel{transforming }\DIFdelend \DIFaddbegin \DIFadd{refactoring }\DIFaddend a program only modifies small sections of the source program's AST. Renaming a function, for example, will only need to modify the name used in the binding and places where that name is being used; all other sections of the AST remain unmodified. A commonly used operation in Strafunski takes a function that works on a particular data type (or types)\footnote{In the renaming example this could be \DIFdelbegin \DIFdel{code }\DIFdelend \DIFaddbegin \DIFadd{the operation }\DIFaddend that checks if a variable is the one being renamed and replaces it with the new name.} and extends it to work on all types by leaving all other types unmodified. Strafunski also provides "strategies" that define how that extended function will be applied to the syntax tree as a traversal of the tree~\citep{strafunski}\footnote{\DIFdelbegin \DIFdel{e}\DIFdelend \DIFaddbegin \DIFadd{E}\DIFaddend .g. \DIFaddbegin \DIFadd{full }\DIFaddend top-down or \DIFaddbegin \DIFadd{full }\DIFaddend bottom-up\DIFaddbegin \DIFadd{.}\DIFaddend }. \DIFdelbegin \DIFdel{Using Strafunski will be discussed in more detail in section}\DIFdelend \DIFaddbegin \DIFadd{These two mechanisms, extending a function to work over all types and combinators that define how functions will be applied to a tree of values, are common to many generic programming libraries. A more specific example of using generic programming libraries is shown in Section}\DIFaddend ~\ref{genProg}.

These two dependencies allowed the original implementation of HaRe to obtain the abstract syntax tree of source code to be refactored, traverse and transform that AST, and output the modified program. All these tasks are things that the current implementation of HaRe needs to do but the dependencies that it relies on have changed somewhat. The Haskell standard was updated in 2010~\citep{haskell2010} and GHC continues to expand the number of language extensions it supports. Unfortunately Programatica has not kept up with these changes and does not support anything beyond Haskell 98. At the same time GHC's own API has been defined and matured so that it can replace Programatica as HaRe's front end. HaRe now relies on this and a few other projects for its language frontend. For generic traversals "Scrap Your Boilerplate"\DIFaddbegin \DIFadd{~\mbox{%DIFAUXCMD
\citep{syb}}\hspace{0pt}%DIFAUXCMD
, }\DIFaddend has been added as a dependency though Strafunski-StrategyLib is still used in \DIFdelbegin \DIFdel{HaRe~\mbox{%DIFAUXCMD
\citep{syb}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{some of the older parts of HaRe's code base and when certain traversal schemes that Strafunski provides are needed}\DIFaddend .

\subsection{HaRe's original refactorings}\label{origRefactorings}

HaRe's original refactorings fall into three categories, structural, module, and data-oriented refactorings.

\subsubsection{Structural Refactorings}

Structural refactorings principally concern the name and scope of entities defined in a program~\citep{huiqingThesis}. These refactorings target functions, and smaller sections of code.  A traditional example of this is the renaming refactoring. \DIFdelbegin %DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Renaming is one of }\DIFdelend \DIFaddbegin \DIFadd{Renaming is }\DIFaddend the most basic \DIFdelbegin \DIFdel{refactorings}\DIFdelend \DIFaddbegin \DIFadd{refactoring}\DIFaddend . The purpose of renaming is to change the name of a given \DIFdelbegin \DIFdel{identifier}\DIFdelend \DIFaddbegin \DIFadd{entity}\DIFaddend . A renaming refactoring could target a variable, function name, type, or any other \DIFdelbegin \DIFdel{piece of syntax }\DIFdelend \DIFaddbegin \DIFadd{entity }\DIFaddend that a programmer can \DIFdelbegin \DIFdel{name. This }\DIFdelend \DIFaddbegin \DIFadd{assign a name to. This refactoring }\DIFaddend allows for the names \DIFdelbegin \DIFdel{in code to be in sync with the purpose of that code}\DIFdelend \DIFaddbegin \DIFadd{used in a program to truly reflect what that program is actually doing}\DIFaddend . 

Though \DIFdelbegin \DIFdel{the target of these refactorings are }\DIFdelend \DIFaddbegin \DIFadd{these refactorings target }\DIFaddend small pieces of code\DIFaddbegin \DIFadd{, }\DIFaddend the changes made to the code base can affect modules throughout the codebase. The renaming refactoring, for example, can affect any other module that uses the renamed object.

Other examples of structural refactorings include \DIFdelbegin \DIFdel{deleting a definition, duplicating a function, and adding an argument~\mbox{%DIFAUXCMD
\citep{huiqingThesis}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{Deleting a Definition, Duplicating a Function, and Adding an Argument~\mbox{%DIFAUXCMD
\citep[p. 15]{huiqingThesis}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend .

\subsubsection{Module Refactorings}

Module refactorings concern the imports and exports of an individual module, or the relocation of definitions between modules~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\citep{huiqingThesis}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep[p. 20]{huiqingThesis}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . A simple refactoring in this category would be "clean an import list," which analyses a module's import list and removes redundant import declarations. Another example of \DIFdelbegin \DIFdel{a refactoring in this category }\DIFdelend \DIFaddbegin \DIFadd{such a refactoring }\DIFaddend would be the "move a definition" refactoring. As its name suggests, move a definition takes a definition from one module and moves it to another and fixes the imports and exports of any affected modules. For example if we were moving some function \texttt{foo} from module \texttt{A} to module \texttt{B} any external dependencies of \texttt{foo} \DIFdelbegin \DIFdel{need }\DIFdelend \DIFaddbegin \DIFadd{needed }\DIFaddend to be imported into \texttt{B} and if those dependencies are no longer used in \texttt{A} \DIFaddbegin \DIFadd{then }\DIFaddend the relevant import statements should be removed. If \texttt{foo} is still being used in \texttt{A} then \texttt{A} needs to import \texttt{B} (if it does not already do so). Finally any other modules that currently depend on \texttt{foo} need to now import \texttt{B} (if it's not already imported) and remove the import of \texttt{A} (\DIFdelbegin \DIFdel{assuming }\DIFdelend \DIFaddbegin \DIFadd{if }\DIFaddend none of \texttt{A}'s other definitions are used).

\subsubsection{Data Type Based Refactorings}

The third category of refactorings are those that are associated with data type definitions~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\citep{huiqingThesis}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep[p. 21]{huiqingThesis}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . "Add field names" is a good example of a data-oriented refactoring.  The add field names refactoring will add field names to a data type. These names can then be used as selector functions that make extraction of a particular part of a type a simple function call. The new field names are generated by HaRe but can be renamed by the user~\citep{huiqingThesis}.

These original refactorings were chosen to be basic yet still useful, and for their ability to give insight into the issues surrounding implementing an automated refactoring tool~\citep{huiqingThesis}. In addition to the refactorings that were implemented by HaRe's developers\DIFaddbegin \DIFadd{, }\DIFaddend an API was exposed so that other developers could implement their own refactorings\DIFdelbegin \DIFdel{ which will be }\DIFdelend \DIFaddbegin \DIFadd{; this is }\DIFaddend discussed in more detail in \DIFdelbegin \DIFdel{the next section~\mbox{%DIFAUXCMD
\citep{hareApi}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{Section~\ref{hareApi}}\DIFaddend .


\subsection{The HaRe API}\label{hareApi}

Early in the development cycle of HaRe\DIFaddbegin \DIFadd{, }\DIFaddend it was restructured to expose an API for implementing refactorings and general Haskell program transformations~\citep{hareApi}. The HaRe API contains a collection of functions for program analysis and transformation of Haskell 98 programs. These functions, along with the functionality provided by Strafunski and Programatica, form the basis for implementing basic refactorings~\citep{hareApi}.

The HaRe API exposes the full Programatica abstract syntax for Haskell 98 to the user but because of generic programming with Strafunski only the parts of the AST that are to be transformed have to be explicitly referenced in a refactoring~\citep{hareApi}. Another key feature of the API was to hide layout and comment preservation allowing the programmer to focus on program transformation instead. Each subtree in the AST is tagged with its absolute location in the source file. Any modifications to the AST will change the location of all elements that occur after the change. The API abstracts over this cascade of changes that follows even the simplest of \DIFdelbegin \DIFdel{changes}\DIFdelend \DIFaddbegin \DIFadd{modifications}\DIFaddend . The HaRe API transformation functions modify the token stream and the AST simultaneously which keeps refactoring definitions free of this location bookkeeping~\citep{hareApi}. \DIFaddbegin \DIFadd{The token stream of a program is produced by the lexer, and is a list of all of that program's tokens in the order that they appear. The complete format of a program is only stored in the token stream but the full structure of a program is only clear when working with the abstract syntax tree. This means that refactorings want to reason about the AST of a source program but, to preserve formatting, must make sure that all modifications made to the program are also reflected in the token stream.
}\DIFaddend 

The overall goal of the HaRe API is to help ensure the correctness of new refactorings by limiting the amount of code required that is not related to program transformation, and isolating common error sources \DIFaddbegin \DIFadd{caused by having to keep the AST and token stream synced}\DIFaddend ~\citep{hareApi}. This design goal still guides the development of HaRe, and many of the functions that were provided in the original HaRe API have been \DIFdelbegin \DIFdel{updated }\DIFdelend \DIFaddbegin \DIFadd{ported }\DIFaddend to HaRe's latest implementation.  

\DIFdelbegin %DIFDELCMD < \section{Underlying technologies}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend As was mentioned in Section~\ref{prog&Strafunski}, HaRe's dependencies have changed somewhat in its current implementation. HaRe originally used Programatica as its front end; now the language front end is composed of several projects\DIFdelbegin \DIFdel{now}\DIFdelend : the GHC API~\citep{ghcApi}, ghc-mod~\citep{ghcMod}, and ghc-exactprint~\citep{exactprint}. The following sections will describe these dependencies as well as Scrap Your Boilerplate~\citep{syb}, another generic programming library, and how HaRe currently uses them.

\section{The GHC API}
\label{ghcApi}

Rather than being a monolithic executable, the Glasgow Haskell Compiler (GHC) is composed of several smaller components that each correspond to a separate compiler stage. GHC's executable consists of a lightweight main function that ties together the smaller components~\citep{ghcDesign}. These components are exposed to users  and this is what constitutes the GHC API.

\subsection{Compiler stages of GHC}\label{ghcStages}

\begin{figure}[t]
	\DIFdelbeginFL %DIFDELCMD < \label{compilerStages}
%DIFDELCMD < 	%%%
\DIFdelendFL \begin{center}
		\includegraphics[scale=.4]{graphVis/Chapter2/compilerStgs.png}
	\end{center}
	\caption{GHC Compiler stages.}
	\DIFaddbeginFL \label{compilerStages}
\DIFaddendFL \end{figure}

Some of the major components and the order in which they are used in GHC are shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{compilerStages} which has been adapted from~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\citep{ghcDesign}}\hspace{0pt}%DIFAUXCMD
. This figure }\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\cite{ghcDesign}}\hspace{0pt}%DIFAUXCMD
. This figure }\DIFaddend is not a complete \DIFdelbegin \DIFdel{list of all the components GHCuses}\DIFdelend \DIFaddbegin \DIFadd{description of all of GHC's components}\DIFaddend , just the parts \DIFdelbegin \DIFdel{that HaReinteracts with}\DIFdelend \DIFaddbegin \DIFadd{relevant to HaRe}\DIFaddend . The full diagram can be found in~\citep{ghcDesign}. The label after each compiler stage indicates the type of AST that is produced by that stage.

The top level \DIFdelbegin \DIFdel{datatype }\DIFdelend \DIFaddbegin \DIFadd{data type }\DIFaddend for all of the GHC abstract syntax is \texttt{HsSyn} ~\citep{ghcDesign}. \texttt{HsSyn} is parameterised by some identifier type; each compiler stage produces a different type of identifier with the additional information that stage produces. For example the typechecker takes in an AST parameterised by \texttt{Name} and returns an AST parameterised by \texttt{Id} which is a \texttt{Name} with additional type information.

\subsection{GHC's Name Types}\label{ghcNames}

There are five name types that GHC uses, they are:

\begin{itemize}
	\item \texttt{OccName} is the simplest type of name. It is just a wrapper around a \texttt{FastString}\DIFaddbegin \DIFadd{, which is an array of bytes, }\DIFaddend and an optional \texttt{NameSpace}. An \texttt{OccName} is contained in each of the other four identifier types. \DIFaddbegin \DIFadd{A }\texttt{\DIFadd{NameSpace}} \DIFadd{is a simple descriptor of what type of name the }\texttt{\DIFadd{OccName}} \DIFadd{comes from such as value, type constructor, or data constructor.
	}\DIFaddend \item \texttt{RdrName} names are produced by GHC's parser. \texttt{RdrName}s are essentially just an \texttt{OccName} with optional module information if the source name has been qualified. 
	\item \texttt{Name}s are produced by the renamer\DIFaddbegin \DIFadd{, the component that resolves every name's scoping and binding}\DIFaddend . A \texttt{Name} contains an \texttt{OccName} and a \texttt{Unique} that differentiates \texttt{Name}s that have the same \texttt{OccName}. \DIFaddbegin \DIFadd{A }\texttt{\DIFadd{Unique}} \DIFadd{is just an integer but it is generated by GHC in such a way that it is guaranteed to be unique. }\DIFaddend They have also had their scoping and binding resolved\DIFdelbegin \DIFdel{. Each name }\DIFdelend \DIFaddbegin \DIFadd{, so each }\texttt{\DIFadd{Name}} \DIFaddend contains the source span where \DIFdelbegin \DIFdel{the }\DIFdelend \DIFaddbegin \DIFadd{that }\DIFaddend name was defined and a data type that describes what \DIFdelbegin \DIFdel{type }\DIFdelend \DIFaddbegin \DIFadd{sort }\DIFaddend of \texttt{Name} it is (e.g. a name defined internally \DIFdelbegin \DIFdel{to }\DIFdelend \DIFaddbegin \DIFadd{in }\DIFaddend the module or a name from another module).
	\item \texttt{Id} \& \texttt{Var} are the identifiers produced by the typechecker.\footnote{\texttt{Id} is just a synonym for \texttt{Var}.} These identifiers contain a \texttt{Name}, and a \texttt{Unique}. \DIFdelbegin \DIFdel{They also contain type information but }\DIFdelend \DIFaddbegin \DIFadd{Finally these names also contain }\DIFaddend the type of \DIFdelbegin \texttt{\DIFdel{Var}}%DIFAUXCMD
\DIFdel{s may contain type variables that have a kind rather than a type whereas the type of an }\texttt{\DIFdel{Id}} %DIFAUXCMD
\DIFdel{will be a monotype}\DIFdelend \DIFaddbegin \DIFadd{the variable they represent}\DIFaddend . 
\end{itemize}

These identifier types change throughout the compilation process but these names only parameterise the syntax tree, the shape of the tree itself stays the same throughout compilation. 

\subsection{GHC's syntax tree}
GHC's abstract syntax is currently made up of over 90 data types. Many of those types have multiple constructors\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{; }\DIFaddend the expression data type \texttt{HsExpr} for example has over 40 constructors. This section will briefly introduce the structure of GHC's abstract syntax tree.

\subsubsection{Common syntax types}

The most common type in any given piece of GHC abstract syntax would be a \texttt{Located} as seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{located}.

\begin{figure}[t]
\begin{lstlisting}

type Located e = GenLocated SrcSpan e

\end{lstlisting}
\caption{The \texttt{Located} type.}
\label{located}
\end{figure}

\texttt{Located} is used to tag syntax elements with their \DIFdelbegin \DIFdel{position }\DIFdelend \DIFaddbegin \DIFadd{start and end positions }\DIFaddend in a source file. The \texttt{SrcSpan} contains the filename \DIFdelbegin \DIFdel{that }\DIFdelend \DIFaddbegin \DIFadd{from which }\DIFaddend the span comes \DIFdelbegin \DIFdel{from }\DIFdelend and then the start and end columns and start and end lines that the span covers.\footnote{GHC has a small optimisation where if a span exists entirely on a single line it only stores the single line number and the start and end column, instead of storing the same line number twice.} 

There are located versions of many of the AST types. For example, \texttt{HsExpr} is the type that represents expressions, and there is a related type \texttt{LHsExpr} that represents a located expression whose definition is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{lhsexpr}.

\begin{figure}[t]
\begin{lstlisting}
type LHsExpr id = Located (HsExpr id)
\end{lstlisting}
\caption{The located expression}
\label{lhsexpr}
\end{figure}

\DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
case (x, y) of
	(Just i, Just j) -> Just (i+j)	
	(Just _, Nothing) -> x
	(Nothing, Just _) -> y
	(Nothing, Nothing) -> Nothing
\end{lstlisting}
\caption{\DIFaddFL{A case statement}}
\label{caseStmt}
\end{figure}

\DIFaddend The \texttt{HsExpr} type represents much of the Haskell language such as function application, lambdas, if and case statements. Pattern \DIFdelbegin \DIFdel{matching is handled }\DIFdelend \DIFaddbegin \DIFadd{matches are represented }\DIFaddend by a \texttt{MatchGroup} type. Each \texttt{MatchGroup} contains a list of \texttt{Match}es and some typing information. Consider the case statement in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{caseStmt}. \DIFdelbegin %DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < case (x, y) of
%DIFDELCMD < 	(Just i, Just j) -> Just (i+j)	
%DIFDELCMD < 	(Just _, Nothing) -> x
%DIFDELCMD < 	(Nothing, Just _) -> y
%DIFDELCMD < 	(Nothing, Nothing) -> Nothing
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{A case statement}}
%DIFAUXCMD
%DIFDELCMD < \label{caseStmt}
%DIFDELCMD < \end{figure}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend This would be parsed into the \texttt{HsCase} constructor of \texttt{HsExpr}. This constructor is seen in \DIFdelbegin \DIFdel{figure~\ref{hscase}, the }\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{hscase}. The }\DIFaddend first constructor argument represents the expression that the patterns are matched against \DIFdelbegin \DIFdel{, }\DIFdelend (the tuple \texttt{(x,y)} in this case). The abstract syntax tree of this expression is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{caseAST}. The second argument to \texttt{HsCase} represents all \DIFdelbegin \DIFdel{four }\DIFdelend of the matches in the case statement. \texttt{MatchGroup}s are used to represent any code that associates some patterns with a right hand side expression. Function bindings, lambda expressions, and case statements all use \texttt{MatchGroup}s. 

\begin{figure}[t]
\begin{lstlisting}
HsCase (LHsExpr id) (MatchGroup id (LHsExpr id))	
\end{lstlisting}
\caption{The \texttt{HsCase} constructor}
\label{hscase}
\end{figure}

\begin{figure}[t]
\begin{lstlisting}
(L {case.hs:(3,11)-(7,31)} 
                 (HsCase 
                  (L {case.hs:3:16-21} 
                   (ExplicitTuple 
                    [
                     (L {case.hs:3:17} 
                      (Present 
                       (L {case.hs:3:17} 
                        (HsVar 
                         (Unqual {OccName: x}))))),
                     (L {case.hs:3:20} 
                      (Present 
                       (L {case.hs:3:20} 
                        (HsVar 
                         (Unqual {OccName: y})))))]))
\end{lstlisting}
\caption{The fragment of parsed abstract syntax representing the expression being pattern matched in \DIFdelbeginFL \DIFdelFL{figure}\DIFdelendFL \DIFaddbeginFL \DIFaddFL{Figure}\DIFaddendFL ~\ref{caseStmt}.}
\label{caseAST}
\end{figure}

In the case expression from \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{caseStmt} each of the four pattern matches is a \texttt{Match} in the expression's \texttt{MatchGroup}. Each \texttt{Match} associates a left hand side pattern with \DIFdelbegin \DIFdel{the appropriate }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend right hand side \DIFdelbegin \DIFdel{expression}\DIFdelend \DIFaddbegin \DIFadd{clause}\DIFaddend . 

\subsubsection{The syntax tree}
The previous section gave a brief overview of GHC's identifiers and its representation of expressions. This section describes the broader picture, the representation of whole Haskell programs.  

According to \DIFdelbegin \DIFdel{the }\DIFdelend GHC a Haskell program is simply a list of modules.\footnote{This is a very simple view and additional tools are needed to properly represent "projects" which is the context that most Haskell programs exist inside of. This is discussed further in \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{hareInners}.} Inside of GHC each module is represented by an \texttt{HsModule}. This top level structure keeps track of everything that \DIFdelbegin \DIFdel{module imports, }\DIFdelend \DIFaddbegin \DIFadd{the module imports and }\DIFaddend exports, and a list of all the declarations that the module defines. Declarations (of type \texttt{HsDecl}) are what represent everything that can be defined in Haskell. 

The \texttt{HsDecl} type is \DIFdelbegin \DIFdel{only }\DIFdelend used as a wrapper around other types. Bindings (of functions and/or values), instance declarations, and type class declarations are differentiated by \texttt{HsDecl}'s constructors. \DIFaddbegin \DIFadd{A few of the more common }\texttt{\DIFadd{HsDecl}} \DIFadd{constructors are shown in Figure~\ref{hsdecl}. 
}\DIFaddend 

\DIFdelbegin %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < [caption={A subset of \texttt{HsDecl constructors}}]
%DIFDELCMD < data HsDecl id =
%DIFDELCMD < 	   TyClD (TyClDecl id)
%DIFDELCMD < 	| InstD (InstDecl id)
%DIFDELCMD < 	| ValD  (HsBind id)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \begin{figure}
\begin{lstlisting}
data HsDecl id =
	   TyClD (TyClDecl id)
	| InstD (InstDecl id)
	| ValD  (HsBind id)
\end{lstlisting}
\caption{\DIFaddFL{A subset of }\texttt{\DIFaddFL{HsDecl}} \DIFaddFL{constructors}}
\label{hsdecl}
\end{figure}
\DIFaddend 

\texttt{HsDecl} \DIFdelbegin \DIFdel{only }\DIFdelend provides a high-level categorisation of what type it is. There is very little information stored at this level; the inner type is the ``payload'' of the declaration (e.g. \texttt{HsBind} or \texttt{TyClDecl}). These payload types are what store the type and \DIFdelbegin \DIFdel{expression level }\DIFdelend \DIFaddbegin \DIFadd{expression-level }\DIFaddend abstract syntax of Haskell that was described in the previous section.

Some notable payload types are \texttt{TyClDecl}, \texttt{InstDecl}, \texttt{HsBind}, and \texttt{Sig}. \texttt{TyClDecl} represents a family, type synonym, data, or type class declaration. The \texttt{InstDecl} is used to represent the instance declarations for type classes, and data or type families. The real workhorse of the language is \texttt{HsBind}. \texttt{HsBind} represents functions and pattern bindings (e.g. \texttt{(x,y) = (1,2)}). The \texttt{HsBind} type also has constructors that are introduced by the type checker such as dictionary binding. Finally \texttt{Sig} is the data type that represents type signatures. 

\DIFaddbegin \DIFadd{GHC's abstract syntax, for the most part, resembles the syntax described in the Haskell 2010 report. Types in GHC's syntax may have additional constructors to support GHC's language extensions but the broader AST structure stays the same for most programs. The syntax for language extensions can be quite extensive and can make the documentation of the GHC API difficult to understand.
}

\DIFaddend \section{Generic programming}
\label{genProg}

The need for a generic programming library, as previously discussed in \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{prog&Strafunski}, remains the same when using the GHC API's AST as opposed to Programatica's. Currently HaRe still uses Strafunski-StrategyLib as well as another library, Scrap Your Boilerplate. Scrap Your Boilerplate (SYB) is a generic programming library developed by Ralf L{\"a}mmel and Simon Peyton-Jones~\citep{syb}. 

SYB is included with GHC and has become the Haskell community's standard generic traversal library; however, HaRe still uses Strafunski because it provides ``stop'' traversals. These traversals (\DIFdelbegin \DIFdel{stop top down type preserving}\DIFdelend \DIFaddbegin \DIFadd{stop-top-down, type preserving, }\DIFaddend and type unifying) descend the tree until the strategy ``succeeds'' (that is\DIFdelbegin \DIFdel{the extended functioncan be applied}\DIFdelend \DIFaddbegin \DIFadd{, the type of the transforming function's parameter is found}\DIFaddend ) and then stop. Essentially it cuts a line across the abstract syntax tree where everything below the line is unvisited. This traversal is useful when refactoring because of how the AST's types are nested within each other. For example, \DIFdelbegin \DIFdel{a refactoring commonly wants to modify }\DIFdelend \DIFaddbegin \DIFadd{when a refactoring modifies }\DIFaddend the body of a function \DIFdelbegin \DIFdel{and }\DIFdelend the stop traversals ensure that when the \DIFdelbegin \DIFdel{extended function is applied}\DIFdelend \DIFaddbegin \DIFadd{transformation is applied, }\DIFaddend it is applied to the expression representing the whole binding rather than sub-expressions.

\subsection{Generic Traversals}
The Stratego/XT library was one of the first systems for programming tree transformations in a systematic way~\citep{stratego}. Stratego developed the idea of a transformation strategy. A strategy is the combination of a term rewriting function and a traversal function that describes how that rewriting function should be applied to a tree of terms. Stratego provides combinators that help construct term rewriting functions and tree traversal functions~\citep{stratego}.

Stratego is an untyped transformation system and so was \DIFdelbegin \DIFdel{unsuitable for working with the }\DIFdelend \DIFaddbegin \DIFadd{difficult to use for transforming }\DIFaddend statically typed Haskell \DIFaddbegin \DIFadd{in a way that respects its types}\DIFaddend . Strafunski was ``largely inspired'' by Stratego, but for a statically typed context~\citep{strafunski}.

\subsection{Scrap Your Boilerplate}\label{syb}

	Scrap Your Boilerplate (SYB) is a generic traversal library for Haskell that was inspired by Strafunski~\citep{syb}. SYB comes with GHC and has become the most used generic programming library in the Haskell community.\footnote{According to \url{http://packdeps.haskellers.com/reverse}} 

	Suppose there was a simple expression language that contained integers, integer addition, assignment, and variables. This language is represented by the type defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{expression} and a function that works over this type to rename a variable \texttt{"x"} to \texttt{"a"} is defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{renamexvar}.

	\begin{figure}[t]
	\begin{lstlisting}
type Name = String

data Expr =
     Value Int
   | Var Name
   | Add Expr Expr
   | Assign Name Expr
      deriving(Data,Typeable)
	\end{lstlisting}
	\caption{A simple expression type.}
	\label{expression}
	\end{figure}

	\begin{figure}[t]
	\begin{lstlisting}
renameXVar :: Expr -> Expr
renameXVar (Var "x") = Var "a"
renameXVar (Assign c e)
	| c == "x" = Assign "a" (renameXVar e)
	| otherwise = Assign c (renameXVar e)
renameXVar (Add e1 e2) = Add (renameXVar e1) (renameXVar e2)
renameXVar v = v
	\end{lstlisting}
	\caption{A function to rename the \texttt{"x"} variable.}
	\label{renamexvar}
	\end{figure}

	There are four cases to the \texttt{renameXVar} function. The first case matches when \texttt{renameXVar} is called with a \texttt{(Var "x")} value. This is one of the cases in which the function will replace the \texttt{"x"} name with an \texttt{"a"} name. The second case matches an assignment and is the other case when actual ``work'' happens. In this case the function needs to check whether the variable being assigned is \texttt{"x"} or not; if it is, then the name being assigned is replaced with \DIFdelbegin \DIFdel{"a" }\DIFdelend \DIFaddbegin \texttt{\DIFadd{"a"}}\DIFaddend , otherwise the name remains the same. Then there is a recursive call to replace the name in the right-hand side of the assignment. The other two cases will not directly modify an expression. In the \texttt{Add} case there is \DIFdelbegin \DIFdel{just }\DIFdelend a recursive call to perform replacements in its sub-expressions. The final case is \DIFdelbegin \DIFdel{just }\DIFdelend a catch-all term that returns its parameter. This will be called when \texttt{renameXVar} is called with a \texttt{Value} or a \texttt{Var} that \DIFdelbegin \DIFdel{isn't }\DIFdelend \DIFaddbegin \DIFadd{is not }\DIFaddend \texttt{"x"}.

	This is fairly straightforward and doesn't take much time to write. However, if subtraction was added to the definition of expression \texttt{renameXVar} would need to be updated as well to include a recursive call very similar to the addition case. These duplicated recursive calls are  \DIFdelbegin \DIFdel{what is known as "boilerplate " }\DIFdelend \DIFaddbegin \DIFadd{just boilerplate }\DIFaddend code~\citep{syb}. \DIFdelbegin \DIFdel{Boilerplate code is highly repetitive, verbose, and difficult to debug and maintain.  }\DIFdelend In this small example having a few of these types of cases is not an issue. However, as the expression \DIFaddbegin \DIFadd{type }\DIFaddend begins to approach the size of an actual programming language writing traversals like \texttt{renameXVar} would become much more time-consuming and a nightmare to maintain. 

	The reduction of boilerplate code like this is the point of SYB. SYB allows us to rewrite \texttt{renameXVar} as shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{renameSYB}.

	\begin{figure}[t]
	\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < import Data.Generics	
%DIFDELCMD < 	

%DIFDELCMD < rename :: Name -> Name
%DIFDELCMD < rename "x" = "a"
%DIFDELCMD < rename n = n
%DIFDELCMD < 

%DIFDELCMD < renameXVar :: Expr -> Expr
%DIFDELCMD < renameXVar = everywhere (mkT rename)
%DIFDELCMD < 	%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < 	%%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
import Data.Generics	

rename :: Name -> Name
rename "x" = "a"
rename n = n

renameXVar :: Expr -> Expr
renameXVar = everywhere (mkT rename)
\end{lstlisting}
\DIFaddendFL \caption{\texttt{renameXVar} written using SYB}
\label{renameSYB}
\end{figure} 

This example nicely illustrates the four key components of an SYB traversal~\citep{syb}.

	\begin{itemize}
		\item The function that performs the "interesting" part of the traversal
		\item A type extension for that function
		\item A generic traversal combinator
		\item The data type to be traversed must be an instance of the \texttt{Typeable} and \texttt{Data} classes (as explained below)
	\end{itemize}

	From the \DIFdelbegin \DIFdel{example mentioned previously }\DIFdelend \DIFaddbegin \DIFadd{earlier example }\DIFaddend the ``interesting'' part of this traversal is the \texttt{rename} function, because this function contains the code that actually changes the name \texttt{"x"} to the name \texttt{"a"}. The \texttt{mkT} function extends the type of the \texttt{rename} function to \texttt{Typeable~a~=>~a~->~a}. 

	Type extension allows for the \texttt{rename} function to work over any members of the \texttt{Typeable} class rather than just \texttt{Name}s. The extended version of \texttt{rename} will work as expected when provided with an argument of type \texttt{Name}, and if an argument of any other type is provided the traversal will continue \DIFdelbegin \DIFdel{onto the arguments component parts }\DIFdelend \DIFaddbegin \DIFadd{to descend the tree by moving onto the argument's children, }\DIFaddend if it has any\DIFaddbegin \DIFadd{, }\DIFaddend and returns the argument unchanged \DIFdelbegin \DIFdel{otherwise}\DIFdelend \DIFaddbegin \DIFadd{if it doesn't}\DIFaddend .

	The \texttt{everywhere} function is this traversal's generic combinator\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{. }\DIFaddend \texttt{everywhere} \DIFdelbegin \DIFdel{applies }\DIFdelend \DIFaddbegin \DIFadd{will apply }\DIFaddend a generic function to every node in \DIFdelbegin \DIFdel{the }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend tree in a bottom-up manner\DIFdelbegin \DIFdel{. Finally as you can see in the declaration of }\texttt{\DIFdel{Expr}} %DIFAUXCMD
\DIFdel{derives both the }\texttt{\DIFdel{Typeable}} %DIFAUXCMD
\DIFdel{and }\DIFdelend \DIFaddbegin \DIFadd{, as long as that tree's type is an instance of }\DIFaddend \texttt{Data} \DIFdelbegin \DIFdel{classes so it can be traversed by }\DIFdelend \DIFaddbegin \DIFadd{and }\DIFaddend \texttt{\DIFdelbegin \DIFdel{everywhere}\DIFdelend \DIFaddbegin \DIFadd{Typeable}\DIFaddend }. A member of the \texttt{Typeable} class has defined a generic representation of itself and members of the \texttt{Data} class implement generic folding operations. \DIFdelbegin \DIFdel{Put together these two classes are what allow a data typeto be generically traversed }\DIFdelend \DIFaddbegin \DIFadd{The }\texttt{\DIFadd{Expr}} \DIFadd{data type from Figure~\ref{expression} derives both the }\texttt{\DIFadd{Typeable}} \DIFadd{and }\texttt{\DIFadd{Data}} \DIFadd{classes, so it can be traversed by }\texttt{\DIFadd{everywhere}}\DIFaddend .  

\subsubsection{Types of Generic Algorithms}

SYB defines three types of generic algorithms\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{: }\DIFaddend transformations, queries, and monadic transformations. The \texttt{rename} example from the previous section is an example of a transformation. Transformations preserve the type of the structure that is traversed. Queries, on the other hand, are "type unifying" algorithms. Queries are good for summarizing information contained in a data structure. \DIFdelbegin \DIFdel{You would use a query }\DIFdelend \DIFaddbegin \DIFadd{A query would be used}\DIFaddend , for example, to traverse an expression and collect all of its bound variables. Using the same expression type (\DIFdelbegin \DIFdel{listing}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{expression}) from the previous section\DIFaddbegin \DIFadd{, }\DIFaddend the following function extracts all bound variables from a given expression.

\DIFdelbegin %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < [caption={A generic function that collects all bound variables from an expression.}]
%DIFDELCMD < bVars :: Expr -> [Name]
%DIFDELCMD < bVars e = everything (++) ([] `mkQ` f) e
%DIFDELCMD < 	where f (Assign nm _) = [nm]
%DIFDELCMD <           f _             = []
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
bVars :: Expr -> [Name]
bVars e = everything (++) ([] `mkQ` f) e
  where f (Assign nm _) = [nm]
        f _             = []
\end{lstlisting}
\caption{\DIFaddFL{A generic function that collects all bound variables from an expression.}}
\label{bVarsQuery}
\end{figure}
\DIFaddend 

\texttt{everything}, as seen in the \texttt{bVars} function in Figure~\ref{bVarsQuery}, is the generic query combinator that \DIFdelbegin \DIFdel{summarises }\DIFdelend \DIFaddbegin \DIFadd{queries }\DIFaddend all nodes, top down from left to right~\citep{sybDocs}. The first argument to \texttt{everything} is the function it uses to combine separate results from the query. In this case\DIFdelbegin \DIFdel{all }\DIFdelend \DIFaddbegin \DIFadd{, }\DIFaddend the lists of names will be appended together. The \DIFdelbegin \DIFdel{"interesting" }\DIFdelend \DIFaddbegin \DIFadd{``interesting'' }\DIFaddend function \texttt{f} returns a \DIFaddbegin \DIFadd{singleton list containing the bound }\DIFaddend \texttt{Name} when \DIFdelbegin \DIFdel{one is being bound is extended via }\DIFdelend \DIFaddbegin \DIFadd{applied to a variable binding, and an empty list otherwise. }\texttt{\DIFadd{f}} \DIFadd{has been extended with }\DIFaddend the \texttt{mkQ} function\DIFdelbegin \DIFdel{. }\DIFdelend \DIFaddbegin \DIFadd{; }\DIFaddend \texttt{mkQ} will apply \texttt{f} when possible\DIFaddbegin \DIFadd{, }\DIFaddend otherwise it will just return \DIFaddbegin \DIFadd{a default value, in this case }\DIFaddend the empty list.

It is also useful to \DIFdelbegin \DIFdel{do transformations from within some monadic context}\DIFdelend \DIFaddbegin \DIFadd{perform monadic transformations}\DIFaddend . A simple example of this would be to rewrite the earlier example of finding bound variables so that the list of found results is instead stored as a piece of state.

\DIFdelbegin %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < [caption={Finding bound variables using the state monad}]
%DIFDELCMD < type TransformState = State [Name]
%DIFDELCMD < 

%DIFDELCMD < bVars :: Expr -> [Name]
%DIFDELCMD < bVars e = execState findVars []
%DIFDELCMD <   where findVars = everywhereM (mkM f) e
%DIFDELCMD <         f :: Expr -> TransformState Expr
%DIFDELCMD <         f e@(Assign n _ ) = do
%DIFDELCMD <           modify (\lst -> n:lst)
%DIFDELCMD <           return e
%DIFDELCMD <         f e = return e
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
type TransformState = State [Name]

bVars :: Expr -> [Name]
bVars e = execState findVars []
  where findVars = everywhereM (mkM f) e
        f :: Expr -> TransformState Expr
        f e@(Assign n _ ) = do
          modify (\lst -> n:lst)
          return e
        f e = return e
\end{lstlisting}
\caption{\DIFaddFL{Finding bound variables using the state monad}}
\end{figure}
\DIFaddend 

\DIFdelbegin \DIFdel{A big }\DIFdelend \DIFaddbegin \DIFadd{An }\DIFaddend advantage of monadic traversals is that both querying and transformations can happen in a single pass. The example in \DIFdelbegin \DIFdel{listing}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{renameVars} renames every \texttt{Name} in an expression by \DIFdelbegin \DIFdel{add }\DIFdelend \DIFaddbegin \DIFadd{adding }\DIFaddend \texttt{"\_old"} as a suffix and stores the original names in a list. 

\DIFdelbegin %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < [caption={Changing every found name and storing the old names in a list.},label=renameVars]
%DIFDELCMD < renameVars :: Expr -> TransformState Expr
%DIFDELCMD < renameVars e = everywhereM (mkM f) e
%DIFDELCMD <   where f :: Expr -> TransformState Expr
%DIFDELCMD <           f (Assign n e) = do
%DIFDELCMD <             modify (\lst -> n:lst)
%DIFDELCMD <             return (Assign (n++"_old") e)
%DIFDELCMD <           f (Var n) = return (Var (n++"_old"))
%DIFDELCMD <           f e = return e
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD <  %%%
\DIFdelend \DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
renameVars :: Expr -> TransformState Expr
renameVars e = everywhereM (mkM f) e
  where f :: Expr -> TransformState Expr
        f (Assign n e) = do
          modify (\lst -> n:lst)
          return (Assign (n++"_old") e)
        f (Var n) = return (Var (n++"_old"))
        f e = return e
\end{lstlisting}
\caption{\DIFaddFL{Changing every found name and storing the old names in a list.}}
\label{renameVars}
\end{figure}
 \DIFaddend 

This type of generic traversal is very common in HaRe because traversals often need to make use of a refactoring's stored state or run something from the GHC API which needs the features provided by an instance of \texttt{GhcMonad}\DIFaddbegin \DIFadd{, }\DIFaddend all while modifying the abstract syntax. The GHC API's operations all work within the \texttt{GhcMonad} that provides the features GHC needs to compile a single Haskell source file, such as IO, logging warnings, exception handling, and keeping track of the compilation session~\citep{ghcApi}.   

\section{ghc-exactprint}
\label{ghcExactprint}

After \DIFdelbegin \DIFdel{using generic programming to transform }\DIFdelend \DIFaddbegin \DIFadd{transforming }\DIFaddend the abstract syntax \DIFdelbegin \DIFdel{, HaRe }\DIFdelend \DIFaddbegin \DIFadd{tree with generic programming, that abstract syntax }\DIFaddend needs to be \DIFdelbegin \DIFdel{able to print the modified code}\DIFdelend \DIFaddbegin \DIFadd{printed}\DIFaddend . A challenging part of building a refactoring tool is that a user will not want non-refactored parts of their code to change at all. \DIFdelbegin \DIFdel{HaRe needs to preserve the user's comments and spacing from the source file}\DIFdelend \DIFaddbegin \DIFadd{A source program's layout and comments need to be preserved after refactoring}\DIFaddend . 

Prior to GHC version 7.10.1\DIFaddbegin \DIFadd{, }\DIFaddend the location of certain keywords and punctuation (such as \texttt{do} and \texttt{let}) and user comments were lost after parsing. This made parsing and then printing an exact copy of a GHC Haskell source file \DIFdelbegin \DIFdel{using }\DIFdelend \DIFaddbegin \DIFadd{impossible from }\DIFaddend the AST alone\DIFdelbegin \DIFdel{impossible}\DIFdelend . GHC's 7.10.1 release added annotations for the "lost" syntax elements that had previously not been represented in the parsed abstract syntax~\citep{apiAnns}. 

The \DIFdelbegin \DIFdel{parser produces }\DIFdelend \DIFaddbegin \DIFadd{parsed source now includes }\DIFaddend a map that associates \DIFdelbegin \DIFdel{the keyword and }\DIFdelend \DIFaddbegin \DIFadd{each ``lost'' keyword with }\DIFaddend the source span that the keyword can be found in\DIFdelbegin \DIFdel{with that keywords }\DIFdelend \DIFaddbegin \DIFadd{, and that keyword's }\DIFaddend exact location. This approach was taken to avoid littering the existing AST with functionally meaningless keyword data~\citep{apiAnns}. 

Even with the position of every syntax element being recorded, printing a module after the AST has been modified is not easy. Many AST elements are "located" with a source span that indicates that element's exact position in the file. This means that any change to the AST will require updating the location of all the syntax elements that occur later in the line at least for single line changes and, in the case of changes that modify entire lines, every element after that change will need its location to be updated.

Ghc-exactprint simplifies this immensely by allowing us to position elements relative to their neighbours rather than absolutely~\citep{exactprint}. After parsing a source file\DIFaddbegin \DIFadd{, }\DIFaddend HaRe takes the annotations GHC returns with the parsed abstract syntax and relativises them using ghc-exactprint. For each syntax element ghc-exactprint creates a new data type called an "\texttt{Annotation}" which contains an offset that indicates where this element should be positioned compared to the previous element.
\DIFaddbegin 

\begin{figure}[t]
\begin{lstlisting} 
f a = (a+ 1 )
\end{lstlisting}
\caption{\DIFaddFL{A definition with strange spacing.}}
\label{strangeSpacing}
\end{figure}


\DIFaddend Take for example the \DIFdelbegin \DIFdel{following definition .
}%DIFDELCMD < 

%DIFDELCMD < \begin{lstlisting}%DIFDELCMD <  
%DIFDELCMD < f a = (a+ 1 )
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \DIFadd{definition in Figure~\ref{strangeSpacing}. }\DIFaddend The absolute position of the plus sign is row one column nine (GHC's locations are one-based) but using ghc-exactprint we instead can think of the position of the plus sign using the offset \texttt{(0,0)} because there is no space between it and the previous element (the variable \texttt{a}). Using this system the number literal following the plus sign has an offset of (0,1) because of the single column of space before it. 

The \texttt{Annotation}s are stored in a map that is keyed \DIFdelbegin \DIFdel{based }\DIFdelend on the parsed location of a syntax element and the string representation of the AST constructor. In the previous example the right hand side of the definition is located at the source span (1,7)--(1,13) which stands for ``row one\DIFaddbegin \DIFadd{, }\DIFaddend columns seven through thirteen'' and GHC represents this expression with the \texttt{HsPar} constructor for the \texttt{HsExpr} type. The source span and the constructor together can be combined to retrieve the annotation data associated with this fragment of the AST. This syntax tree has two elements associated with it, the opening and closing parentheses. Each of these keywords is given its own offset, which in this case is (0,0) for the opening parenthesis\footnote{The offset in this case is (0,0) and not (0,1) as you might expect because the space between the equals sign and the opening parenthesis is represented in the offset for the entire right hand side expression.} and (0,1) for the closing parenthesis. We could \DIFdelbegin \DIFdel{obviously }\DIFdelend infer from the use of the \texttt{HsPar} constructor that this tree is wrapped in parentheses; however, the user's specific spacing would be lost without the annotations.

Comments are another element of a source file that prior to GHC 7.10.1 were "lost" after parsing. Using ghc-exactprint comments are stored in the annotations associated with the next piece of syntax. In the \DIFdelbegin \DIFdel{following code snippet }\DIFdelend \DIFaddbegin \DIFadd{code snippet in Figure~\ref{commentSnippet} }\DIFaddend the comment on line three is added to the annotations associated with the declaration of the function \texttt{f} along with a delta position that indicates the comment is a single line before this declaration starts.

\DIFaddbegin \begin{figure}[t]
\DIFaddendFL \begin{lstlisting}
type Name = String

--a comment
f i = i + 1
\end{lstlisting}
\DIFaddbeginFL \caption{\DIFaddFL{The comment is ``attached'' to the AST of the function }\texttt{\DIFaddFL{f}}\DIFaddFL{.}}
\label{commentSnippet}
\end{figure}
\DIFaddend 


\section{The current implementation of HaRe}
\label{hareCurr}

\DIFdelbegin %DIFDELCMD < \begin{figure}
%DIFDELCMD < 	%%%
\DIFdelendFL \DIFaddbeginFL \begin{figure}[t]
	\DIFaddendFL \begin{center}
		\includegraphics[scale=.4]{graphVis/Chapter2/hareDesign.png}
	\end{center}
	\caption{A diagram of the components of HaRe and their dependencies.}
	\label{design}
\end{figure}

We have just discussed the major components \DIFdelbegin \DIFdel{that HaRe dependson}\DIFdelend \DIFaddbegin \DIFadd{on which HaRe depends}\DIFaddend . The GHC API gives \DIFdelbegin \DIFdel{us }\DIFdelend access to the internal representation of Haskell \DIFaddbegin \DIFadd{syntax}\DIFaddend , the generic programming libraries Scrap Your Boilerplate and Strafunski-StrategyLib \DIFdelbegin \DIFdel{allows HaRe to more easily work }\DIFdelend \DIFaddbegin \DIFadd{allow HaRe to work more easily }\DIFaddend with that internal representation, and ghc-exactprint is how HaRe preserves the source file's spacing when writing the output file. The relationship between the dependencies and HaRe's components is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{design}. From this foundation \DIFdelbegin \DIFdel{we can make HaRe focused }\DIFdelend \DIFaddbegin \DIFadd{the focus of HaRe can be }\DIFaddend on refactoring rather than solving these more generic problems.
\DIFdelbegin \DIFdel{This section will discuss how HaRe is implemented, what its APIprovides, and some general conventions that its refactorings use}\DIFdelend \DIFaddbegin 

\DIFadd{Very little, if any, of the code from the original implementation of HaRe is used today. Development of HaRe has been coordinated through GitHub}\footnote{\url{https://github.com/RefactoringTools/HaRe}} \DIFadd{since 2011. When I began working on HaRe in 2013, Alan Zimmerman had upgraded much of the internal structure of HaRe to use the GHC API}\DIFaddend .

\DIFaddbegin \DIFadd{The internal structure of HaRe is mostly the work of Alan; however, a description of the current implementation has not yet appeared in the literature, and so it is described in some detail in this section. My contributions to HaRe are in the development of the refactorings described throughout this thesis and in the creation of a high level API for program analysis, synthesis, and transformation which is described in Section~\ref{hareAPI}.
}

\DIFaddend \subsection{The Internal Structure of HaRe}
\label{hareInners}

The GHC API exposes \DIFdelbegin \DIFdel{all }\DIFdelend the functionality needed to compile a set of Haskell source files. In reality Haskell programs consist of more than just single source files without dependencies. Projects are organised using build tools such as Cabal~\citep{cabal} or Stack~\citep{stack} that handle \DIFdelbegin \DIFdel{these issues }\DIFdelend \DIFaddbegin \DIFadd{acquiring dependencies with the correct versions, running tests, compilation, and packaging binary code into an executable }\DIFaddend for the programmer. HaRe needs to be aware of the context that these tools provide because refactorings may change multiple modules or modify modules that import modules from external dependencies. In HaRe's case\DIFaddbegin \DIFadd{, }\DIFaddend ghc-mod provides a monadic context that handles these build environments and compiler setup~\citep{ghcMod}. Within ghc-mod's context\DIFaddbegin \DIFadd{, }\DIFaddend HaRe keeps track of the state of the refactoring session.

\DIFdelbegin %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < [caption={HaRe's Monad \texttt{RefactGhc}},captionpos=b, label=refactghc] 
%DIFDELCMD < newtype RefactGhc a = RefactGhc
%DIFDELCMD <     { unRefactGhc :: GM.GhcModT (StateT RefactState IO) a}
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
newtype RefactGhc a = RefactGhc
    { unRefactGhc :: GM.GhcModT (StateT RefactState IO) a}
\end{lstlisting}
\caption{\DIFaddFL{HaRe's Monad }\texttt{\DIFaddFL{RefactGhc}}}
\label{refactghc}
\end{figure}
\DIFaddend 

\DIFdelbegin \DIFdel{Listing}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{refactghc} shows the definition of HaRe's monad that each refactoring runs in. \texttt{RefactState} is an algebraic data type that keeps track of all the settings \DIFaddbegin \DIFadd{for the refactoring session, the abstract syntax trees of the current target file}\DIFaddend , \DIFdelbegin \DIFdel{abstract syntax, }\DIFdelend and filepath for refactoring a single file.
\DIFdelbegin \DIFdel{It is possible to refactor client modules by setting the filepath in the }\texttt{\DIFdel{RefactState}} %DIFAUXCMD
\DIFdel{to target another module. HaRe will then parse and typecheck that module's information into the state for transformation.
}\DIFdelend 

Even though \DIFdelbegin \DIFdel{there are three types of the AST, }\DIFdelend the \DIFaddbegin \DIFadd{GHC API produces a separate version of the AST for each compilation stage (parsing, renaming, and type checking), the }\DIFaddend annotations are only included in the parsed AST. Once a refactoring has finished\DIFaddbegin \DIFadd{, }\DIFaddend all of its changes need to be reflected in the parsed abstract syntax tree and its annotations because this is the structure that ghc-exactprint works over. HaRe's state still contains all three of the syntax trees because the renamed and typed \DIFdelbegin \DIFdel{source }\DIFdelend \DIFaddbegin \DIFadd{sources }\DIFaddend are useful for the additional type and scope information they contain about \DIFdelbegin \DIFdel{the }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend source file. 

\subsection{HaRe's API}

Modifying and reasoning about GHC's abstract syntax and maintaining the associated annotations is still a complex task\DIFaddbegin \DIFadd{, }\DIFaddend even with help from HaRe's dependencies. HaRe defines its own API to help fill this gap between its dependencies and its refactorings. In addition to the obvious functions that are required for running a refactoring within the \texttt{RefactGhc} monad\DIFaddbegin \DIFadd{, }\DIFaddend the API also includes helper functions that make working with the state easier. 

HaRe also defines a large collection of program analysis and transformation functions. For example, pulling the binding of a top level variable from a module's \DIFdelbegin \DIFdel{entire }\DIFdelend abstract syntax tree is a task that many refactorings have to do\DIFaddbegin \DIFadd{, }\DIFaddend so this functionality is part of HaRe's API. There are also small program transformations that are not in and of themselves refactorings but common \DIFdelbegin \DIFdel{low level }\DIFdelend \DIFaddbegin \DIFadd{low-level }\DIFaddend modifications that are useful to several refactorings, such as adding a new import declaration or making a function infix by wrapping it in back quotes (the \texttt{`} character).

Additionally there are several transformations that \DIFdelbegin \DIFdel{don't }\DIFdelend \DIFaddbegin \DIFadd{do not }\DIFaddend affect the abstract syntax\DIFdelbegin \DIFdel{as much as they }\DIFdelend \DIFaddbegin \DIFadd{, but which }\DIFaddend change the annotations that format ghc-exactprint's output. Adding new lines before a syntax element doesn't change the meaning of a program but is important for a refactoring's output to be well formatted and easy to read. 

HaRe's dependencies help abstract away the low-level\footnote{Low level from the perspective of a refactoring at least.} details of a language back-end, build tools, and pretty printing. HaRe's API tries to close the gap still left between the dependencies and the refactorings themselves. In the next section we will take a look at how HaRe's refactorings are implemented.


\subsection{Implementing Refactorings in HaRe}

HaRe \DIFdelbegin \DIFdel{actually requires very few things from }\DIFdelend \DIFaddbegin \DIFadd{does not enforce any particular structure onto }\DIFaddend a refactoring implementation. \DIFdelbegin \DIFdel{As was mentioned previously in section~\ref{hareInners} everything }\DIFdelend \DIFaddbegin \DIFadd{A refactoring implementation in HaRe needs to do a few things:
}

\begin{enumerate}
\item \DIFadd{A refactoring }\DIFaddend must run inside \DIFdelbegin \DIFdel{of }\DIFdelend the \texttt{RefactGhc} monad whose state is where the abstract syntax of a target module is stored.
\DIFaddbegin \item \DIFaddend A refactoring is also expected to return a list of  "\texttt{ApplyRefacResult}s" which contains an updated parsed AST and annotations\DIFdelbegin \DIFdel{along }\DIFdelend \DIFaddbegin \DIFadd{, together }\DIFaddend with the filepath the AST originated from; this updated AST is what HaRe writes out as the result of the refactoring\DIFdelbegin \footnote{\DIFdel{HaRe actually outputs to a temporary file. When refactoring "file.hs" HaRe produces "file.refactored.hs." This allows programmers to check the result of a refactoring before overwriting the existing module.}}  
%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdelend \DIFaddbegin \DIFadd{.
}\end{enumerate}
\DIFaddend 

Beyond those two features\DIFaddbegin \DIFadd{, }\DIFaddend refactorings are free to be implemented however the programmer chooses. However, certain conventions have been adopted within many of HaRe's refactorings. When describing a refactoring one would imagine that checking \textit{pre}conditions would be the first thing the implementation of that refactoring computes. A more efficient implementation checks preconditions throughout computation alongside the AST transformation. For example, the renaming refactoring doesn't check for name clashes in client modules until after the source module has been modified. If a name clash is found in a client module then an error is thrown at that point and the whole refactoring is rolled back. Though this slows down the time to failure somewhat\DIFaddbegin \DIFadd{, }\DIFaddend it saves HaRe from traversing every client module twice, once for preconditions and a second time for rewriting.

Merging precondition checking with transformation saves the refactoring from traversing parts of the AST multiple times\DIFdelbegin \DIFdel{, for }\DIFdelend \DIFaddbegin \DIFadd{. For }\DIFaddend example, the renaming refactoring checks for name conflicts while it descends the AST\DIFaddbegin \DIFadd{, }\DIFaddend replacing the old name with the new one. Obviously this strategy only works for certain preconditions\DIFaddbegin \DIFadd{: }\DIFaddend the only precondition for deleting a definition is that the target definition \DIFdelbegin \DIFdel{isn't }\DIFdelend \DIFaddbegin \DIFadd{is not }\DIFaddend used. The transformation only affects the syntax tree of the definition to be deleted so the implementation of the refactoring has to do a separate scan of the rest of the target module and any of its client modules to determine if the target definition is used or not.

\DIFdelbegin %DIFDELCMD < \subsection{The Development Process of HaRe}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \subsection{Using GHC's Abstract Syntax Trees}
\DIFadd{Each stage of the compiler takes the previous stage's output and creates its own version of the tree decorated with some additional information. This poses a challenge when writing refactorings because all of the changes made to the source need to be reflected in the parsed syntax tree. In general the parsed syntax contains enough information to perform a refactoring. Since the parsed AST is also where changes must be made for ghc-exactprint to output the modifications, most of the refactorings only look at the parsed AST. If information is required from one of the other compilation stages it is either pre-collected at the start of the refactoring, or the }\texttt{\DIFadd{SrcSpan}}\DIFadd{s that decorate all of the syntax trees can be used to navigate to the equivalent points in different syntax trees.
}\DIFaddend 

\DIFdelbegin \DIFdel{Very little, if any, of the code from the original implementation of HaRe is used today.
Development of HaRe has been coordinated through GitHub}\footnote{%DIFDELCMD < \url{https://github.com/RefactoringTools/HaRe}%%%
} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{since 2011. When I began working on HaRe in 2013 Alan Zimmerman had upgraded much of the internal structure of HaRe to use the GHC API. }%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The internal structure of HaRe is mostly the work of Alan however a description of the current implementation has not appeared in the literature yet and so it has been described in some detail here. My contributions to HaRe are in the development of the refactorings described in this thesis and in the creation of a high level api for program analysis, synthesis, and transformation}\DIFdelend \DIFaddbegin \DIFadd{In the same way that a refactoring can be sped up through careful consideration of how much re-traversing of the abstract syntax tree is absolutely necessary, it is also important to be mindful of when it is absolutely necessary to traverse through the other stages' trees. For the most part HaRe's refactorings can avoid having to look up information in either the renamed or typechecked trees. The renaming refactoring, for example, uses a }\texttt{\DIFadd{NameMap}} \DIFadd{which associates every }\texttt{\DIFadd{RdrName}} \DIFadd{in the parsed source with its }\texttt{\DIFadd{Name}} \DIFadd{from the renamed source. This allows the refactoring to ensure that the instance of a variable it is going to rename is actually the targeted variable and not just a variable with the same name in a different scope. This map is calculated at the start of the refactoring, so the renamed syntax tree only needs to be traversed once rather than multiple times to check if each instance of a name that matches the target variable's name actually refers to the target variable or if the same name is used again in a different scope.
}

\DIFadd{Collecting any required information from the other syntax trees up front is a good way to limit how much these trees are traversed, but often traversing these trees can be avoided entirely. For example, the ``generalising }\texttt{\DIFadd{Maybe}}\DIFadd{'' refactoring described in Chapter~\ref{chp:ddRefs} works on replacing the }\texttt{\DIFadd{Maybe}} \DIFadd{type with a more generic type if possible. }\texttt{\DIFadd{Maybe a}} \DIFadd{is a type that represents a computation that may return a value of type }\texttt{\DIFadd{a}} \DIFadd{or fail in some way; the failure case is represented by the }\texttt{\DIFadd{Nothing}} \DIFadd{constructor. One tricky part about Haskell is that imports can be ``qualified'': a module can require that the values imported from another module be referred to with a prefix, so that variables with the same name as an imported value can be used without name clashes. This means that it is not safe to just assume that any constructor named }\texttt{\DIFadd{Just}} \DIFadd{or }\texttt{\DIFadd{Nothing}} \DIFadd{is actually of type }\texttt{\DIFadd{Maybe}}\DIFadd{. The generalising }\texttt{\DIFadd{Maybe}} \DIFadd{refactoring does not check the type of all the }\texttt{\DIFadd{Just}}\DIFadd{s and }\texttt{\DIFadd{Nothing}}\DIFadd{s encountered in the target program. Instead, the refactoring begins by checking the imports of the target module to see if the }\texttt{\DIFadd{Data.Maybe}} \DIFadd{module has been given a qualification, and if so takes into account that the target module will refer to }\texttt{\DIFadd{Maybe}}\DIFadd{'s constructors with a qualified name. After that check, since }\texttt{\DIFadd{Maybe}} \DIFadd{is included in the prelude, it is safe to assume that any }\texttt{\DIFadd{Just}}\DIFadd{s and }\texttt{\DIFadd{Nothing}}\DIFadd{s in the target program are of type }\texttt{\DIFadd{Maybe}}\DIFaddend .

\DIFaddbegin \DIFadd{One refactoring that does need to reference the typed syntax tree fairly heavily is the List to Hughes List refactoring described in Section~\ref{listToDlist}. This refactoring needs to change the type of entire subtrees, so it references the typed syntax tree to see what the type of the current node is during the transformation. This is fairly straightforward due to the source being tagged with }\texttt{\DIFadd{SrcSpan}}\DIFadd{s that are consistent throughout the compilation process. This means that a piece of typed abstract syntax is the same as a particular parsed syntax subtree if their }\texttt{\DIFadd{SrcSpan}}\DIFadd{s are the same. Searching through the later compilation stage's trees is as simple as doing a full traversal of the tree searching for the appropriate }\texttt{\DIFadd{SrcSpan}}\DIFadd{.
}

\section{Summary}

\DIFadd{The work done for this thesis was fortunate enough to be built upon the contributions of others who made HaRe and its dependencies. The current implementation of HaRe}\footnote{\DIFadd{Which for the purpose of this thesis is release v0.8.4.1 on GitHub: }\url{https://github.com/RefactoringTools/HaRe/tree/4055ef45f0de3c966fd7841986ab0ed2ee814055}} \DIFadd{and some of its notable dependencies were the focus of this chapter. 
}

\DIFadd{The next chapter will begin to describe the major contribution of this thesis: the design and implementation of data-driven refactorings. It opens with a discussion of what a data-driven refactoring is and then presents some simple refactoring designs.
}

\DIFaddend \chapter{Data-driven refactorings}
\label{chp:ddRefs}

Some refactorings focus on rewriting the structure of a program (e.g. refactoring an \texttt{if} to \texttt{case} statement)\DIFaddbegin \DIFadd{, but }\DIFaddend a data-driven refactoring focuses on the \DIFdelbegin \DIFdel{types }\DIFdelend \DIFaddbegin \DIFadd{data }\DIFaddend that a program works over and \textit{how} that program manipulates \DIFdelbegin \DIFdel{those types}\DIFdelend \DIFaddbegin \DIFadd{that data}\DIFaddend . Any changes that a data-driven refactoring makes to the structure of the program are \textit{driven} by the data types of that program rather than being the main motivation of performing the refactoring. 

\DIFdelbegin \DIFdel{This chapter will begins in section~\ref{ooRefs} }\DIFdelend \DIFaddbegin \DIFadd{Consider the ``introducing a type synonym'' refactoring (described in more detail in Section~\ref{introSyn}). This refactoring adds a type synonym, i.e.\ a user-chosen name, for some type. The motivation behind this is to ensure that the names of types correspond to the data that they represent. For example, if some strings are used to represent customer names in a program, introducing a synonym for strings called }\texttt{\DIFadd{Name}} \DIFadd{helps clarify what kind of data certain strings are supposed to represent. In this way the refactoring's changes to the source program are motivated by the data in that program and the desire for the program's types to better communicate the nature of that data.
}

\DIFadd{This chapter begins }\DIFaddend with a discussion of data-driven refactorings for imperative \DIFaddbegin \DIFadd{and }\DIFaddend object-oriented languages \DIFaddbegin \DIFadd{in Section~\ref{ooRefs}}\DIFaddend . Imperative programs are written as a sequence of steps that modify an object to \DIFdelbegin \DIFdel{the }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend desired output state. Types in an object-oriented \DIFdelbegin \DIFdel{also }\DIFdelend \DIFaddbegin \DIFadd{program }\DIFaddend exist in a strict hierarchy\DIFdelbegin \DIFdel{. }\DIFdelend \DIFaddbegin \DIFadd{, every type is a sub-type of a top type commonly known as ``}\texttt{\DIFadd{Object}}\DIFadd{.'' }\DIFaddend These two factors mean that object-oriented data-driven refactorings modify the hierarchy\DIFaddbegin \DIFadd{, by moving methods between related classes or extracting new child or parent classes, }\DIFaddend while leaving the structure of the code mostly \DIFdelbegin \DIFdel{intact}\DIFdelend \DIFaddbegin \DIFadd{unchanged}\DIFaddend . The rest of this chapter will describe data-driven refactorings for functional programs.

In comparison to \DIFdelbegin \DIFdel{the imperative way of constructing }\DIFdelend \DIFaddbegin \DIFadd{imperative }\DIFaddend programs, functional programs \DIFdelbegin \DIFdel{tend to }\DIFdelend describe the relationship between the input and output data. Functional programming languages also tend to offer a much richer type system than object-oriented languages where data types are typically either \DIFdelbegin \DIFdel{objects or a set of }\DIFdelend \DIFaddbegin \DIFadd{classes or a }\DIFaddend non-user expandable \DIFaddbegin \DIFadd{set of }\DIFaddend primitive types. These two facts mean that the structure of a functional program can be determined by the types it works over to a much greater degree than in imperative languages.

Section~\ref{introSyn} describes the \DIFdelbegin \DIFdel{introduce type synonym}\DIFdelend \DIFaddbegin \DIFadd{``introducing a type synonym'' }\DIFaddend refactoring. This is the renaming of data-driven refactorings\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{: }\DIFaddend neither the structure nor the type of the target program changes\DIFaddbegin \DIFadd{, }\DIFaddend just how the program refers to the types it works over. Next, \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{genMaybe} describes a generalisation refactoring. This thesis outlines two generalisations: one (described in \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{genMaybe}) makes code of a specific type (\texttt{Maybe}) work over a type class (\texttt{MonadPlus} or \texttt{Monad}) instead. The other generalises code \DIFdelbegin \DIFdel{of one }\DIFdelend \DIFaddbegin \DIFadd{using the functions defined in a particular }\DIFaddend type class to \DIFdelbegin \DIFdel{use a class higher the hierarchy}\DIFdelend \DIFaddbegin \DIFadd{instead use the functions of its superclass }\DIFaddend (covered in \DIFdelbegin \DIFdel{chapter}\DIFdelend \DIFaddbegin \DIFadd{Chapter}\DIFaddend ~\ref{chp:applicative}). Finally \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{listToDlist} describes a refactoring that transforms programs that work over lists to instead use an alternate implementation of lists. The refactoring method described in this chapter could be applied to refactor between \DIFaddbegin \DIFadd{any }\DIFaddend types that can be \DIFdelbegin \DIFdel{projected }\DIFdelend \DIFaddbegin \DIFadd{``projected'' }\DIFaddend onto another type. 
\DIFaddbegin \DIFadd{A type ``}\texttt{\DIFadd{A}}\DIFadd{'' can be projected onto some type ``}\texttt{\DIFadd{B}}\DIFadd{'' when all of the information stored in an instance of }\texttt{\DIFadd{A}} \DIFadd{can be transformed to be of type }\texttt{\DIFadd{B}} \DIFadd{instead. 
}\DIFaddend 

\section{Object-Oriented Data Refactorings}\label{ooRefs}

The origins of refactoring are deeply rooted in the object-oriented world~(\cite{programRestructuring}\DIFdelbegin \DIFdel{,}\DIFdelend \DIFaddbegin \DIFadd{;}\DIFaddend ~\cite{refactOOFrameworks})\DIFdelbegin \DIFdel{. Though it traces its origins to }\DIFdelend \DIFaddbegin \DIFadd{, though its origins can be found in }\DIFaddend work on transforming Algol~\citep{recursiveTransformation}. The canonical catalogue of refactorings remains Martin Fowler's \emph{Refactoring: Improving the Design of Existing Code} ~\citep{fowler}. Fowler's catalogue of refactorings are all written in Java though he purposefully avoided using any features that were unique to Java, so that the refactorings could be \DIFdelbegin \DIFdel{applicable }\DIFdelend \DIFaddbegin \DIFadd{applied }\DIFaddend in many different \DIFaddbegin \DIFadd{object-oriented }\DIFaddend programming languages.

As a functional programmer \DIFdelbegin \DIFdel{when }\DIFdelend going through this catalogue \DIFdelbegin \DIFdel{of refactorings }\DIFdelend there seem to be three types of refactorings.

\begin{itemize}
	\item Refactorings that are directly applicable in a functional language
	\item Refactorings that could be adapted for use in a functional language
	\item Refactorings not applicable to a functional language
\end{itemize}

That first type of refactorings \DIFdelbegin \DIFdel{' are }\DIFdelend \DIFaddbegin \DIFadd{are typically }\DIFaddend structural refactorings and their usefulness to a functional program is easily understood. Refactorings like renaming or adding a parameter don't depend on object-oriented features existing in the target language.

The third type of refactorings are so dependent on features associated with object-oriented languages that they are impossible to implement or have no equivalent in a functional language. The ``remove setting method'' refactoring depends on the common OO pattern of each field of a class having ``getter'' and ``setter'' methods that retrieve or modify that field respectively. This pattern, on the other hand, is not as \DIFdelbegin \DIFdel{ubiquitous }\DIFdelend \DIFaddbegin \DIFadd{common }\DIFaddend in functional languages because most do not support objects\DIFaddbegin \DIFadd{, }\DIFaddend and immutability makes a ``setter'' function irrelevant. OCaml would be a notable exception to this rule: OCaml supports objects and allows the programmer to mark variables as mutable, and so getter and setter methods are possible.

\begin{figure}[t]
\begin{lstlisting}[language=caml, morekeywords={object,method}]
let mInt init_i = object
    val mutable i = init_i

    method get_i = i
    method set_i new_i =
      i <- new_i
  end
\end{lstlisting}
\caption{An OCaml object with getter and setter methods.}
\label{ocamlObj}
\end{figure}

The OCaml object from \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{ocamlObj} would be a valid target for this refactoring. However these methods are not as common as they are in imperative object-oriented languages with mutable data as the default, so a refactoring to remove a setter method is of limited \DIFdelbegin \DIFdel{value }\DIFdelend \DIFaddbegin \DIFadd{applicability }\DIFaddend even for OCaml.

The second type of refactoring found in~\citep{fowler} is much more interesting to a functional programmer. The specifics of these refactorings are not directly applicable to functional programs but the underlying motivations are relevant to any programming paradigm. 

For example\DIFdelbegin \DIFdel{here is }\DIFdelend \DIFaddbegin \DIFadd{, }\DIFaddend the motivation for the ``Replace Data Value with Object'' refactoring in~\citep[p.~175]{fowler} \DIFaddbegin \DIFadd{reads as}\DIFaddend :

\begin{displayquote}
Often in early stages of development you make decisions about representing simple facts as simple data items. As development proceeds you realize that those simple items aren't so simple anymore. A telephone number may be represented as a string for a while, but later you realize that the telephone needs special behavior for formatting, extracting the area code, and the like. For one or two items you may put methods in the owning object, but quickly the code smells of duplication and feature envy\footnote{Feature envy is when the methods of \DIFdelbegin \DIFdel{a }\DIFdelend \DIFaddbegin \DIFadd{one }\DIFaddend class are more \DIFdelbegin \DIFdel{interesting in a different }\DIFdelend \DIFaddbegin \DIFadd{concerned with the features of another }\DIFaddend object than the one it is in. \DIFaddbegin \DIFadd{In this phone number as strings example feature envy would be seen when objects are doing the area code and formatting calculations for the object that contains the phone number.}\DIFaddend }. When the smell begins, turn the data value into an object.
\end{displayquote}

This refactoring extracts a field that was some primitive type into an object. The example from~\citep{fowler} works over an order class with a string that represents the customer that placed an order.

\begin{figure}[t]
\begin{lstlisting}[language = java,tabsize=4]
class Order {
	public Order (String customer) {
		_customer = customer;	
	}

	public String getCustomer() {
		return _customer;
	}

	public void setCustomer(String arg){
		_customer = arg;	
	}

	private String _customer;
}
\end{lstlisting}
\caption{The Order class}
\end{figure}

The refactoring creates a new customer class that has a string field with a getter method as seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{custCls}. The customer class doesn't add any additional features but the extra layer of abstraction \DIFaddbegin \DIFadd{that has been added }\DIFaddend sets up the code base for further development. The customer object could have fields added that represent contact info or further demographic information without polluting the order class with order-irrelevant data.

\begin{figure}[t]
\begin{lstlisting}[language = java,tabsize=4]
class Order {
	public Order (String customer) {
		_customer = new Customer(customer);	
	}

	public String getCustomer() {
		return _customer.getName();
	}

	public void setCustomer(String arg){
		_customer = new Customer(arg);	
	}

	private Customer _customer;
}

class Customer {
	public Customer(String name){
		_name = name;
	}

	public String getName() {
		return _name;
	}

	private final String _name;
}
\end{lstlisting}
\caption{The result of the Replace Data Value with Object refactoring when applied to the customer field of the order class.}
\label{custCls}
\end{figure}

Functional programmers have to make similar data representation decisions \DIFdelbegin \DIFdel{as }\DIFdelend \DIFaddbegin \DIFadd{to those of }\DIFaddend the object-oriented programmer. At the start of a project representing a customer \DIFdelbegin \DIFdel{just }\DIFdelend by their name could be reasonable. As the project develops this can become a serious limitation and a more robust abstraction \DIFdelbegin \DIFdel{is }\DIFdelend \DIFaddbegin \DIFadd{could be }\DIFaddend required. 

This section introduced how object-oriented refactorings help build up the data model over the lifetime of a project. \DIFdelbegin \DIFdel{In an object-oriented language the primary abstraction method is to introduce additional objects. }\DIFdelend These sorts of refactorings for a language with a rich type system like Haskell offer many more choices for evolving the data model of a system. \DIFdelbegin \DIFdel{An equivalent, but simpler refactoring, in the Haskell case would replace the string type synonym with the abstract data type in figure~\ref{custData}}\DIFdelend \DIFaddbegin \DIFadd{Though a Haskell programmer can take inspiration or directly reimplement several of the canonical Fowler refactorings there may be additional refactorings that are completely functional with no OO counterparts, like monadification (Chapter~\ref{chp:monadification}) or abstraction of expressions into a local }\texttt{\DIFadd{where}} \DIFadd{or }\texttt{\DIFadd{let}} \DIFadd{clause}\DIFaddend . The rest of this chapter will describe refactorings for Haskell that support data model evolution.  

\DIFdelbegin %DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < data Customer = Customer {name :: String}
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{The algebraic data type for }\texttt{\DIFdelFL{Customer}}%DIFAUXCMD
\DIFdelFL{.}}
%DIFAUXCMD
%DIFDELCMD < \label{custData}
%DIFDELCMD < \end{figure}
%DIFDELCMD < %%%
\DIFdelend %DIF > \begin{figure}[t]
%DIF > \begin{lstlisting}
%DIF > data Customer = Customer {name :: String}
%DIF > \end{lstlisting}
%DIF > \caption{The algebraic data type for \texttt{Customer}.}
%DIF > \label{custData}
%DIF > \end{figure}
\section{Data-Driven Refactorings in Haskell}

Haskell offers a rich environment for data representation. The Haskell 2010 standard defines several types that are included in the prelude, including tuples, lists, characters, strings (which are just lists of characters), several types of numbers, and the function type. \DIFdelbegin \DIFdel{Additionally }\DIFdelend \DIFaddbegin \DIFadd{GHC also provides more types than the standard requires and }\DIFaddend programmers can construct new \DIFdelbegin \DIFdel{types with }\DIFdelend algebraic and abstract data types or rename existing types with type synonyms. Type classes support overloading as well. The standard library of GHC comes with many type classes that can help produce powerful abstractions~\citep{typeclassopedia}. Contrast this with how most object-oriented type systems are either unified\DIFaddbegin \DIFadd{, }\DIFaddend where every type is a subclass of some top level \texttt{Object} class (e.g. C\# or Ruby)\DIFaddbegin \DIFadd{, }\DIFaddend or there \DIFdelbegin \DIFdel{are is }\DIFdelend \DIFaddbegin \DIFadd{is }\DIFaddend a fixed set of predefined primitive types as well as the object hierarchy (e.g. C++ and Java's type systems). 

These two different approaches to type systems both have pros and cons\DIFdelbegin \DIFdel{which has }\DIFdelend \DIFaddbegin \DIFadd{, which have }\DIFaddend sparked a vigorous (and quite possibly \DIFdelbegin \DIFdel{eternal}\DIFdelend \DIFaddbegin \DIFadd{protracted}\DIFaddend ) debate. The goal of this thesis is not to add to this debate, instead \DIFaddbegin \DIFadd{it is }\DIFaddend to build from a number of general principles that transcend the debate.

\begin{itemize}
	\item Data representation is a language independent problem that must be answered in every project.
	\item The way a project manages and represents its data is very likely to evolve over a project's lifetime and this is indeed desirable.
	\item Refactoring is a structured way to support this evolution.
		\item Refactorings for a language like Haskell need to take a different approach than those for object-oriented languages, particularly when refactorings are data-driven rather than ``structural.''  
	\item Data representation in a rich type system like Haskell's\DIFaddbegin \DIFadd{, }\DIFaddend in some sense\DIFaddbegin \DIFadd{, }\DIFaddend determines the structure of the program.
\end{itemize} 

\DIFdelbegin %DIFDELCMD < \subsection{Introducing a Type Synonym}%%%
\DIFdelend \DIFaddbegin \section{Introducing a Type Synonym}\DIFaddend \label{introSyn}

Type synonyms in Haskell, as mentioned previously, are a way to name an existing type. A simple example can be seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{fooSyn}. 

\begin{figure}[t]
	\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < type Foo = (String, Int)
%DIFDELCMD < 

%DIFDELCMD < f :: Foo -> Foo
%DIFDELCMD < f x@(_, 0) = x
%DIFDELCMD < f (str, i) = (tail str, i-1) 
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < \label{fooSyn}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
type Foo = (String, Int)

f :: Foo -> Foo
f x@(_, 0) = x
f (str, i) = (tail str, i-1) 
	\end{lstlisting}
	\DIFaddendFL \caption{A simple type synonym.}\DIFaddbeginFL \label{fooSyn}
\DIFaddendFL \end{figure}

Any place where the \texttt{Foo} synonym is in scope, the new name can be used to refer to any value of type \texttt{(String, Int)}. In fact ``\texttt{Foo}s'' and ``\texttt{(String, Int)}s'' are completely interchangeable. Introducing a synonym is a good way to quickly and simply name types to suggest their specific use in the current application.

Returning to the example\DIFdelbegin \DIFdel{from~\mbox{%DIFAUXCMD
\citep{fowler} }\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{, used in Section~\ref{ooRefs}, }\DIFaddend of an order type that keeps track of the customer who placed the order\DIFdelbegin \DIFdel{ used in section~\ref{ooRefs}, see figure~\ref{haskellOrder} for }\DIFdelend \DIFaddbegin \DIFadd{, from~\mbox{%DIFAUXCMD
\cite{fowler}}\hspace{0pt}%DIFAUXCMD
, Figure~\ref{haskellOrder} shows }\DIFaddend a Haskell implementation of this type and a function that counts how many orders a particular customer has placed in a list of orders.

\begin{figure}
	\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < data Order = Order {customer :: String}
%DIFDELCMD < 

%DIFDELCMD < numberOfOrdersFor :: [Order] -> String -> Int
%DIFDELCMD < numberOfOrdersFor orders name = length (filter (\ord -> name == (customer ord)) orders)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < \label{haskellOrder}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
data Order = Order {customer :: String}

numberOfOrdersFor :: [Order] -> String -> Int
numberOfOrdersFor orders name = length (filter (\ord -> name == (customer ord)) orders)
	\end{lstlisting}
	\DIFaddendFL \caption{Using an algebraic data type for \texttt{order}.}
	\DIFaddbeginFL \label{haskellOrder}
\DIFaddendFL \end{figure}

The \DIFdelbegin \DIFdel{current }\DIFdelend \DIFaddbegin \DIFadd{initial }\DIFaddend representation of a customer as just a \texttt{String} is underdeveloped. \DIFaddbegin \DIFadd{This representation of a customer only allows the system to store a single ``field'' of information per customer; as the system becomes more developed it will want to associate more information with a particular customer and this representation will need to change.
}

 \DIFaddend Introducing a customer synonym is a simple step that sets up the code base for further development. The synonym will clearly mark which strings in the program stand for customers and which do not.

The \DIFdelbegin \DIFdel{introduce }\DIFdelend \DIFaddbegin \DIFadd{``introducing }\DIFaddend a type synonym\DIFaddbegin \DIFadd{'' }\DIFaddend refactoring works by taking a type and a valid synonym name (as per the Haskell 2010 standard~\citep{haskell2010}) and creates a new synonym. In this case the type is \texttt{String} and the synonym name should be something like "\texttt{Customer}." The \DIFdelbegin \DIFdel{only }\DIFdelend \DIFaddbegin \DIFadd{(only) }\DIFaddend precondition of the refactoring is that the new synonym\DIFaddbegin \DIFadd{'s }\DIFaddend name cannot cause a name clash with any other \DIFdelbegin \DIFdel{variables in the same module and }\DIFdelend \DIFaddbegin \DIFadd{types. Also }\DIFaddend if the synonym is exported \DIFdelbegin \DIFdel{in }\DIFdelend \DIFaddbegin \DIFadd{to }\DIFaddend any client modules that define something \DIFdelbegin \DIFdel{of }\DIFdelend \DIFaddbegin \DIFadd{with }\DIFaddend the same name\DIFaddbegin \DIFadd{, these modules }\DIFaddend will need to qualify the import of the synonym. 

\begin{figure}[t]
\begin{lstlisting}
type Customer = String
\end{lstlisting}
\caption{The customer synonym}
\end{figure}

After this\DIFaddbegin \DIFadd{, }\DIFaddend the next part of the refactoring involves replacing the appropriate uses of the type with the new synonym. This part of the refactoring is difficult to automate and needs to be interactive\DIFdelbegin \footnote{\DIFdel{This is not a feature of HaRe yet.}}%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{. There is }\DIFdelend \DIFaddbegin \DIFadd{, which is not a feature of HaRe yet. The current implementation of this refactoring takes in a flag as an argument that determines if the refactoring will replace all instances of the type with the newly introduced synonym. This is not the ideal implementation of the refactoring, but there is }\DIFaddend no way to infer which instances of \texttt{String} (in the case of this example) should be replaced with the \texttt{Customer} synonym. The code from Figure~\ref{haskellOrder} can have all of the string instances replaced by \texttt{Customer} because all the strings are being used to represent a customer. \DIFdelbegin \DIFdel{If there were a second function }\DIFdelend \DIFaddbegin \DIFadd{However, consider the }\DIFaddend \texttt{printThankYou} \DIFaddbegin \DIFadd{function in Figure~\ref{printTY} }\DIFaddend which has type \texttt{String -> Order -> IO ()} \DIFdelbegin \DIFdel{which }\DIFdelend \DIFaddbegin \DIFadd{and }\DIFaddend prints out a customized ``thank you'' message to the customer for their order. 

\begin{figure}[t]
\begin{lstlisting}
printThankYou :: String -> Order -> IO ()
printThankYou businessName order = do
	putStrLn ("Thank you " ++ (customer order) ++ " for your order.")
	putStrLn (businessName ++ " hopes to see you again soon!")
\end{lstlisting}
\caption{The \texttt{printThankYou} function}
\DIFaddbeginFL \label{printTY}
\DIFaddendFL \end{figure}

Though the first argument to \texttt{printThankYou} is a \texttt{String}, it does not represent a customer and therefore should not be replaced by the new synonym. The implicit meaning of the first argument hasn't been encoded in the type system so the programmer has to make a decision during the refactoring to make their intention for each instance of \texttt{String} clear. The final result of the refactoring can be seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{orderRefact}. 

\begin{figure}[t]
	\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < type Customer = String
%DIFDELCMD < 

%DIFDELCMD < data Order = Order {customer :: Customer}
%DIFDELCMD < 

%DIFDELCMD < numberOfOrdersFor :: [Order] -> Customer -> Int
%DIFDELCMD < numberOfOrdersFor orders name = length (filter (\ord -> name == (customer ord)) orders)
%DIFDELCMD < 

%DIFDELCMD < printThankYou :: String -> Order -> IO ()
%DIFDELCMD < printThankYou businessName order = do
%DIFDELCMD < 	putStrLn ("Thank you " ++ (customer order) ++ " for your order.")
%DIFDELCMD < 	putStrLn (businessName ++ " hopes to see you again soon!")
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < \label{orderRefact}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
type Customer = String

data Order = Order {customer :: Customer}

numberOfOrdersFor :: [Order] -> Customer -> Int
numberOfOrdersFor orders name = length (filter (\ord -> name == (customer ord)) orders)

printThankYou :: String -> Order -> IO ()
printThankYou businessName order = do
	putStrLn ("Thank you " ++ (customer order) ++ " for your order.")
	putStrLn (businessName ++ " hopes to see you again soon!")
	\end{lstlisting}
	\DIFaddendFL \caption{The program after adding a synonym for String.}\DIFaddbeginFL \label{orderRefact}
\DIFaddendFL \end{figure}

This transformation might seem like too small of a step. Wouldn't it be preferable to introduce a more powerful abstraction such as an algebraic data type? One of the principles of the Agile Manifesto is ``simplicity'', which is described as ``the art of maximizing the amount of work not done''~\citep{agileManifesto}. The work not done in this case is the introduction of a more complex customer representation. This small step does clearly differentiate the strings that represent customers from other strings that represent other types of data.

Small refactorings are also a good practice for tool builders. Creating multiple small composable steps that can be used in multiple ways \DIFdelbegin \DIFdel{are }\DIFdelend \DIFaddbegin \DIFadd{is }\DIFaddend much more flexible than large ``monolithic'' style refactorings.


\DIFdelbegin %DIFDELCMD < \section{Generalising Maybe}%%%
\DIFdelend \DIFaddbegin \section{Generalisation}
\DIFaddend \label{genMaybe}

A common type of refactoring is generalisation. A \DIFdelbegin \DIFdel{structural generalising refactoring would be extract a method or function. It takes expressions specific to a single function and generalises them so that they are applicable to multiple functions.
}\DIFdelend \DIFaddbegin \DIFadd{generalising refactoring that is structural would be the ``generalise a definition'' refactoring from~\mbox{%DIFAUXCMD
\citep{huiqingThesis}}\hspace{0pt}%DIFAUXCMD
. This refactoring takes an expression from inside of a function and refactors it so that expression is passed as a parameter instead. Figure~\ref{genOp} shows two functions, }\texttt{\DIFadd{f}} \DIFadd{and }\texttt{\DIFadd{f\_ref}}\DIFadd{, where }\texttt{\DIFadd{f\_ref}} \DIFadd{is the function }\texttt{\DIFadd{f}} \DIFadd{after the generalise a definition refactoring is performed on it. The refactored function has become more general because instead of only being used for summing two numbers, it can be reused for any binary operations on numbers. 
}\DIFaddend 

\DIFdelbegin \DIFdel{Data-driven generalisation is taking code written for a specific type and rewriting it to use a more general type. The newly generic code is applicable in more places and can help reduce code duplication. 
Fowler's Refactoring dedicates an entire chapter to generalisation , cataloguing refactorings like "extract subclass" and "extract interface"~\mbox{%DIFAUXCMD
\citep[pg. 319]{fowler}}\hspace{0pt}%DIFAUXCMD
. }\DIFdelend \DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
f :: Num a => a -> a -> a
f x y = x + y

f_ref :: Num a => (a -> a -> a) -> a -> a -> a
f_ref op x y = x `op` y
\end{lstlisting}
\caption{\DIFaddFL{Generalising the }\texttt{\DIFaddFL{+}} \DIFaddFL{operator from the function f.}}
\label{genOp}
\end{figure} 
\DIFaddend 

\DIFdelbegin \DIFdel{Generalisations for object-oriented languageseither move functionality upin a }\DIFdelend \DIFaddbegin \DIFadd{In an object oriented language generalisation refactorings deal mostly with either ``moving methods around a hierarchy of inheritance,'' or creating new classes that change the hierarchy~\mbox{%DIFAUXCMD
\citep{fowler}}\hspace{0pt}%DIFAUXCMD
. Examples of generalising refactorings for OO languages as described in~\mbox{%DIFAUXCMD
\citep{fowler} }\hspace{0pt}%DIFAUXCMD
are pull up method, push down method, extract subclass, and extract interface. Interestingly about half of the generalisation refactorings described by Fowler are better described as specialisations, such as push down method, because they make general code more specific by moving it further down the object hierarchy.
}

\DIFadd{In object oriented languages, moving elements ``up'' the }\DIFaddend hierarchy (as in \DIFdelbegin \DIFdel{"push method up") or change the hierarchy by adding classes (e. g. "extract subclass"). Object-oriented languages have a single hierarchy since every object inherits from a root class typically just called }\texttt{\DIFdel{Object}}%DIFAUXCMD
\DIFdel{.
Functional languages don't have this single unified hierarchy , but smaller hierarchies do exist because type classes can inherit from one another. }\DIFdelend \DIFaddbegin \DIFadd{from subclass to superclass) is a generalisation, and doing the opposite is specialisation. This should make intuitive sense; there are more fruits than just oranges, so moving a method from the orange class up to its superclass, the fruit class, makes that code more general because it can now be applied to all subclasses of fruit, not just the orange class.
}\DIFaddend 

\DIFaddbegin \DIFadd{Code in functional languages doesn't inhabit the rigid hierarchy of objects that exists in OO languages, so generalisation can take on more forms. The core effect that a generalisation refactoring has is taking something specific (such as a sub-expression, or an instance of a particular type) and making it more general in some way. A sub-expression may be extracted into a parameter; this makes the target function more general because the behaviour that was previously set by the extracted sub-expression is now determined by the expression passed in via the new parameter. Specific instances of types are generalised by being replaced with a type class. The purpose of type classes is to provide a generic interface for some behaviour, so replacing a specific type with a type class means that the target code is more general because now it can be applied to the set of types that implement that type class, not just a single type.
}

\DIFadd{This section describes refactoring functions that use the concrete }\texttt{\DIFadd{Maybe}} \DIFadd{type to instead use one of the type classes it is an instance of, either }\texttt{\DIFadd{Monad}} \DIFadd{or }\texttt{\DIFadd{MonadPlus}}\DIFadd{. These refactorings will take a function of type }\texttt{\DIFadd{a -> Maybe b}} \DIFadd{and rewrite it to become either }\texttt{\DIFadd{Monad m => a -> m b}} \DIFadd{or }\texttt{\DIFadd{MonadPlus m => a -> m b}}\DIFadd{. The refactored function is a generalisation of the original because instead of only being applicable to a single type (}\texttt{\DIFadd{Maybe}} \DIFadd{in this case) the refactored function can now be used with any type that is an instance of }\texttt{\DIFadd{Monad}} \DIFadd{or }\texttt{\DIFadd{MonadPlus}}\DIFadd{.
}

\subsection{Generalising Maybe}

\DIFaddend This section describes a refactoring that rewrites programs of type \texttt{Maybe a} to use the \texttt{MonadPlus} type class or, if possible, the \texttt{Monad} type class. \DIFdelbegin %DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < data Maybe a = Nothing
%DIFDELCMD <                  | Just a
%DIFDELCMD <                  

%DIFDELCMD < instance MonadPlus Maybe where
%DIFDELCMD <    mzero = Nothing
%DIFDELCMD <    Nothing `mplus` r = r
%DIFDELCMD <    l          `mplus` _ = l
%DIFDELCMD <                  

%DIFDELCMD < class Monad m => MonadPlus m where
%DIFDELCMD <    mzero :: m a
%DIFDELCMD <    mplus :: m a -> m a -> m a
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{The }\texttt{\DIFdelFL{Maybe}} %DIFAUXCMD
\DIFdelFL{data type definition and }\texttt{\DIFdelFL{MonadPlus}} %DIFAUXCMD
\DIFdelFL{instance declaration, and the }\texttt{\DIFdelFL{MonadPlus}} %DIFAUXCMD
\DIFdelFL{class definition.}}
%DIFAUXCMD
%DIFDELCMD < \label{maybeMonadPlus}
%DIFDELCMD < \end{figure}
%DIFDELCMD <  

%DIFDELCMD < %%%
\DIFdel{Figure~\ref{maybeMonadPlus} contains the class declaration for }\texttt{\DIFdel{MonadPlus}} %DIFAUXCMD
\DIFdel{the }\DIFdelend \texttt{Maybe} \DIFdelbegin \DIFdel{instance of that class. The }\DIFdelend \DIFaddbegin \DIFadd{is a data type whose declaration is shown in Figure~\ref{maybeDecl}. }\DIFaddend \texttt{Maybe} \DIFdelbegin \DIFdel{type }\DIFdelend represents a computation that \DIFdelbegin \DIFdel{may }\DIFdelend \DIFaddbegin \DIFadd{can fail or }\DIFaddend return a value \DIFdelbegin \DIFdel{wrapped in }\DIFdelend \DIFaddbegin \DIFadd{of type }\texttt{\DIFadd{a}}\DIFadd{. When a computation has failed }\texttt{\DIFadd{Maybe}} \DIFadd{represents this ``result'' with the }\texttt{\DIFadd{Nothing}} \DIFadd{constructor. When a calculation succeeds then }\texttt{\DIFadd{Maybe}} \DIFadd{returns the result wrapped with }\DIFaddend the \texttt{Just} constructor\DIFdelbegin \DIFdel{or may fail (represented by the }\texttt{\DIFdel{Nothing}} %DIFAUXCMD
\DIFdel{constructor). The }\DIFdelend \DIFaddbegin \DIFadd{. }\DIFaddend \texttt{\DIFdelbegin \DIFdel{MonadPlus}%DIFDELCMD < } %%%
\DIFdel{class is a typeclass for monads that also have a monoidal structure~}\footnote{\DIFdel{A monoid is a semigroup whose associative binary operation has an identity element.}}%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdelend \DIFaddbegin \DIFadd{Maybe}} \DIFadd{is a commonly used type in Haskell programs; it is also an instance of many common type classes, including }\texttt{\DIFadd{Monad}} \DIFadd{and }\texttt{\DIFadd{MonadPlus}}\DIFaddend . 

The \DIFdelbegin \texttt{\DIFdel{MonadPlus}} %DIFAUXCMD
\DIFdel{class helps generalise monads that contain some concept of failure }\DIFdelend \DIFaddbegin \DIFadd{definition of }\texttt{\DIFadd{Maybe}} \DIFadd{as well as its instance declarations for the }\texttt{\DIFadd{Monad}} \DIFaddend and \DIFdelbegin \DIFdel{choice. The }\DIFdelend \DIFaddbegin \texttt{\DIFadd{MonadPlus}} \DIFadd{type classes are shown in Figures~\ref{maybeDecl} and~\ref{maybeMonadDef}. The declaration of the }\texttt{\DIFadd{MonadPlus}} \DIFadd{type class as well as }\texttt{\DIFadd{Maybe}}\DIFadd{'s definition of it are shown in Figure~\ref{maybeMonadPlusDef}.
}

\begin{figure}[t]
\begin{lstlisting}
data Maybe a = Nothing
                 | Just a
\end{lstlisting}
\caption{\DIFaddFL{The }\texttt{\DIFaddFL{Maybe}} \DIFaddFL{data type declaration.}}
\label{maybeDecl}
\end{figure}

\begin{figure}[t]
\begin{lstlisting}   
instance Monad Maybe where
	return = Just

	(Just a) >>= f = f a
	Nothing  >>= _ = Nothing
\end{lstlisting}
\caption{\texttt{\DIFaddFL{Maybe}}\DIFaddFL{'s monad instance definition}}
\label{maybeMonadDef}
\end{figure}

\begin{figure}[t]
\begin{lstlisting}                 
class Monad m => MonadPlus m where
   mzero :: m a
   mplus :: m a -> m a -> m a

instance MonadPlus Maybe where
   mzero = Nothing
   Nothing `mplus` r = r
   l          `mplus` _ = l

\end{lstlisting}
\caption{\DIFaddFL{The }\texttt{\DIFaddFL{Maybe}} \texttt{\DIFaddFL{MonadPlus}} \DIFaddFL{instance declaration, and the }\texttt{\DIFaddFL{MonadPlus}} \DIFaddFL{class definition.}}
\label{maybeMonadPlusDef}
\end{figure}


\subsection{Refactoring Maybe to MonadPlus} 
\label{maybeMonadPlus}
\texttt{\DIFadd{MonadPlus}} \DIFadd{is the obvious generalisation of }\texttt{\DIFadd{Maybe}}\DIFadd{. }\texttt{\DIFadd{Maybe}} \DIFadd{encodes failure using the }\texttt{\DIFadd{Nothing}} \DIFadd{constructor and its definition of }\texttt{\DIFadd{mplus}} \DIFadd{chooses to discard failed computations on the left in exchange for a possibly successful one. This behaviour is exactly what }\texttt{\DIFadd{MonadPlus}} \DIFadd{is designed to encapsulate. This section describes a refactoring for rewriting functions that use }\texttt{\DIFadd{Maybe}} \DIFadd{to use the }\texttt{\DIFadd{MonadPlus}} \DIFadd{type class instead.
}

\begin{figure}[t]
\begin{lstlisting}
showNat :: Int -> Maybe String
showNat i =
  if (i >= 0)
    then (Just (show i))
    else Nothing
\end{lstlisting}
\caption{\texttt{\DIFaddFL{showNat}}}
\label{mmp2}
\end{figure}

\DIFadd{The function }\DIFaddend \texttt{\DIFdelbegin \DIFdel{mzero}\DIFdelend \DIFaddbegin \DIFadd{showNat}\DIFaddend } \DIFdelbegin \DIFdel{value represents a failed computation and }\texttt{\DIFdel{mplus}} %DIFAUXCMD
\DIFdel{represents a way of making a "choice" between two computations that may or may not have failed~\mbox{%DIFAUXCMD
\citep{typeclassopedia}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{takes a pure value and returns a }\texttt{\DIFadd{Maybe}}\DIFadd{, instead of taking in a }\texttt{\DIFadd{Maybe}} \DIFadd{and returning another }\texttt{\DIFadd{Maybe}} \DIFadd{as the function in Figure~\ref{mmp1} does. The more general version of }\texttt{\DIFadd{showNat}} \DIFadd{needs to be able to express the idea of failure that }\texttt{\DIFadd{MonadPlus}} \DIFadd{encodes with the }\texttt{\DIFadd{mzero}} \DIFadd{operation}\DIFaddend . 

\DIFdelbegin %DIFDELCMD < \begin{figure}
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < class Applicative m => Monad m where
%DIFDELCMD < 	return :: a -> m a
%DIFDELCMD <     (>>=)  :: forall a b. m a -> (a -> m b) -> m b
%DIFDELCMD <     

%DIFDELCMD < instance Monad Maybe where
%DIFDELCMD < 	return = Just
%DIFDELCMD < 	

%DIFDELCMD < 	(Just a) >>= f = f a
%DIFDELCMD < 	Nothing  >>= _ = Nothing
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{figure}[t]
\begin{lstlisting}
showNat :: (MonadPlus m) => Int -> m String
showNat i =
  if (i >= 0)
    then (return (show i))
    else mzero
\end{lstlisting}
\DIFaddendFL \caption{\DIFdelbeginFL \texttt{\DIFdelFL{Maybe}}%DIFAUXCMD
\DIFdelFL{'s monad instance definition}\DIFdelendFL \DIFaddbeginFL \texttt{\DIFaddFL{showNat}} \DIFaddFL{refactored}\DIFaddendFL }
\DIFdelbeginFL %DIFDELCMD < \label{maybeMonad}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \label{mmp2Ref}
\DIFaddendFL \end{figure} 

\DIFdelbegin %DIFDELCMD < \subsection{Refactoring Maybe to Monad}%DIFDELCMD < \label{genMonad}%%%
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{This refactoring replaces }\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{mmp2Ref} shows the refactored version of }\texttt{\DIFadd{showNat}}\DIFadd{. In this case the refactoring can simply replace the }\DIFaddend \texttt{Maybe}-specific \DIFdelbegin \DIFdel{code with the more general }\texttt{\DIFdel{Monad}} %DIFAUXCMD
\DIFdel{and }\texttt{\DIFdel{MonadPlus}} %DIFAUXCMD
\DIFdel{operations}\DIFdelend \DIFaddbegin \DIFadd{calls with more general ones}\DIFaddend . This is \DIFdelbegin \DIFdel{done by recognizing when }\DIFdelend \DIFaddbegin \DIFadd{very similar to the way that the expression that made up the anonymous function from Figure~\ref{mmp1Ref} was constructed by changing calls to }\texttt{\DIFadd{Just}} \DIFadd{to }\texttt{\DIFadd{return}} \DIFadd{and calls to }\texttt{\DIFadd{Nothing}} \DIFadd{to }\texttt{\DIFadd{mzero}}\DIFadd{. 
}

\DIFadd{Due to the }\texttt{\DIFadd{return}} \DIFadd{operator of }\texttt{\DIFadd{Monad}} \DIFadd{being equivalent to }\texttt{\DIFadd{Just}} \DIFadd{and }\texttt{\DIFadd{mzero}} \DIFadd{being supplied by }\texttt{\DIFadd{MonadPlus}}\DIFadd{, both of }\DIFaddend \texttt{Maybe}\DIFdelbegin \DIFdel{-specific code is structured in the same way as the }\DIFdelend \DIFaddbegin \DIFadd{'s constructors can be replaced with }\DIFaddend more general operations. \DIFdelbegin \DIFdel{Figure~\ref{mmp1} contains a simple example of this}\DIFdelend \DIFaddbegin \DIFadd{Between these two type classes all of the functionality of }\texttt{\DIFadd{Maybe}} \DIFadd{can be replaced in functions that build }\texttt{\DIFadd{Maybe}}\DIFadd{s. However, if a function deconstructs a }\texttt{\DIFadd{Maybe}} \DIFadd{type through pattern matching it may not be generalisable. Consider the function }\texttt{\DIFadd{printResult}} \DIFadd{in Figure~\ref{printRes}. }\texttt{\DIFadd{printResult}} \DIFadd{performs a pattern match over the }\texttt{\DIFadd{Maybe}} \DIFadd{data constructors rather than only using them in right hand side expressions; there is no way to generalise pattern matching on }\texttt{\DIFadd{Just}}\DIFaddend .

\begin{figure}[t]
\DIFaddbeginFL \begin{lstlisting}
printResult :: (Show a) => Maybe a -> IO ()
printResult m =
  case m of
     Nothing -> putStrLn "Something went wrong"
     (Just i) -> putStrLn $ "The result is: " ++ (show i)
\end{lstlisting}
\caption{\texttt{\DIFaddFL{printResult}}}
\label{printRes}
\end{figure}

\subsection{Refactoring Maybe to Monad}
\label{genMonad}

\DIFadd{In some cases it is possible to generalise a }\texttt{\DIFadd{Maybe}} \DIFadd{type to become }\texttt{\DIFadd{Monad}} \DIFadd{instead. When a function duplicates the work that is done by }\texttt{\DIFadd{Maybe}}\DIFadd{'s implementation of bind then that function can be generalised to use the monadic interface instead.
}

\begin{figure}[t]
\DIFaddendFL \begin{lstlisting}
inc :: Maybe Int -> Maybe Int
inc Nothing = Nothing
inc (Just i) = (Just (i + 1))
\end{lstlisting}
\caption{\texttt{inc}}
\label{mmp1}
\end{figure}

The function \texttt{inc} \DIFaddbegin \DIFadd{from Figure~\ref{mmp1} }\DIFaddend can be rewritten to use the monadic operations bind (\texttt{>>=}) and \texttt{return} instead; this is because the definition of \texttt{inc} matches the definition of bind in the instance declaration of \texttt{Maybe} as a monad. Figure~\ref{incRewrite} has another version of \texttt{inc} in which that relationship is clearer.

\begin{figure}[t]
\begin{lstlisting}
inc :: Maybe Int -> Maybe Int
inc x = new_inc x f

new_inc :: Maybe Int -> (Int -> Maybe Int) -> Maybe Int
Nothing `new_inc` _ = Nothing
(Just i) `new_inc` f = f i

f i = Just (i + 1)
\end{lstlisting}
\caption{\texttt{inc} rewritten to look more like bind}
\label{incRewrite}
\end{figure}

The helper function \texttt{new\_inc} is defined using infix notation, to more closely match \texttt{Maybe}'s definition of bind. Also, like \texttt{Maybe}'s bind, when \texttt{new\_inc}'s first argument is \texttt{Nothing} it just returns \texttt{Nothing}. Finally, the right hand side of the second case of the original \texttt{inc} was lifted into a separate function, \texttt{f}, which is passed to \texttt{new\_inc} as an argument, just like the function argument of bind.

In practice the refactoring will, by default, create an anonymous function from the right hand side of the \texttt{Just} case with calls to \texttt{Just} and \texttt{Nothing} replaced with \texttt{return} and \texttt{mzero} respectively. The final refactored version of \texttt{inc} is in \DIFdelbegin \DIFdel{figure~\ref{mmp1Ref}}\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{mmp1Ref}.
}\DIFaddend 

\begin{figure}[t]
\begin{lstlisting}
inc :: (Monad m) => m Int -> m Int
inc mi = mi >>= (\i -> (return (i+1)))
\end{lstlisting}
\caption{Final output from generalising \texttt{inc}}
\label{mmp1Ref}
\end{figure}

It is worth saying that if the right hand side of the \texttt{Just} case from the original implementation of \texttt{inc} contained calls to \texttt{Nothing} then this function could not be generalised to \texttt{Monad}. \DIFdelbegin \DIFdel{Instead, the anonymous function would replace the occurrences of }\texttt{\DIFdel{Nothing}} %DIFAUXCMD
\DIFdel{with }\texttt{\DIFdel{mzero}} %DIFAUXCMD
\DIFdel{instead.  
 }%DIFDELCMD < 

%DIFDELCMD < \subsection{Refactoring Maybe to MonadPlus} 
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The first example could be rewritten using functionality provided only by the }\DIFdelend \DIFaddbegin \DIFadd{This is because }\DIFaddend \texttt{Monad}\DIFdelbegin \DIFdel{type class. Obviously this isn't always the case. An example of a function that cannot be completely generalised to }\texttt{\DIFdel{Monad}} %DIFAUXCMD
\DIFdel{is in listing~\ref{mmp2}. }%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < showNat :: Int -> Maybe String
%DIFDELCMD < showNat i =
%DIFDELCMD <   if (i <= 0)
%DIFDELCMD <     then (Just (show i))
%DIFDELCMD <     else Nothing
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\texttt{\DIFdelFL{showNat}}%DIFAUXCMD
}
%DIFAUXCMD
%DIFDELCMD < \label{mmp2}
%DIFDELCMD < \end{figure}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The function }\texttt{\DIFdel{showNat}} %DIFAUXCMD
\DIFdel{takes in a pure value and returns a }\texttt{\DIFdel{Maybe}}%DIFAUXCMD
\DIFdel{, instead of taking in a }\texttt{\DIFdel{Maybe}} %DIFAUXCMD
\DIFdel{and returning another }\texttt{\DIFdel{Maybe}} %DIFAUXCMD
\DIFdel{as in the previous example. The more general version of }\texttt{\DIFdel{showNat}} %DIFAUXCMD
\DIFdel{needs to be able to express the idea of failure that }\DIFdelend \DIFaddbegin \DIFadd{s do not have a built in concept that can represent }\texttt{\DIFadd{Nothing}}\DIFadd{. The refactored function could be generalised to }\DIFaddend \texttt{MonadPlus} \DIFdelbegin \DIFdel{encodes with the }\texttt{\DIFdel{mzero}} %DIFAUXCMD
\DIFdel{operation. 
}%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < showNat :: (MonadPlus m) => Int -> m String
%DIFDELCMD < showNat i =
%DIFDELCMD <   if (i <= 0)
%DIFDELCMD <     then (return (show i))
%DIFDELCMD <     else mzero
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\texttt{\DIFdelFL{showNat}} %DIFAUXCMD
\DIFdelFL{refactored}}
%DIFAUXCMD
%DIFDELCMD < \label{mmp2Ref}
%DIFDELCMD < \end{figure} 
%DIFDELCMD <  

%DIFDELCMD < %%%
\DIFdel{Figure~\ref{mmp2Ref} shows the refactored version of }\texttt{\DIFdel{showNat}}%DIFAUXCMD
\DIFdel{. In this case the refactoring can simply replace the }\texttt{\DIFdel{Maybe}}%DIFAUXCMD
\DIFdel{-specific calls with more general ones. This is very similar to the way that the expression that made up the anonymous function from listing~\ref{mmp1Ref} was constructed by changing calls to }\texttt{\DIFdel{Just}} %DIFAUXCMD
\DIFdel{to }\texttt{\DIFdel{return}} %DIFAUXCMD
\DIFdel{and calls to }\DIFdelend \DIFaddbegin \DIFadd{by replacing the }\DIFaddend \texttt{Nothing}\DIFdelbegin \DIFdel{to }\DIFdelend \DIFaddbegin \DIFadd{s with }\DIFaddend \texttt{mzero}\DIFaddbegin \DIFadd{s}\DIFaddend .

\DIFdelbegin \DIFdel{Due to the }\texttt{\DIFdel{return}} %DIFAUXCMD
\DIFdel{operator of }\texttt{\DIFdel{Monad}} %DIFAUXCMD
\DIFdel{being equivalent to }\texttt{\DIFdel{Just}} %DIFAUXCMD
\DIFdel{and }\texttt{\DIFdel{mzero}}%DIFAUXCMD
\DIFdel{being supplied by }\texttt{\DIFdel{MonadPlus}}%DIFAUXCMD
\DIFdel{, both of }\texttt{\DIFdel{Maybe}}%DIFAUXCMD
\DIFdel{'s constructors can be replaced with a more general operation. Between these two type classes all of the functionality of }\texttt{\DIFdel{Maybe}} %DIFAUXCMD
\DIFdel{can be replaced in functions that build }\texttt{\DIFdel{Maybe}}%DIFAUXCMD
\DIFdel{s. However if a function deconstructs a }\texttt{\DIFdel{Maybe}} %DIFAUXCMD
\DIFdel{type through pattern matching it may not be generalisable. Consider the function }\texttt{\DIFdel{printResult}} %DIFAUXCMD
\DIFdel{in figure~\ref{printRes}.
}%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < printResult :: (Show a) => Maybe a -> IO ()
%DIFDELCMD < printResult m =
%DIFDELCMD <   case m of
%DIFDELCMD <      Nothing -> putStrLn "Something went wrong"
%DIFDELCMD <      (Just i) -> putStrLn $ "The result is: " ++ (show i)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\texttt{\DIFdelFL{printResult}}%DIFAUXCMD
}
%DIFAUXCMD
%DIFDELCMD < \label{printRes}
%DIFDELCMD < \end{figure}
%DIFDELCMD < 

%DIFDELCMD < %%%
\texttt{\DIFdel{printResult}} %DIFAUXCMD
\DIFdel{uses }\texttt{\DIFdel{Maybe}}%DIFAUXCMD
\DIFdel{'s constructors for pattern matching rather than in expressions; there is no generic pattern match for }\texttt{\DIFdel{Just}}%DIFAUXCMD
\DIFdel{. Refactoring }\texttt{\DIFdel{printResult}} %DIFAUXCMD
\DIFdel{will require the programmer to provide a way to convert its argument to }\texttt{\DIFdel{Maybe}}%DIFAUXCMD
\DIFdel{.
}%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < printResult :: (Show a, MonadPlus m) => (m a -> Maybe a) -> m a -> IO ()
%DIFDELCMD < printResult f = printResult_old . f
%DIFDELCMD <    where
%DIFDELCMD < 	printResult_old :: Maybe a -> IO ()
%DIFDELCMD <    	printResult_old m =
%DIFDELCMD <    	  case m of
%DIFDELCMD <   	  Nothing -> putStrLn "Something went wrong"
%DIFDELCMD <   	  (Just i) -> putStrLn $ "The result is: " ++ (show i)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{Refactored }\texttt{\DIFdelFL{printResult}}%DIFAUXCMD
}
 %DIFAUXCMD
%DIFDELCMD < \label{printRef}
%DIFDELCMD < \end{figure}
%DIFDELCMD <  

%DIFDELCMD < %%%
\DIFdel{Figure~\ref{printRef} show the finished refactoring of }\texttt{\DIFdel{printResult}}%DIFAUXCMD
\DIFdel{. The original definition of }\texttt{\DIFdel{printResult}} %DIFAUXCMD
\DIFdel{is renamed and moved into a local definition. The new }\texttt{\DIFdel{printResult}} %DIFAUXCMD
\DIFdel{function now takes in an additional parameter, an "abstraction function." The abstraction function abstracts the new type}\DIFdelend \DIFaddbegin \DIFadd{Generalisation is a very common type of refactoring because it helps prevent repetition. A function with some specific behaviour can be generalised by extracting a sub-expression and passing that expression as a parameter instead. After the refactoring the target function's behaviour can be changed and applied in more places without having to repeat the implementation over again to change the extracted sub-expression. This section has discussed a refactoring to generalise a specific type, }\DIFaddend \texttt{\DIFdelbegin \DIFdel{m a}\DIFdelend \DIFaddbegin \DIFadd{Maybe}\DIFaddend }\DIFdelbegin \DIFdel{back to the old }\texttt{\DIFdel{Maybe a}}%DIFAUXCMD
\DIFdel{. The inverse of the abstraction function is the projection function}\footnote{\DIFdel{In this specific case the projection function is of type: }\texttt{\DIFdel{Maybe a -> m a}}%DIFAUXCMD
}%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{. These types of functions are used more in the next section (\ref{listToDlist}).
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{When passed the abstraction function }\texttt{\DIFdel{printResult}} %DIFAUXCMD
\DIFdel{just converts the now genericly typed argument to }\texttt{\DIFdel{Maybe a}} %DIFAUXCMD
\DIFdel{and then calls the original function definition.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{In general this refactoring case doesn't do much. This is because its difficult to ``consume'' a more general typedue to argument types being contravariant under the function type constructor}\DIFdelend \DIFaddbegin \DIFadd{, to one of its type classes, }\texttt{\DIFadd{MonadPlus}} \DIFadd{or }\texttt{\DIFadd{Monad}}\DIFadd{. Doing this generalises the target functions so that they can be applied to different values, rather than needing to implement separate functions for each specific type}\DIFaddend .

\section{``List to Hughes List'' Refactoring}\label{listToDlist}

The previous section discussed a refactoring that is most useful in the early stages of development when the details of data representation are in their infancy. As a project develops more and more decisions must be made about how data is represented and processed. Mid-development it may become necessary to change which data structures the program uses in order to facilitate adding additional features or for performance reasons.

This section \DIFdelbegin \DIFdel{will cover }\DIFdelend \DIFaddbegin \DIFadd{covers }\DIFaddend a refactoring for automatically replacing a type by \DIFdelbegin \DIFdel{"projecting " }\DIFdelend \DIFaddbegin \DIFadd{projecting }\DIFaddend that type into another\DIFaddbegin \DIFadd{~\mbox{%DIFAUXCMD
\citep{galoisConnection}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . The two types (the original and \DIFdelbegin \DIFdel{the }\DIFdelend new type) don't necessarily \DIFdelbegin \DIFdel{to have a formal syntactic }\DIFdelend \DIFaddbegin \DIFadd{have an explicit }\DIFaddend relationship from the compiler's point of view (e.g.\ both implement the same type class) but instead \DIFdelbegin \DIFdel{have an ad-hoc, semantic similarity. }\DIFdelend \DIFaddbegin \DIFadd{present a semantically similar interface. For example, a binary search tree and a list support the same basic operations, such as insert, delete, and lookup. Though the implementation details of lists and trees are very different, from the user's perspective all they need to know is what operations their chosen structure supports. The refactoring described in this section replaces one data type with another that supports similar operations.
}\DIFaddend 

\subsection{Hughes Lists}

Appending two lists into a single list is a fundamental operation over lists. The standard implementation of append is \DIFdelbegin \DIFdel{seen on line one in figure}\DIFdelend \DIFaddbegin \DIFadd{shown in Figure}\DIFaddend ~\ref{append}.
\DIFdelbegin \footnote{\DIFdel{The }\texttt{\DIFdel{(++)}} %DIFAUXCMD
\DIFdel{operator is the infix definition of append.}} 
%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdelend 

\begin{figure}[t]
\begin{lstlisting}
(++) :: [a] -> [a] -> [a]
[]  ++ ys = ys
(x:xs) ++ ys = x:(xs ++ ys)
\end{lstlisting}
\caption{The standard definition of append}
\label{append}
\end{figure}

If the first argument to append is the empty list then append can just return the second argument. In the other case append traverses the first list popping off the head of the first list and recursively appending the tail of the first list and the second argument. The performance of append is \DIFaddbegin \DIFadd{therefore }\DIFaddend proportional to the length of its first argument. This has the unfortunate side effect that if a program builds \DIFdelbegin \DIFdel{of }\DIFdelend a list by repeatedly appending \DIFdelbegin \DIFdel{to }\DIFdelend \DIFaddbegin \DIFadd{a single element to }\DIFaddend the end of a list\DIFaddbegin \DIFadd{, }\DIFaddend the program will spend significant amounts of time traversing the beginning of the list \DIFdelbegin \DIFdel{over and over}\DIFdelend \DIFaddbegin \DIFadd{repeatedly}\DIFaddend . In total the performance of this function ends up being $O(n^2)$ where $n$ is the length of the final list.

An example of a function that exhibits this behaviour is \texttt{countdown}, which is defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{countdown}. \texttt{countdown} constructs a string that lists the numbers starting at the argument \texttt{i} down to zero. During each recursive call the previously computed result is traversed\DIFaddbegin \DIFadd{, }\DIFaddend but this parameter gets larger each time\DIFdelbegin \DIFdel{which is what causes the $O(n^2)$ performance}\DIFdelend .

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < countdown :: Int -> String
%DIFDELCMD < countdown i = f i ""
%DIFDELCMD <   where f 0 s = s ++ "0"
%DIFDELCMD <           f i s = let newS = s ++ (show i) ++ ", " in
%DIFDELCMD <           f (i-1) newS
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
countdown :: Int -> String
countdown i = f i ""
  where f 0 s = s ++ "0"
        f i s = let newS = s ++ (show i) ++ ", " in
                    f (i-1) newS
\end{lstlisting}
\DIFaddendFL \caption{The countdown function runs in $O(n^2)$ time.}
\label{countdown}
\end{figure}

The poor performance of \texttt{countdown} quickly becomes noticeable, with \texttt{countdown 10000} taking around six seconds to run.\footnote{On an Intel i5 4690k processor with 16 GB of RAM}

Fortunately there is an alternative representation of lists that allows $O(n)$ time for such nested appends. This alternative representation was first described by John Hughes in~\citep{hughesList} (hence \DIFdelbegin \DIFdel{their name}\DIFdelend \DIFaddbegin \DIFadd{``Hughes-lists''}\DIFaddend ). They are also known as difference lists~\citep{realWorldHaskell}; \DIFdelbegin \DIFdel{difference lists is the name that they are provided by }\DIFdelend \DIFaddbegin \DIFadd{and this is the nomenclature used }\DIFaddend in Hackage~\citep{dlist}. In Hughes lists elements are stored as partial applications of the append function; these partial applications can then be composed \DIFdelbegin \DIFdel{together }\DIFdelend using function composition (the \texttt{(.)} operator in Haskell) to append the two lists together. 

Difference lists store the values \texttt{[1,2,3]} as \texttt{([1,2,3] ++)} which is of type \texttt{Num a => [a] -> [a]}. Appending \texttt{[4,5,6]} to \texttt{([1,2,3] ++)} first involves converting it to a difference list (\texttt{([4,5,6] ++)} in this case), then these two difference lists can be appended with function composition which results in\DIFaddbegin \DIFadd{: }\\ \DIFaddend \texttt{([1,2,3] ++) . ([4,5,6] ++)}. 

\begin{figure}[t]
\begin{lstlisting}
> let lst = ([1,2,3] ++) . ([4,5,6] ++)
> :t lst
lst :: Num a => [a] -> [a]
> lst []
[1,2,3,4,5,6]
\end{lstlisting}
\caption{Building and deconstructing difference lists.}
\label{ghciDList}
\end{figure}

As seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{ghciDList}, once it comes time to retrieve the normal list\DIFaddbegin \DIFadd{, }\DIFaddend the difference list is applied to an empty list. Function composition is \DIFdelbegin \DIFdel{evaluated }\DIFdelend \DIFaddbegin \DIFadd{applied }\DIFaddend from right to left so this keeps the left operand of \texttt{(++)} small. Internally difference lists are just a wrapper around a function from lists to lists. Figure~\ref{dlistDef} shows the definition of the \texttt{DList} new type which contains the partial application. The \texttt{unDL} function simply removes the \texttt{DL} constructor. 

\begin{figure}[t]
\begin{lstlisting}
newtype DList a = DL {
   unDL :: [a] -> [a]
}

fromList :: [a] -> DList a
fromList xs = DL (xs ++)

toList :: DList a -> [a]
toList (DL xs) = xs []

append :: DList a -> DList a -> DList a
append xs ys = DL (unDL xs . unDL ys)
\end{lstlisting}
\caption{The definition of \texttt{DList} taken from~\citep{realWorldHaskell}}
\label{dlistDef}
\end{figure}

While difference lists support fast appends there is no such thing as a free lunch\DIFdelbegin \DIFdel{, speed ups }\DIFdelend \DIFaddbegin \DIFadd{: speedups }\DIFaddend for certain functions are paid for by slowdowns in other places. Getting the head and tail of a normal list are both constant time operations but become linear time for difference lists because the difference lists will have to be converted back to normal lists. 


\DIFdelbegin %DIFDELCMD < \subsection{Refactoring lists to Hughes lists}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{For functional programmers, lists are a very familiar and versatile data structure. However, if an application requires repeated appends as described at the start of this section their performance becomes an issue. Difference lists provide a similar interface to lists but without this troublesome behaviour. This section will describe a refactoring to convert programs written using normal lists to use difference lists instead. 
}%DIFDELCMD < 

%DIFDELCMD < %%%
\subsubsection{\DIFdel{Embeddable Types}} 
%DIFAUXCMD
\addtocounter{subsubsection}{-1}%DIFAUXCMD
\DIFdel{This refactoring }\DIFdelend \DIFaddbegin \subsection{Embeddable Types} 
\DIFadd{This refactoring }\DIFaddend takes the view that a type consists of some structure and a set of functions that operate on that structure. 

This refactoring requires that a source type is ``reversibly embeddable'' into the target type. If the source type is some type $A$ and the target type is some type $B$ then for $A$ to be reversibly embeddable \DIFdelbegin \DIFdel{in }\DIFdelend \DIFaddbegin \DIFadd{into }\DIFaddend $B$\DIFaddbegin \DIFadd{, }\DIFaddend two functions must exist\DIFdelbegin \DIFdel{. The }\DIFdelend \DIFaddbegin \DIFadd{: the }\DIFaddend projection function $proj :: A \rightarrow B$ and the abstraction function $abs :: B \rightarrow A$\DIFaddbegin \DIFadd{, as shown in Figure~\ref{galois}}\DIFaddend . The property in equation~\ref{eq1} must hold for $A$ to be embeddable in $B$. However, the \DIFdelbegin \DIFdel{property in equation~\ref{eq2} does not necessarily hold}\DIFdelend \DIFaddbegin \DIFadd{reverse composition $proj . abs$ is not necessarily $id_B$}\DIFaddend .   

\begin{figure}[t]
\begin{equation}\label{eq1}
abs . proj = id\DIFaddbeginFL \DIFaddFL{_A
}\DIFaddendFL \end{equation}
\DIFaddbeginFL \end{figure}
\DIFaddend 

\DIFdelbegin \begin{displaymath}\DIFdel{%DIFDELCMD < \label{eq2}%%%
proj . abs \neq id
}\end{displaymath}
%DIFAUXCMD
\DIFdelend \DIFaddbegin \begin{figure}[h]
	\begin{center}
		\includegraphics[scale=.5]{graphVis/Chapter3/galoisConn.png}
	\end{center}
	\caption{\DIFaddFL{The relationship between the source type $A$ and target type $B$ and the respective projection and abstraction functions.}}
	\label{galois}
\DIFaddendFL \end{figure}

Intuitively this means that it needs to be possible to retrieve the $A$ type from a $B$ exactly as it was when it was projected into the new type. The reverse does not need to be true because any values of type $B$ were introduced by the refactoring so any information lost converting a $B$ back into an $A$ wasn't in the original program and so does not need to be preserved to preserve behaviour.

In the ``list to Hughes list'' case the projection function is \texttt{fromList} (because it embeds lists into the new type \texttt{DList}) and the abstraction function is \texttt{toList}. How to define the set of pairs is an interesting problem with multiple solutions. The \texttt{Data.DList} module exports the  \DIFdelbegin \DIFdel{following functions ~}\DIFdelend \DIFaddbegin \DIFadd{functions shown in Table~\ref{dlistApi}~}\DIFaddend \citep{dlist}.


\begin{table}
\begin{center}
\begin{tabular}{| c | c | c |}
  \hline
  \texttt{apply} & \texttt{empty} & \texttt{singleton}\\
  \hline
  \texttt{cons} & \textbf{\texttt{snoc}} & \texttt{append} \\
  \hline
  \texttt{concat} & \texttt{replicate} & \textbf{\texttt{list}}\\ 
  \hline	
  \texttt{head} & \texttt{tail} & \texttt{unfoldr}\\ 
  \hline  
  \texttt{foldr} & \texttt{map} & \\
  \hline
\end{tabular}
\end{center}
\caption{The DList API}
\DIFaddbeginFL \label{dlistApi}
\DIFaddendFL \end{table}


From this list the two bold functions are the only functions without normal list counterparts. In the case of \texttt{snoc}, which appends a single element to the end of a \texttt{DList}, \DIFdelbegin \DIFdel{there is no }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend normal list equivalent \DIFdelbegin \DIFdel{presumably because Hughes lists are specifically designed for append-heavy operations so making a function specifically for appending a single element onto the end of a }\texttt{\DIFdel{DList}} %DIFAUXCMD
\DIFdel{is useful}\DIFdelend \DIFaddbegin \DIFadd{is not provided by the prelude because of the linear time performance of this operation}\DIFaddend . The \texttt{list} \DIFaddbegin \DIFadd{function}\DIFaddend , shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{dListList}, makes up for the lack of pattern matching over \texttt{DList}s. \DIFaddbegin \DIFadd{The first two arguments to }\texttt{\DIFadd{list}} \DIFadd{are values that handle the two standard pattern match cases over lists: an empty list, and when the list contains at least a single element. The third argument to }\texttt{\DIFadd{list}}\DIFadd{, ``}\texttt{\DIFadd{dl}}\DIFadd{'', is the difference list that pattern matching should be performed on. If }\texttt{\DIFadd{dl}} \DIFadd{is empty then }\texttt{\DIFadd{list}}\DIFadd{'s first argument ``}\texttt{\DIFadd{nill}}\DIFadd{'' is returned. In the other case }\texttt{\DIFadd{list}} \DIFadd{will pass the head and the tail of }\texttt{\DIFadd{dl}} \DIFadd{to its second argument, a function that performs the computation when the list is non-empty.
}\DIFaddend 

\begin{figure}[t]
\begin{lstlisting}
list :: b -> (a -> DList a -> b) -> DList a -> b
list nill consit dl =
  case toList dl of
    [] -> nill
    (x : xs) -> consit x (fromList xs)
\end{lstlisting}
\caption{The definition of \texttt{list} from~\citep{dlist}}
\label{dListList}
\end{figure}

The rest of the \texttt{DList} API could be paired with equivalent list functions. However, \DIFdelbegin \DIFdel{its }\DIFdelend \DIFaddbegin \DIFadd{it's }\DIFaddend not necessarily a good idea for every normal list function to be \DIFdelbegin \DIFdel{refactoring }\DIFdelend \DIFaddbegin \DIFadd{refactored }\DIFaddend to its \texttt{DList} equivalent. As was mentioned \DIFaddbegin \DIFadd{earlier, }\DIFaddend certain \texttt{DList} functions are less efficient than the corresponding normal function, \DIFaddbegin \DIFadd{so }\DIFaddend the primary purpose of this refactoring is defeated if the refactored code runs slower than the original source.

Fortunately different versions of the refactoring can be made by defining separate sets depending on the behaviour that is desired. For example one set could only include the \texttt{DList} constant time operations (\texttt{append}, \texttt{empty}, and \texttt{cons}) and another set could include all possible pairings \DIFaddbegin \DIFadd{of operations in Table~\ref{dlistApi}}\DIFaddend .

\DIFdelbegin \DIFdel{The obvious choice for the set is to just include every possible pairing. However, it may be beneficial to limit which functions can be replaced based on the }\DIFdelend \DIFaddbegin \DIFadd{Which functions should be changed is highly dependent on the relationship between the source and target types, the }\DIFaddend motivation for the refactoring\DIFdelbegin \DIFdel{and what list functions are being used in }\DIFdelend \DIFaddbegin \DIFadd{, and how }\DIFaddend the source program \DIFdelbegin \DIFdel{. In the }\DIFdelend \DIFaddbegin \DIFadd{is constructed. The }\DIFaddend list to Hughes \DIFdelbegin \DIFdel{list case this refactoring is performed to increase the performance of append operations. In this case it may be beneficial to only allow append to be swapped for the DList append. Another possibility in this case would be to only allow the }\texttt{\DIFdel{DList}} %DIFAUXCMD
\DIFdel{functions }\DIFdelend \DIFaddbegin \DIFadd{replacement is motivated for performance reasons so limiting the replacements to only operations }\DIFaddend that run in constant time \DIFdelbegin \DIFdel{to be introduced. }\footnote{\texttt{\DIFdel{empty}}%DIFAUXCMD
\DIFdel{, }\texttt{\DIFdel{cons}}%DIFAUXCMD
\DIFdel{, and }\texttt{\DIFdel{append}}%DIFAUXCMD
}%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{. 
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The composition of the set of functions is highly dependent on the relationship between the source and target types and which functions the source program is using. For this reason the }\DIFdelend \DIFaddbegin \DIFadd{makes sense. If the refactoring is being used to completely upgrade a system from using one type to another then the programmer will want to make sure that every function used by the source type can be replaced. The }\DIFaddend implementation of this refactoring in HaRe is as adaptable as possible so that users can customize \DIFdelbegin \DIFdel{the set for their own }\DIFdelend \DIFaddbegin \DIFadd{it to meet their specific }\DIFaddend needs (see Chapter~\DIFdelbegin \DIFdel{\ref{generalImp}}\DIFdelend \DIFaddbegin \DIFadd{\ref{chp:generalImp}}\DIFaddend ).


\DIFdelbegin \subsubsection{\DIFdel{Transforming functions}}
%DIFAUXCMD
\addtocounter{subsubsection}{-1}%DIFAUXCMD
\DIFdelend \DIFaddbegin \subsection{Refactoring functions to use Hughes lists}
\DIFaddend 

\DIFdelbegin \DIFdel{The refactoring breaks into three different cases depending on the type }\DIFdelend \DIFaddbegin \DIFadd{For functional programmers, lists are a very familiar and versatile data structure. However, if an application requires repeated appends as described at the start of this section their performance becomes an issue. Difference lists provide a similar interface to lists but without this troublesome behaviour and provide projection (}\texttt{\DIFadd{fromList}}\DIFadd{) and abstraction (}\texttt{\DIFadd{toList}}\DIFadd{) functions between themselves and normal lists. This section will describe a refactoring to convert programs written using normal lists to use difference lists instead. 
}


\DIFadd{The refactoring is different depending on which types }\DIFaddend of the target function \DIFdelbegin \DIFdel{. 
}\DIFdelend \DIFaddbegin \DIFadd{are lists. There are three different cases:
}\DIFaddend 

\DIFdelbegin \DIFdel{In introduce type synonym refactoring from section~\ref{introSyn} there was a point when the refactoring required the user to tell HaRe where the new synonym should be used because the tool does not, and indeed cannot, know what is the user's intention for the synonym.
Many data-driven refactorings need to either make assumptions about what the user's intent is or to directly solicit information from them. This refactoring is no different: }\DIFdelend \DIFaddbegin \begin{enumerate}
		\item \DIFadd{A list is the type of parameters only 
		}\item \DIFadd{A list is the result type
		}\item \DIFadd{Both the result type and at least one parameter are lists
}\end{enumerate} 

\DIFadd{Each of these cases differs in the way that the Hughes list type is introduced to the function. In the first case the type changes begin at the leaves and the refactoring needs to fix any type errors from the bottom towards the top of the tree. When the result type is the only list instance in the type signature the opposite process must happen, the new type needs to be ``pushed'' down the AST from the root. In the final case the refactoring will attempt to convert the entire AST to use Hughes lists.
}

\DIFadd{Though the general scheme the refactoring takes to transform the target function is simple to understand, how the refactoring actually goes about transforming is more challenging. This is because }\DIFaddend for any given function there are multiple possible ``correct'' definitions the refactoring could produce. Take for example the function \texttt{insComma} in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{insComma}.

\begin{figure}[t]
\begin{lstlisting}
insComma :: String -> String -> String
insComma s1 s2 = s1 ++ "," ++ s2
\end{lstlisting} 
\caption{\texttt{insComma}}
\label{insComma}
\end{figure} 

When refactoring both of the arguments and the result type of \texttt{insComma} to become \texttt{DList Char} there are multiple ways to refactor this function. Figure~\ref{commaRef} shows two possibilities\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{; }\DIFaddend which one should the refactoring produce and why?

To make this decision, the refactoring is designed to reflect the assumption that because this function is being refactored to use the new type\DIFaddbegin \DIFadd{, }\DIFaddend the user wants the new type to be used in as many places as possible. This makes the first definition preferable to the second one because it only converts a single item using \texttt{fromList} and replaces the appends\DIFaddbegin \DIFadd{, }\DIFaddend whereas the second example converts the two arguments into lists\DIFaddbegin \DIFadd{, }\DIFaddend appends everything together and then converts the result back into a \texttt{DList}. The refactoring prioritises minimising the \DIFdelbegin \DIFdel{amount }\DIFdelend \DIFaddbegin \DIFadd{number }\DIFaddend of conversions introduced into the refactored program.  

\begin{figure}[t]
\begin{lstlisting}
insComma_1 :: DList Char -> DList Char -> DList Char
insComma_1 s1 s2 = s1 `append` fromList (",") `append` s2

insComma_2 :: DList Char -> DList Char -> DList Char
insComma_2 s1 s2 = fromList ((toList s1) ++ (",") ++ (toList s2))
\end{lstlisting}
\caption{Two possible refactorings for \texttt{insComma}}
\label{commaRef}
\end{figure}

\DIFdelbegin %DIFDELCMD < \subsection{Modifying the Type of a Parameter}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \subsection{Modifying the Type of an Input}
\DIFaddend 

The simplest case of this refactoring is modifying the type of a parameter. Consider the example in \DIFdelbegin \DIFdel{figure~\ref{median} }\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{mean} }\DIFaddend that calculates the \DIFdelbegin \DIFdel{median }\DIFdelend \DIFaddbegin \DIFadd{mean }\DIFaddend of a list of numbers. 

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < median :: Fractional a => [a] -> a
%DIFDELCMD < median lst = foldr (+) 0 lst / length lst
%DIFDELCMD < \end{lstlisting} 
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
mean :: Fractional a => [a] -> a
mean lst = foldr (+) 0 lst / length lst
\end{lstlisting} 
\DIFaddendFL \caption{Calculating \DIFaddbeginFL \DIFaddFL{the mean of }\DIFaddendFL a \DIFdelbeginFL \DIFdelFL{median}\DIFdelendFL \DIFaddbeginFL \DIFaddFL{list.}\DIFaddendFL }
\DIFdelbeginFL %DIFDELCMD < \label{median}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \label{mean}
\DIFaddendFL \end{figure}

Refactoring \texttt{\DIFdelbegin \DIFdel{median}\DIFdelend \DIFaddbegin \DIFadd{mean}\DIFaddend }'s first argument to become a Hughes list is a fairly straightforward matter of wrapping the, now of type \texttt{DList}, parameter \texttt{lst} with the abstraction function \texttt{toList}. The refactored version of \texttt{\DIFdelbegin \DIFdel{median}\DIFdelend \DIFaddbegin \DIFadd{mean}\DIFaddend } is in \DIFdelbegin \DIFdel{figure~\ref{medianRef}. The }\texttt{\DIFdel{DList}} %DIFAUXCMD
\DIFdel{functions }\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{meanRef}. The functions from the }\texttt{\DIFadd{DList}} \DIFadd{library }\DIFaddend have been prefaced with the \DIFaddbegin \DIFadd{``}\DIFaddend \texttt{DL}\DIFdelbegin \DIFdel{qualifier }\DIFdelend \DIFaddbegin \DIFadd{'' qualifier, }\DIFaddend to differentiate which functions come from which API. This convention will be used through the remainder of the section. \DIFaddbegin \DIFadd{You'll also notice that }\texttt{\DIFadd{toList lst}} \DIFadd{is calculated twice; this is a good candidate for further refactoring by extracting this expression into a }\texttt{\DIFadd{let}} \DIFadd{or a }\texttt{\DIFadd{where}} \DIFadd{clause. 
}\DIFaddend 

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < median :: Fractional a => DList a -> a
%DIFDELCMD < median lst = foldr (+) 0 (DL.toList lst) / length (DL.toList lst)
%DIFDELCMD < \end{lstlisting}  
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
mean :: Fractional a => DList a -> a
mean lst = foldr (+) 0 (DL.toList lst) / length (DL.toList lst)
\end{lstlisting}  
\DIFaddendFL \caption{\DIFdelbeginFL \texttt{\DIFdelFL{median}} %DIFAUXCMD
\DIFdelendFL \DIFaddbeginFL \texttt{\DIFaddFL{mean}} \DIFaddendFL refactored}
\DIFdelbeginFL %DIFDELCMD < \label{medianRef} 
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \label{meanRef} 
\DIFaddendFL \end{figure}

This example is one of the cases where there are multiple possible refactorings. Figure~\DIFdelbegin \DIFdel{\ref{medianRef2} }\DIFdelend \DIFaddbegin \DIFadd{\ref{meanRef2} }\DIFaddend shows a different version of a refactored \DIFdelbegin \DIFdel{median, because }\DIFdelend \DIFaddbegin \texttt{\DIFadd{mean}}\DIFadd{. Because }\DIFaddend \texttt{foldr} is defined both for difference lists and normal lists\DIFaddbegin \DIFadd{, }\DIFaddend the call could instead be replaced with \texttt{DL.foldr}, so that no abstraction function is needed in that expression. Which version should the refactoring produce?

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < median :: Fractional a => DList a -> a
%DIFDELCMD < median lst = DL.toList (DL.foldr (+) 0 lst) / length (DL.toList lst)
%DIFDELCMD < \end{lstlisting}  
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
mean :: Fractional a => DList a -> a
mean lst = (DL.foldr (+) 0 lst) / length (DL.toList lst)
\end{lstlisting}  
\DIFaddendFL \caption{\DIFdelbeginFL \texttt{\DIFdelFL{median}} %DIFAUXCMD
\DIFdelendFL \DIFaddbeginFL \texttt{\DIFaddFL{mean}} \DIFaddendFL refactored another way.}
\DIFdelbeginFL %DIFDELCMD < \label{medianRef2}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \label{meanRef2}
\DIFaddendFL \end{figure}  

In this case\DIFaddbegin \DIFadd{, }\DIFaddend where the refactoring targets a parameter and the \DIFdelbegin \DIFdel{function's }\DIFdelend result is a type other than a Hughes list\DIFaddbegin \DIFadd{, }\DIFaddend the refactoring assumes that the programmer wants the Hughes list converted as soon as possible in the function \DIFaddbegin \DIFadd{so the }\texttt{\DIFadd{toList}} \DIFadd{conversions will be wrapped around the parameters}\DIFaddend . 

\subsection{Modifying the Result Type}
\label{hugesListResTy}

The second example covers the case where the result type of a function is changed to \texttt{DList} instead of list. Figure~\ref{enumBefore} contains the definition of a simple algebraic data type of a tree, a function (\texttt{enumerate}) that returns an in-order list of all the tree's elements, and a function that prints a tree's enumeration to standard output. 

This time the refactoring will affect the result type of the function. As opposed to the previous example where changes occurred at the leaves of the abstract syntax tree this case of the refactoring changes the type of the AST's root. 

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < data Tree a = Leaf
%DIFDELCMD <             | Node (Tree a) a (Tree a)
%DIFDELCMD < 

%DIFDELCMD < enumerate :: Tree a -> [a]
%DIFDELCMD < enumerate Leaf = []
%DIFDELCMD < enumerate (Node left x right) = (enumerate left) ++ [x] ++ (enumerate right)
%DIFDELCMD < 

%DIFDELCMD < printEnumTree :: (Show a) => Tree a -> IO ()
%DIFDELCMD < printEnumTree tree = let lst = enumerate tree in
%DIFDELCMD <   print lst
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
data Tree a = Leaf
                | Node (Tree a) a (Tree a)

enumerate :: Tree a -> [a]
enumerate Leaf = []
enumerate (Node left x right) = (enumerate left) ++ [x] ++ (enumerate right)

printEnumTree :: (Show a) => Tree a -> IO ()
printEnumTree tree = let lst = enumerate tree in
  print lst
\end{lstlisting}
\DIFaddendFL \caption{Definition of enumerate}
\label{enumBefore}
\end{figure} 

Changing the type of the AST's root requires the refactoring to traverse the tree top down from left to right. This is because the result type of any syntax tree is determined by the function (or value in the case of a tree with only a single node) in the leftmost child. 

\texttt{enumerate}'s first case is simple enough to refactor. There is only a single value in the tree: the empty list literal. This node's current type is \texttt{[a]} and it needs to become \texttt{DList a}. When the refactoring reaches the \texttt{[]} value it searches to see if it is paired with some difference list operation in the set of pairs. The empty list literal is paired with the difference list operation \texttt{empty}; the refactoring sees this and replaces the empty list with \texttt{DL.empty :: DList a}.

After the replacement of \texttt{[]}\DIFaddbegin \DIFadd{, }\DIFaddend \texttt{enumerate}'s first case is successfully refactored. The second case of \texttt{enumerate} is more complex than the first. Figure~\ref{enumAST} shows \DIFdelbegin \DIFdel{a }\DIFdelend \DIFaddbegin \DIFadd{the }\DIFaddend syntax tree for \texttt{enumerate}'s second case. 

\begin{figure}[h]
	\begin{center}
		\includegraphics[scale=.5]{graphVis/Chapter3/enumerate.png}
	\end{center}
	\caption{A simplified syntax tree of \texttt{enumerate}'s second case.}
	\label{enumAST}
\end{figure}

The refactoring begins the traversal at the top of the syntax tree with the goal of modifying the entire tree to have a result type of \texttt{DList a}. The root node of the tree is the operator application of the left-hand append operation. \DIFdelbegin \DIFdel{This append's result type }\DIFdelend \DIFaddbegin \DIFadd{The result type of this instance of append }\DIFaddend is the type that determines the whole tree's result type. The standard append operation is paired with the difference list \texttt{DL.append} operation. The refactoring checks to ensure that the difference list append has the correct result type and\DIFaddbegin \DIFadd{, }\DIFaddend because it does\DIFaddbegin \DIFadd{, }\DIFaddend makes the replacement.\footnote{The refactoring will automatically make \texttt{DL.append} infix by surrounding the call with backtick characters since this replacement is modifying an operator application.} After replacing \texttt{++} with \texttt{DL.append}\DIFaddbegin \DIFadd{, }\DIFaddend the result type is correct but the function will no longer type check because \texttt{++} and \texttt{DL.append}'s arguments are not the same types. 

The refactoring then must recurse down both the left and right subtrees to change their types from \texttt{[a]} (the type of \texttt{(++)}'s arguments) to \texttt{DList a} (the type of \texttt{append}'s arguments). 

\begin{figure}[h]
	\begin{center}
		\includegraphics[scale=.5]{graphVis/Chapter3/enumLeft.png}
	\end{center}
	\caption{The left subtree of \texttt{enumerate}'s second case.}
	\label{enumLeft}
\end{figure}

The left subtree of the root node is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumLeft}. The refactoring can descend through the parentheses (represented by the \texttt{HsPar} constructor); the refactoring continues down the left side of the function application (\texttt{HsApp}). When the refactoring encounters the call to \texttt{enumerate} it recognises that this is the recursive call and even though the type of \texttt{enumerate} stored in the syntax tree is \texttt{Tree a -> [a]} after the refactoring its type will be \texttt{Tree a -> DList a} which is the correct result type  for this subtree. The refactoring can now confirm that the left subtree is of type \texttt{DList a} without checking the right side of the application because the refactoring didn't change the type of \texttt{enumerate}'s arguments.

\begin{figure}[h]
	\begin{center}
		\includegraphics[scale=.5]{graphVis/Chapter3/enumRight.png}
	\end{center}
	\caption{The right subtree of \texttt{enumerate}'s second case.}
	\label{enumRight}
\end{figure}

After refactoring the left subtree\DIFaddbegin \DIFadd{, }\DIFaddend the right subtree needs to be modified to be of type \texttt{DList a} as well. Shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumRight}, the right subtree's root is \DIFdelbegin \DIFdel{the }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend second call to \texttt{(++)}. Once again the refactoring replaces \texttt{++} with \texttt{DL.append} because \texttt{append}'s result type is also \texttt{DList a}. Doing this replacement sets off additional traversals that \DIFdelbegin \DIFdel{need }\DIFdelend \DIFaddbegin \DIFadd{are designed }\DIFaddend to ensure that the two arguments to the root node become typed \texttt{DList a} as well. \DIFaddbegin \DIFadd{This is possible because the original program is well-typed and the arguments to append are of type list so at the very least the arguments can be converted to }\texttt{\DIFadd{DList}}\DIFadd{s with }\texttt{\DIFadd{fromList}}\DIFadd{. }\DIFaddend The right argument to the root of this subtree is the \texttt{(enumerate right)} expression which is handled in the same way the \texttt{(enumerate left)} call was handled. The left subtree of \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumRight}  is the list literal \texttt{[x]}. This can be replaced with a call to \texttt{(DL.singleton :: a -> DList a)}, the equivalent difference list function. At this point the refactoring is finished modifying \DIFaddbegin \DIFadd{the definition of }\DIFaddend \texttt{enumerate} and the result can be seen in Figure~\ref{enumRef}.

\begin{figure}[t]
\begin{lstlisting}
enumerate :: Tree a -> DList a
enumerate Leaf = empty
enumerate (Node left x right) = (enumerate left) `append` (singleton x) `append` (enumerate right)
\end{lstlisting}
\caption{The refactored definition of \texttt{enumerate}}
\label{enumRef}
\end{figure}

The refactoring \DIFdelbegin \DIFdel{isn't }\DIFdelend \DIFaddbegin \DIFadd{is not }\DIFaddend finished yet, however. The original definition contained in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumBefore} had another function \texttt{printEnumTree} that depended on \texttt{enumerate}. The final modification that needs to happen to this example is to wrap all calls to \texttt{enumerate} in \textit{non-refactored} definitions with the abstraction function to convert the result back to a list. The final product of the refactoring can be seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumFinal}.

\begin{figure}[t]
\begin{lstlisting}
data Tree a = Leaf
            | Node (Tree a) a (Tree a)

enumerate :: Tree a -> DList a
enumerate Leaf = empty
enumerate (Node left x right) = (enumerate left) `append` (singleton x) `append` (enumerate right)

printEnumTree :: (Show a) => Tree a -> IO ()
printEnumTree tree = let lst = toList (enumerate tree) in
  print lst
\end{lstlisting}
\caption{The final product of the refactoring}
\label{enumFinal}
\end{figure}

\subsection{Modifying Parameter and Result Types}

The final case of this refactoring involves modifying both the result type and one (or more) of the parameters of the target function. This example will use the \texttt{explode} function from \DIFdelbegin \DIFdel{figure~\ref{explode} to }\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{explode} and will }\DIFaddend refactor both its argument and result type to become \texttt{DList a}. \DIFdelbegin %DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < explode :: [a] -> [a]
%DIFDELCMD < explode lst = concat (map (\x -> replicate (length lst) x) lst)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{The initial definition of }\texttt{\DIFdelFL{explode}}%DIFAUXCMD
}
%DIFAUXCMD
%DIFDELCMD < \label{explode}
%DIFDELCMD < \end{figure}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \texttt{\DIFadd{explode}} \DIFadd{is a silly function that replicates each of its elements a number of times equal to the length of the list, for example, }\texttt{\DIFadd{explode }[\DIFadd{1,2,3}]} \DIFadd{returns }\texttt{[\DIFadd{1,1,1,2,2,2,3,3,3}]}\DIFadd{. This is an odd function but it has two characteristics we are interested in, (a), both its parameter and result type are of type list and, (b), it uses the }\texttt{\DIFadd{map}} \DIFadd{higher-order function which is an interesting case for this refactoring. }\DIFaddend The abstract syntax tree of \texttt{explode} is in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{explodeAST}.

\DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
explode :: [a] -> [a]
explode lst = concat (map (\x -> replicate (length lst) x) lst)
\end{lstlisting}
\caption{\DIFaddFL{The initial definition of }\texttt{\DIFaddFL{explode}}}
\label{explode}
\end{figure}


\DIFaddend \begin{figure}[h!]
	\begin{center}
		\includegraphics[scale=.5]{graphVis/Chapter3/explode.png}
	\end{center}
	\caption{A simplified representation of \texttt{explode}'s syntax tree}
	\label{explodeAST}
\end{figure}

Much like the previous case where just the result type was modified, the refactoring will start working on the syntax tree in a top-down manner modifying the function definition to have the correct result type. This case could also be implemented in a bottom-up manner \DIFaddbegin \DIFadd{but }\DIFaddend the top-down method allows for more code reuse from the implementation of the case when the result type is modified.

The refactoring starts on the left subtree with the call to\DIFaddbegin \DIFadd{: 
}

\DIFaddend \texttt{concat :: Foldable t => t [a] -> [a]}. 
\DIFaddbegin 

\DIFaddend The equivalent difference list function is\DIFaddbegin \DIFadd{: 
}

\DIFaddend \texttt{DL.concat :: Foldable t => t (DList a) -> DList a} 
\DIFdelbegin \DIFdel{since }\DIFdelend \DIFaddbegin 

\DIFadd{Since }\DIFaddend the result type of this version of \texttt{DL.concat} is \texttt{DList a} the refactoring performs the switch between \DIFdelbegin \DIFdel{which function to use}\DIFdelend \DIFaddbegin \DIFadd{the two functions}\DIFaddend . 

After the change on the left subtree\DIFaddbegin \DIFadd{, }\DIFaddend the refactoring needs to modify the right subtree so that it is of type \texttt{Foldable t => t (DList a)} rather than its current type of \texttt{Foldable t => t [a]}. The leftmost child of the right subtree is the call to \texttt{map :: (a -> b) -> [a] -> [b]}. The difference list equivalent map is appropriately typed \texttt{DL.map :: (a -> b) -> DList a -> DList b}. Should the refactoring change this node to the difference list version? And if so, what changes will need to be made to the other subtrees?

The refactoring will swap this node out if \texttt{Foldable t => t (DList a)} (the type of \texttt{DL.concat}'s parameter) can be the same type as \texttt{DList b}\DIFaddbegin \DIFadd{~}\DIFaddend (\texttt{DL.map}'s result type); since \texttt{DList} is a member of the \texttt{Foldable} type class\DIFaddbegin \DIFadd{, }\DIFaddend the swap can happen as long as the \texttt{b} type variable in \texttt{map}'s type is equal to \texttt{DList a}. This node's type after the swap and filling in the known type variables is: 
\DIFaddbegin 

\DIFaddend \texttt{DL.map :: (a -> DList b) -> DList a -> DList (DList b)}
\DIFdelbegin \DIFdel{this }\DIFdelend \DIFaddbegin 

\DIFadd{This }\DIFaddend new type's arguments' types are both different from the original types so the refactoring will need to check both of these subtrees as well. 

\begin{figure}[h]
	\DIFdelbeginFL %DIFDELCMD < \label{explodeLam}
%DIFDELCMD < 	%%%
\DIFdelendFL \begin{center}
		\includegraphics[scale=.5]{graphVis/Chapter3/explodeLam.png}
	\end{center}
	\caption{The syntax tree of the lambda expression in \texttt{explode}.}\DIFaddbeginFL \label{explodeLam}
\DIFaddendFL \end{figure}

Starting with the lambda expression (the syntax tree in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{explodeLam}) the refactoring needs to modify \DIFdelbegin \DIFdel{it}\DIFdelend \DIFaddbegin \DIFadd{this expression}\DIFaddend 's type to become \texttt{a -> DList b}\footnote{\texttt{a} and \texttt{b} could be the same type.}. The current type of the lambda expression is \texttt{a -> [a]} so the refactoring only needs to modify its result type. The refactoring can then proceed to the left-most child of this expression, the call to \texttt{replicate :: Int -> a -> [a]}. The refactoring swaps this call for the difference list version of \texttt{DL.replicate :: Int -> a -> DList a}. 

If this refactoring were only modifying the result type of \texttt{explode}\DIFaddbegin \DIFadd{, }\DIFaddend the refactoring would be done \DIFdelbegin \DIFdel{modifying }\DIFdelend \DIFaddbegin \DIFadd{working with }\DIFaddend the lambda expression because the type change affects the syntax tree in a top-down manner. In this case, however, because the type of \texttt{explode}'s argument was also changed\DIFaddbegin \DIFadd{, }\DIFaddend the leaves of the syntax tree can also have changed type. This means that every subtree needs to be checked to ensure that it still type checks.

The untouched sections of this expression's syntax tree are \texttt{replicate}'s arguments. The second argument is just the variable \texttt{x} which hasn't changed type. The first argument on the other hand is the expression \texttt{(length lst)} and \texttt{lst} is the argument of \texttt{explode} that is now a difference list rather than a normal list. Ideally the refactoring would rewrite this expression by replacing the normal list functions (\texttt{length} in this case) with difference list equivalents. Unfortunately there is no difference list version of \texttt{length} so the refactoring will have to introduce the abstraction function to convert back to a normal list to calculate its length. 

The lambda expression has now been successfully refactored. The rewritten lambda expression is in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{lamRef}.\footnote{To help clarify which functions are for lists and which are the difference list versions, all difference list functions have been qualified with the \DIFdelbegin \texttt{\DIFdel{DList}} %DIFAUXCMD
\DIFdelend \DIFaddbegin \texttt{\DIFadd{DL}} \DIFaddend identifier.}

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < (\x -> DList.replicate (length (DList.toList lst)) x)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
(\x -> DL.replicate (length (DL.toList lst)) x)
\end{lstlisting}
\DIFaddendFL \caption{The refactored lambda expression.}
\label{lamRef}
\end{figure}


Once refactoring the lambda expression is finished there is only a single node of \texttt{explode} that the refactoring hasn't touched yet, the use of \texttt{lst} as the second argument of \texttt{map}. When the refactoring swapped \texttt{map} for its difference list version it was able to determine the new type of both its arguments, \texttt{(a -> DList b)} for the first argument (the lambda expression) and \texttt{DList a} for the second argument (\texttt{lst}). Fortunately because \texttt{lst} is a target of the refactoring\DIFaddbegin \DIFadd{, }\DIFaddend its new type is \texttt{DList a} so this node can remain unchanged.

\begin{figure}[t]
\begin{lstlisting}
import qualified Data.DList as DL
import Data.DList (DList)

explode :: DList a -> DList a
explode lst = DL.concat 
	(DL.map 
		(\x -> DL.replicate (length (DL.toList lst)) x) 
		lst)
\end{lstlisting} 
\caption{The final refactored result of \texttt{explode}.}
\label{explodeFinal}
\end{figure}

The final result of this case of the Hughes list refactoring is in Figure~\ref{explodeFinal}.

\section{Summary}

This chapter has introduced the concept of data-driven refactorings for the object-oriented programming paradigm and described three of the data-driven refactorings developed for this thesis. The first, \DIFaddbegin \DIFadd{``}\DIFaddend introducing a type synonym\DIFdelbegin \DIFdel{, creates }\DIFdelend \DIFaddbegin \DIFadd{'', creates an }\DIFaddend additional abstraction to better describe what types are representing. \DIFdelbegin \DIFdel{Generalise Maybe}\DIFdelend \DIFaddbegin \DIFadd{``Generalising Maybe'' }\DIFaddend takes code written for a specific type and generalises it so that it is applicable to more types. Finally, the \DIFaddbegin \DIFadd{``}\DIFaddend list to Hughes list\DIFaddbegin \DIFadd{'' }\DIFaddend refactoring replaces one type with another equivalent type. This refactoring allows a project to be retyped mid-development because it \DIFdelbegin \DIFdel{'s }\DIFdelend \DIFaddbegin \DIFadd{is }\DIFaddend not always clear \DIFdelbegin \DIFdel{from }\DIFdelend \DIFaddbegin \DIFadd{at }\DIFaddend the beginning of a project what the correct data representation should be.

The following chapters will expand on some of these ideas. The next chapter will describe another form of generalisation that rewrites monadic code into its equivalent applicative functor code. The applicative code can be much cleaner and descriptive in certain cases. It can also allow programmers to take advantage of a different way of executing their programs. Chapter~\DIFdelbegin \DIFdel{\ref{chap:monadification} }\DIFdelend \DIFaddbegin \DIFadd{\ref{chp:monadification} }\DIFaddend discusses introducing effectful abstractions into pure code. This process helps \DIFdelbegin \DIFdel{a code base }\DIFdelend \DIFaddbegin \DIFadd{to }\DIFaddend add additional features \DIFaddbegin \DIFadd{to a code base}\DIFaddend , such as shared state, mid-development.

\chapter{Implementing Data-Driven Refactorings in HaRe}
\DIFdelbegin %DIFDELCMD < \label{generalImp}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \label{chp:generalImp}
\DIFaddend 

\renewcommand{\topfraction}{1}
\renewcommand{\floatpagefraction}{1}

The previous chapter described \DIFdelbegin \DIFdel{two refactorings, }\DIFdelend \DIFaddbegin \DIFadd{three refactorings, ``introducing a type synonym,'' ``}\DIFaddend generalising \texttt{Maybe}\DIFdelbegin \DIFdel{and the }\DIFdelend \DIFaddbegin \DIFadd{,'' and the ``}\DIFaddend list to Hughes list refactorings\DIFaddbegin \DIFadd{''}\DIFaddend . This chapter will describe how refactorings are implemented in HaRe. \DIFaddbegin \DIFadd{This chapter is organised in a similar way to the previous one. It begins with }\DIFaddend Section~\ref{sec:genMaybeImp} \DIFaddbegin \DIFadd{which }\DIFaddend describes the implementation of the generalising \texttt{Maybe} refactoring \DIFdelbegin \DIFdel{which was first covered in section}\DIFdelend \DIFaddbegin \DIFadd{that was discussed in Section}\DIFaddend ~\ref{genMaybe}. Section~\ref{sec:hughesListImp} covers the implementation of the \DIFaddbegin \DIFadd{``}\DIFaddend list to Hughes list\DIFaddbegin \DIFadd{'' }\DIFaddend refactoring (see \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{listToDlist}) and the embeddable type refactoring system that it is made with. \DIFdelbegin \DIFdel{The }\DIFdelend \DIFaddbegin \DIFadd{In addition to discussing the implementation of the refactorings from Chapter~\ref{chp:ddRefs}, this }\DIFaddend chapter also covers the enhancements that HaRe's API has undergone while developing the refactorings discussed in this thesis. Section~\ref{hareAPI} discusses the enhancements I made to HaRe's API.

\section{Implementation of the ``Generalising Maybe'' refactoring}
\label{sec:genMaybeImp}

The design of the generalising \texttt{Maybe} refactoring was discussed in the previous chapter (see \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{genMaybe}). This section will describe its implementation in the HaRe refactoring tool. 

This refactoring tries to generalise something of type \texttt{Maybe} to become either of type \texttt{Monad} or \texttt{MonadPlus}. The implementation of this refactoring attempts to produce the ``most general'' version of the source program. This means that when possible the refactoring will replace the targeted \texttt{Maybe} type with a value of type \texttt{Monad}\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{; }\DIFaddend if this is not possible the value will be of type \texttt{MonadPlus}, and finally if that also \DIFdelbegin \DIFdel{isn't }\DIFdelend \DIFaddbegin \DIFadd{is not }\DIFaddend possible the refactoring cannot continue and the source and target programs of the refactoring are identical.

\DIFaddbegin \DIFadd{The implementation of the ``Generalising Maybe'' refactoring in HaRe attempts to convert the }\texttt{\DIFadd{Maybe}} \DIFadd{types in a function to the ``most general'' type class possible. This means that if the }\texttt{\DIFadd{Maybe}} \DIFadd{type can be replaced with }\texttt{\DIFadd{Monad}} \DIFadd{it will be, otherwise the function will be rewritten using }\texttt{\DIFadd{MonadPlus}} \DIFadd{instead. First the refactoring will replace all of the }\texttt{\DIFadd{Maybe}} \DIFadd{constructors in right hand side expressions with the corresponding }\texttt{\DIFadd{MonadPlus}} \DIFadd{operation (}\texttt{\DIFadd{Just}} \DIFadd{becomes }\texttt{\DIFadd{return}} \DIFadd{and }\texttt{\DIFadd{Nothing}} \DIFadd{becomes }\texttt{\DIFadd{mzero}}\DIFadd{). At this point the refactoring checks if the target function's computations were done entirely inside of }\texttt{\DIFadd{Maybe}}\DIFadd{, that is the function receives a }\texttt{\DIFadd{Maybe}} \DIFadd{value as a parameter, returns a value of type }\texttt{\DIFadd{Maybe}}\DIFadd{, and handles the }\texttt{\DIFadd{Nothing}} \DIFadd{constructor by just returning }\texttt{\DIFadd{Nothing}}\DIFadd{; when all of these things are true the target function is really just replicating the functionality of }\texttt{\DIFadd{Maybe}}\DIFadd{'s bind instance. In this case the refactoring rewrites the function body to use bind instead. Finally, depending on how generalisable the target function was, the refactoring will update the type signature by replacing the }\texttt{\DIFadd{Maybe}}\DIFadd{s with ``}\texttt{\DIFadd{m}}\DIFadd{'' type variables that are bound to the appropriate type class, either }\texttt{\DIFadd{MonadPlus}} \DIFadd{or }\texttt{\DIFadd{Monad}}\DIFadd{.
}

\DIFaddend \begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < [basicstyle=\small]
%DIFDELCMD < doMaybeToPlus :: FilePath -> SimpPos -> String -> Int -> RefactGhc ()  
%DIFDELCMD < doMaybeToPlus fileName pos@(row,col) funNm argNum = do
%DIFDELCMD <   parsed <- getRefactParsed
%DIFDELCMD <   let mBind = getHsBind pos parsed
%DIFDELCMD <   case mBind of
%DIFDELCMD <    Nothing -> error "Function bind not found"
%DIFDELCMD <    Just funBind -> do
%DIFDELCMD <        hasNtoN <- containsNothingToNothing funNm argNum pos funBind
%DIFDELCMD <        case hasNtoN of
%DIFDELCMD <          True -> do
%DIFDELCMD <            doRewriteAsBind fileName pos funNm
%DIFDELCMD <          False -> do 
%DIFDELCMD <            canReplaceConstructors <- isOutputType funNm argNum pos funBind
%DIFDELCMD <            case canReplaceConstructors of
%DIFDELCMD <              True -> do
%DIFDELCMD <                logm $ "Can replace constructors"
%DIFDELCMD <                replaceConstructors pos funNm argNum
%DIFDELCMD <              False -> return ()
%DIFDELCMD <        return ()
%DIFDELCMD < \end{lstlisting} 
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
doMaybeToPlus :: FilePath -> SimpPos -> String -> Int -> RefactGhc ()  
doMaybeToPlus fileName pos@(row,col) funNm argNum = do
  parsed <- getRefactParsed
  let mBind = getHsBind pos parsed
  case mBind of
   Nothing -> error "Function bind not found"
   Just funBind -> do
       hasNtoN <- containsNothingToNothing funNm argNum pos funBind
       case hasNtoN of
         True -> do
           doRewriteAsBind fileName pos funNm
         False -> do 
           canReplaceConstructors <- isOutputType funNm argNum pos funBind
           case canReplaceConstructors of
             True -> do
               logm $ "Can replace constructors"
               replaceConstructors pos funNm argNum
             False -> return ()
       return ()
\end{lstlisting} 
\DIFaddendFL \caption{The top level function of the generalising \texttt{Maybe} refactoring.}
\label{genMaybeTopLevel}
\end{figure}

This refactoring, at the top level (defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{genMaybeTopLevel}), determines which type the target program can be generalised to. This is determined by whether the target function contains a ``\texttt{Nothing} to \texttt{Nothing}'' case. When this is true then the function can be rewritten with the \texttt{Monad} interface. This check is defined in the \texttt{containsNothingToNothing} function, which traverses the AST of the target function searching for a case where the \texttt{Pat} on the left hand side of the match is only \texttt{Nothing} and the \texttt{HsExpr} on the right hand side of the match is only \texttt{Nothing}. If this is true for the target function then \texttt{doRewriteAsBind} modifies the other case(s) of the function.

In the other case the refactoring checks whether it is possible to replace all of \texttt{Maybe}'s constructors with the equivalent \texttt{MonadPlus} values. This is checked in \texttt{isOutputType} which checks whether the target type of the refactoring is also the result type of the function. When this is the case \texttt{MonadPlus}'s operations can be substituted for \texttt{Maybe}'s \texttt{Nothing} and \texttt{Just}. This is done by the \texttt{replaceConstructors} function.

The rest of this section will discuss the two functions that modify the source function, \texttt{doRewriteAsBind} and \texttt{replaceConstructors}. There will also be a brief section on enhancements that can be made to this implementation of the generalise \texttt{Maybe} refactoring to make it more robust.

\subsection{Generalising to \texttt{Monad}}

\begin{figure}[t]
\begin{lstlisting}
doRewriteAsBind :: FilePath -> SimpPos -> String -> RefactGhc ()
doRewriteAsBind fileName pos funNm = do
  parsed <- getRefactParsed
  let bind = gfromJust "doRewriteAsBind" $ getHsBind pos parsed
      matches = GHC.mg_alts . GHC.fun_matches $ bind
  if (length matches) > 1
    then error "Multiple matches not supported"
    else do
    let (GHC.L _ match) = head matches
    (varPat, rhs) <- getPatAndRHS match
    (newPat, _) <- liftT $ cloneT varPat
    (newRhs, _) <- liftT $ cloneT rhs
    let rhs = justToReturn newRhs
    lam <- wrapInLambda newPat rhs
    let newNm = case newPat of
                     (GHC.L _ (GHC.VarPat nm)) -> mkNewNm nm
                     _ -> mkRdrName $ "m_value_" ++ funNm
    new_rhs <- createBindGRHS newNm lam
    replaceGRHS funNm new_rhs newNm
    fixType funNm
      where mkNewNm rdr = let str = GHC.occNameString $ GHC.rdrNameOcc rdr in
              GHC.Unqual $ GHC.mkVarOcc ("m_" ++ str)
\end{lstlisting}
\caption{The implementation of \texttt{doRewriteAsBind}. The function that generalises \texttt{Maybe} to \texttt{Monad}}
\label{doRewriteAsBind}
\end{figure}

Figure~\ref{doRewriteAsBind} shows the implementation of \texttt{doRewriteAsBind}, the function that rewrites a function of type \texttt{Maybe} to use the monadic bind interface instead. The first three lines of the body of \texttt{doRewriteAsBind} (lines 3--5 in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{doRewriteAsBind}) extract the list of matches that make up every Haskell function. A match consists of the left hand side pattern binding and the right hand side expression. For example, in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{matchExample}, the \texttt{last} function's AST representation consists of three matches, one for each function body, whereas the AST of \texttt{doRewriteAsBind} only has one match because its definition contains only a single body.

Once the target function's list of matches is retrieved \texttt{doRewriteAsBind} can then perform the appropriate refactoring for a target function with a single match or multiple matches\footnote{It's important to note that \texttt{containsNothingToNothing}, the function that checks if the target function has a \texttt{Nothing} to \texttt{Nothing} case, also removes this case from the AST. This means that the \texttt{matches} list excludes this case.}. Currently this implementation does not support multiple matches but the refactoring could be expanded to automatically wrap the target function's pattern matches in a \texttt{case} statement. Another way to allow HaRe to support this case is to create a separate refactoring that rewrites functions with multiple bindings to use a \texttt{case} statement instead, which would allow the refactoring to proceed.

\begin{figure}[t]
\begin{lstlisting}
last :: [a] -> Maybe a
last [] = Nothing
last [x] = Just x
last (x:xs) = last xs
\end{lstlisting}
\caption{The AST of the \texttt{last} function contains three matches.}
\label{matchExample}
\end{figure}

After checking that there is only a single match in the AST of the target function, \texttt{doRewriteAsBind} can start modifying that function. The rest of \texttt{doRewriteAsBind} (from line 10 in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{doRewriteAsBind}) constructs a lambda expression and variable to make up the new right hand side of the target function. 

This is done by taking the right hand side expression from the target function's match and replacing any calls to \texttt{Just} with \texttt{return}. This generalised version of the right hand side expression can then be wrapped in a lambda expression with the original pattern being the binding for this expression (line 14's call to \texttt{wrapInLambda}). This new lambda expression can be bound to the newly created variable \texttt{newNm}. This new variable keeps the name given to the original pattern if it was just a simple name (e.g.\ ``\texttt{x}'') and prepends ``\texttt{m\char`_}'' to this name to prevent a naming conflict with the variable inside of the lambda expression. In the case that the pattern was more complex, like when matching a data type constructor (e.g.\ \texttt{Just x}) or a list \texttt{(x:xs)}, then a generic variable name is created using the target function's name.\footnote{The user of the refactoring will most likely want to apply the renaming refactoring to this variable to give it a more appropriate name.}

The new name is then bound to the lambda expression (line 18) and the target function's body is replaced with this bind expression. The final step of the rewriting generalises the type signature to use the \texttt{Monad} type class in place of \texttt{Maybe}.

\subsection{Generalising to \texttt{MonadPlus}}

When the target function cannot be generalised to \texttt{Monad} because it does not contain the \texttt{Nothing} to \texttt{Nothing} case\DIFaddbegin \DIFadd{, }\DIFaddend it may be possible to generalise it to use the \texttt{MonadPlus} \DIFdelbegin \DIFdel{typeclass }\DIFdelend \DIFaddbegin \DIFadd{type class }\DIFaddend instead. This section will cover how this case of the refactoring is implemented in HaRe.

An instance of \texttt{MonadPlus} is a \texttt{Monad} with a monoidal structure.
This structure is defined by an associative operator \texttt{mplus} and its identity value \texttt{mzero}~\citep{typeclassopedia}. 
In \texttt{Maybe}'s case \texttt{mzero} is defined as \texttt{Nothing} and \texttt{Maybe}'s other constructor, \texttt{Just}, is \texttt{Maybe}'s definition of \texttt{return}.
This case of the refactoring just replaces instances of \texttt{Just} with \texttt{return} and \texttt{Nothing} with \texttt{mzero}. 

\begin{figure}[t]
\begin{lstlisting}
replaceConstructors :: SimpPos -> String -> Int -> RefactGhc ()
replaceConstructors pos funNm argNum = do
  parsed <- getRefactParsed
  let (Just bind) = getHsBind pos parsed
  newBind <- applyInGRHSs bind replaceNothingAndJust
  replaceBind pos newBind
  fixType' funNm argNum
    where applyInGRHSs :: (Data a) => UnlocParsedHsBind -> (a -> RefactGhc a) -> RefactGhc UnlocParsedHsBind
          applyInGRHSs parsed fun = applyTP (stop_tdTP (failTP `adhocTP` (runGRHSFun fun))) parsed
          runGRHSFun :: (Data a) => (a -> RefactGhc a) -> ParsedGRHSs -> RefactGhc ParsedGRHSs
          runGRHSFun fun grhss@(GHC.GRHSs _ _) = SYB.everywhereM (SYB.mkM fun) grhss
          mzeroOcc = GHC.mkVarOcc "mzero"
          nothingOcc = GHC.mkVarOcc "Nothing"
          returnOcc = GHC.mkVarOcc "return"
          justOcc = GHC.mkVarOcc "Just"
          replaceNothingAndJust :: GHC.OccName -> RefactGhc GHC.OccName
          replaceNothingAndJust nm
            | (GHC.occNameString nm) == "Nothing" = do
                logm "Replacing nothing"
                return mzeroOcc
            | (GHC.occNameString nm) == "Just" = do
                logm "Replace just"
                return returnOcc            
            | otherwise = return nm
\end{lstlisting}
\caption{The \texttt{replaceConstructors} function replaces \texttt{Maybe}'s constructors with more general values.}
\label{replaceConstructors}
\end{figure}

Figure~\ref{replaceConstructors} shows the top level function \texttt{replaceConstructors} that modifies a target function to replace the \texttt{Maybe} specific values with \texttt{mzero} and \texttt{return}. This function primarily consists of two traversal patterns, the ``stop top down traversal'' (\texttt{stop\_tdTP}) in \texttt{applyInGRHSs} and the \texttt{everywhereM} in \texttt{runGRHSFun}. The only parts of the function that need to be changed are in the right hand side of the target function; the first traversal (from \texttt{applyInGRHSs}) descends to this level and then stops when the guarded right hand side type is found. From there the other traversal continues down the tree replacing the instances of \texttt{Just} and \texttt{Nothing}.

After the right hand side binding has been rewritten, the target function's original binding is replaced with the new binding in the call to \texttt{replaceBind} on line six of \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{replaceConstructors}. Replacing the original binding of a function with a refactored one is a common operation that many refactorings need to perform. Due to its re-usability \texttt{replaceBind} is a function from HaRe's API and will be discussed in Section~\ref{hareAPI}.

The final step of this refactoring changes the type signature of the target function. The instances of \texttt{Maybe} in the type signature need to be replaced with a type variable that is bound to the \texttt{MonadPlus} typeclass. The \texttt{fixType'} function performs this rewriting. The definition of \texttt{fixType'} is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{fixTypePrime}.

\begin{figure}[t]
\begin{lstlisting}
fixType' :: String -> Int -> RefactGhc ()
fixType' funNm argPos = do
  logm "Fixing type"
  parsed <- getRefactParsed
  let m_sig = getSigD funNm parsed
      (GHC.L sigL (GHC.SigD sig)) = gfromJust "fixType'" m_sig
  fixedClass <- fixTypeClass sig
  replacedMaybe <- replaceMaybeWithVariable fixedClass
  newSig <- locate (GHC.SigD replacedMaybe)
  addNewKeyword ((G GHC.AnnDcolon), DP (0,1)) newSig
  synthesizeAnns newSig
  addNewLines 2 newSig
  newParsed <- replaceAtLocation sigL newSig
  anns <- liftT getAnnsT
  putRefactParsed newParsed anns         
\end{lstlisting}
\caption{\texttt{fixType'} is a function that fixes the type signature of a function that is being generalised from \texttt{Maybe} to \texttt{MonadPlus}.}
\label{fixTypePrime}
\end{figure}

The definition of \texttt{fixType'} is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{fixTypePrime}; this function performs the following steps:

\begin{itemize}
\item \textit{Line 5:} Retrieve the signature from the parsed abstract syntax
\item \textit{Line 7:} Insert the binding of the variable ``\texttt{m}'' to the \texttt{MonadPlus} type class
\item \textit{Line 8:} Replace the instances of \texttt{Maybe} with the ``\texttt{m}'' type variable
\item \textit{Lines 9--12:} Create the appropriate annotations for the new syntax elements
\item \textit{Line 13:} Replace the old type signature in the parsed source
\item \textit{Line 15:} Update the refactoring state with the modified parsed source
\end{itemize}

The \texttt{fixTypeClass} and \texttt{replaceMaybeWithVariable} functions transform the abstract syntax of the type signature, and the rest of \texttt{fixType'} creates and modifies the associated annotations so that all of its elements appear in the target program and are well formatted. For example, the call to \texttt{addNewKeyword} on line 10 of \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{fixTypePrime} associates the \texttt{AnnDcolon} annotation with the new type signature. This annotation represents the ``\texttt{::}'' operator that is otherwise not represented explicitly in the abstract syntax tree. These annotations are what is used by ghc-exactprint to determine where syntax elements that are not located by the abstract syntax are and how the target module should be formatted. This was discussed in more detail in section~\ref{ghcExactprint}.

This section has covered the implementation of the generalising \texttt{Maybe} refactoring. This is a fairly straightforward refactoring implementation, the preconditions are checked up front and depending on the structure of the target function the correct transformation is chosen. The next section will describe the implementation of the ``list to Hughes list'' refactoring. This refactoring must descend the entire syntax tree of each target function and transformations may occur based on the type of the AST's subtrees.  

\section{Implementation of the ``List to Hughes List'' refactoring}
\label{sec:hughesListImp}

The ``list to Hughes list'' refactoring was first described in section~\ref{listToDlist}. This section will describe the implementation of that refactoring in HaRe. Section~\ref{listToDlist} also introduced the concept of a type that can be ``embedded'' into another. This means that the source type, which in this particular case is \texttt{[a]}, can be transformed into the target type (\texttt{DList a}) and the original value of the object can be retrieved from the target type.

Though ``list to Hughes list'' is the specific refactoring outlined in this thesis and implemented in HaRe, this sort of refactoring can be applied to any pair of types that hold the properties described in section~\ref{listToDlist}. HaRe's implementation of the ``list to Hughes list'' refactoring keeps this in mind and efforts were made to build reusable components so that similar refactorings can be easily implemented for different types.

The \texttt{doHughesList} function shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{doHughesList} is the main function that performs the ``list to Hughes list'' refactoring. This function performs four primary tasks: adding the import declaration for the difference list library, transforming the function definition, replacing the existing function definition, and wrapping the call points of the target function with the abstraction function.

\begin{figure}[t]
\begin{lstlisting}
doHughesList :: FilePath -> String -> SimpPos -> Int -> EmbFuncStrings -> RefactGhc ()
doHughesList fileName funNm pos argNum fStrs = do
  let mqual = Just "DList"
  addSimpleImportDecl "Data.DList" mqual
  ty <- getDListTy mqual
  parsed <- getRefactParsed
  let
    (Just lrdr) = locToRdrName pos parsed
    rdr = GHC.unLoc lrdr
    dlistCon = getTyCon ty
    newFType = resultTypeToDList dlistCon 
    (Just funBind) = getHsBind rdr parsed
    (Just tySig) = getTypeSig pos funNm parsed
    newResTy = getResultType ty
  iDecl <- dlistImportDecl mqual
  iSt <- getInitState iDecl fStrs "toList" "fromList" mqual newResTy
  bind' <- embRefact argNum mqual rdr ty iSt funBind
  replaceFunBind pos bind'
  newTySig <- fixTypeSig argNum tySig
  replaceTypeSig pos newTySig
  let modQual = case mqual of
                  (Just s) -> s ++ "."
                  Nothing -> ""
  fixClientFunctions modQual (numTypesOfBind funBind) argNum rdr
  addConstructorImport
\end{lstlisting}
\caption{\texttt{doHughesList} is the top level function of the Hughes list refactoring.}
\label{doHughesList}
\end{figure}

The most interesting part of this refactoring is the transformation of the function definition. The other steps are handled by HaRe's API which is discussed in section~\ref{hareAPI}. The transformation occurs on lines 16 and 17 of \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{doHughesList}. The transformation runs within its own state and line 16 creates the initial value of this state while line 17 actually runs the stateful computation. The state's type is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{embState}.

\begin{figure}[t]
\begin{lstlisting}
data EmbRefactState = EmbState {
  funcs :: EmbeddableFuncs,
  typeStack :: [Maybe GHC.Type],
  insertAbs :: Bool
                               }

data EmbeddableFuncs = IF {
  projFun :: GHC.RdrName,
  absFun :: GHC.RdrName,
  eqFuns :: M.Map String (GHC.RdrName, GHC.Type)
  }
\end{lstlisting}
\caption{The type of the state that the refactoring runs in.}
\label{embState}
\end{figure}

The state consists of three fields. The \texttt{funcs} field keeps track of the functions that operate over the source type and their equivalent functions that operate on the target type, as well as the names of the projection and abstraction functions. The \texttt{typeStack} field is a stack that keeps track of what changes need to be made further down the abstract syntax tree. The type stack is described in more detail in Section~\ref{typeStack} below. Finally, the \texttt{insertAbs} field keeps track of when the abstraction function needs to be applied to certain values.

The call to \texttt{getInitState} on line 16 of \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{doHughesList} takes in the import declaration of the target type, a list of pairs of strings that represent the equivalent functions over the source and target types, the names of the projection and abstraction functions, the target type's import qualifier if it exists, and the final result type of the target function. In the ``list to Hughes list'' case the import declaration is ``\texttt{import qualified Data.DList as DList}.'' Because many of the Hughes list functions have the same names as their normal list counterparts a qualifier is required so \texttt{DList} is chosen. 

The list of pairs associates the normal list functions with the appropriate Hughes list functions. A single pair in this list contains, first, the normal list function name followed by the Hughes list function that performs the same operation. For example, ``\texttt{(:)}'' is paired with its Hughes list equivalent ``\texttt{cons}'' because both functions add a single element to the beginning of a list. The full list of pairs for the ``list to Hughes list'' refactoring is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{assocFuns}.

\begin{figure}[t]
\begin{lstlisting}
[("[]","empty"),(":","cons"),("++","append"),("concat", "concat"),("replicate","replicate"), ("head","head"),("tail","tail"),("foldr","foldr"),("map","map"), ("unfoldr", "unfoldr")]
\end{lstlisting}
\caption{The list of associated function pairs for the ``list to Hughes list'' refactoring}
\label{assocFuns}
\end{figure} 

The projection and abstraction functions are ``\texttt{toList}'' and ``\texttt{fromList}'' respectively in the ``list to Hughes list'' case. The final result type of the target function depends on the result type of the source function. If the result type of the source function is \texttt{[Int]} then the result type of the target function will be \texttt{DList Int}, if the source function's result type is \texttt{[a]} then the target function's result type will be \texttt{DList a}.

Once the initial state has been created the transformation is performed in a top down, left to right manner with the call to \texttt{embRefact} on line 17 of \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{doHughesList}. As seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{embRefact} the definition of \texttt{embRefact} simply calls the API function \texttt{modMGAltsRHS} which applies a function that modifies an expression to each of the right hand side values of the given function binding. The ``real'' work of the transformation occurs in \texttt{doEmbRefact} which is defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{doEmbRefact}.

\begin{figure}[t]
\begin{lstlisting}
embRefact :: Int -> Maybe String -> GHC.RdrName -> GHC.Type -> EmbRefactState -> ParsedBind -> RefactGhc ParsedBind
embRefact _ mqual funNm newFTy iST bnd = modMGAltsRHS (\e -> runEmbRefact (doEmbRefact e) iST) bnd
\end{lstlisting}
\caption{The definition of \texttt{embRefact}}
\label{embRefact}
\end{figure}

\begin{figure}[t]
\begin{lstlisting}
doEmbRefact :: ParsedLExpr -> EmbRefact ParsedLExpr
doEmbRefact expr = do
  b1 <- embDone
  b2 <- skipCurrent
  if b1 || b2
    then do
    lift $ logm "Skipping this expr: "
    lift $ logm (SYB.showData SYB.Parser 3 expr)
    return expr
    else doEmbRefact' expr
  where doEmbRefact' :: ParsedLExpr -> EmbRefact ParsedLExpr
  		doEmbRefact' = ...
\end{lstlisting}
\caption{\texttt{doEmbRefact} is the function that transforms a given expression to use the target type.}
\label{doEmbRefact}
\end{figure}

The top-level function, \texttt{doEmbRefact}, controls when the traversal stops; the helper function \texttt{doEmbRefact'} is what actually transforms the given expression based on its constructor. The two stop conditions, \texttt{b1} and \texttt{b2} from \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{doEmbRefact} lines three and four, are determined by the state of the ``type stack''.

\subsection{The goal type stack}
\label{typeStack}

The type stack is part of the refactoring's state and is of type \texttt{[Maybe GHC.Type]}; each member of the stack represents the type that a sub tree must be changed to. The stack will be initialised with a single value ``\texttt{Just (DList a)}'' where \texttt{a} is the element type of the list in the source function. This is because the result type of the entire syntax tree needs to be changed to \texttt{DList a}. The stack keeps track of the ``goal types'' of each sub-tree. 

Consider the \texttt{enumerate} example from section~\ref{hugesListResTy}, which is shown here again in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumerate2}.

\begin{figure}[t]
\begin{lstlisting}
data Tree a = Leaf
            | Node (Tree a) a (Tree a)

enumerate :: Tree a -> [a]
enumerate Leaf = []
enumerate (Node left x right) = (enumerate left) ++ [x] ++ (enumerate right)

\end{lstlisting}
\caption{Definition of enumerate}
\label{enumerate2}
\end{figure}

When refactoring the result type of \texttt{enumerate} to become \texttt{DList a} the refactoring begins at the root of \texttt{enumerate}'s abstract syntax tree and the goal type stack is initialised as \texttt{[Just (DList a)]}. At the start of this refactoring the goal type stack holds the single type that the root node needs to become. The initial state of the stack and the abstract syntax tree is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{initASTStack}.

\begin{figure}[t]
	\begin{subfigure}{\linewidth}
		\includegraphics[scale=.5]{graphVis/Chapter4/enumerate.png}
	\end{subfigure}\par\medskip

	\begin{subfigure}{\linewidth}
		\begin{lstlisting}[mathescape]
			typeStack = [Just (DList a)$^1$]
		\end{lstlisting}
	\end{subfigure}\par\medskip
\caption{The initial state of the AST and goal type stack}
\label{initASTStack}
\end{figure} 

In this case the root node of the AST can be replaced with a call to \texttt{DList.append} because this function's result type matches the goal type for the node and \texttt{DList.append} is paired with \texttt{++}, the current value of the node. Once the root node is replaced with the call to \texttt{DList.append} the current value of the stack is popped off and \texttt{Just (DList a)} is pushed onto the stack twice. This is because \texttt{(++)} takes in two arguments both of type \texttt{[a]} whereas \texttt{DList.append}'s two arguments need to be of type \texttt{DList a}. The refactoring continues by changing the root node's left child first. The goal type of this subtree is \texttt{DList a} and so the refactoring traverses down to the leftmost child of this subtree. The state of the refactoring at this point is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumStateL1}.

\begin{figure}[t]
	\begin{subfigure}{\linewidth}
		\includegraphics[scale=.5]{graphVis/Chapter4/enumerateL1.png}
	\end{subfigure}\par\medskip

	\begin{subfigure}{\linewidth}
		\begin{lstlisting}[mathescape]
			typeStack = [Just (DList a)$^1$, Just (DList a)$^2$]
		\end{lstlisting}
	\end{subfigure}\par\medskip
\caption{After replacing the root node the refactoring must resolve the types of its subtrees.}
\label{enumStateL1}
\end{figure} 

When the refactoring reaches the call to \texttt{enumerate}\DIFaddbegin \DIFadd{, }\DIFaddend it realises that this is a recursive call so the result type is already \texttt{DList a} so no change needs to be made and the top of the goal type stack can be popped off. Also since \texttt{enumerate}'s argument is not a target of the refactoring\DIFdelbegin \DIFdel{that }\DIFdelend \DIFaddbegin \DIFadd{, those }\DIFaddend subtrees will not need to be traversed\DIFdelbegin \DIFdel{through, this }\DIFdelend \DIFaddbegin \DIFadd{. This }\DIFaddend is indicated by pushing \texttt{Nothing} onto the stack (see \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumStateL2}). This means that the entire left child of the root node is correct after the change to the root node and the refactoring can begin working on the right child of the root node.

\begin{figure}[t]
	\begin{subfigure}{\linewidth}
		\includegraphics[scale=.5]{graphVis/Chapter4/enumerateL2.png}
	\end{subfigure}\par\medskip

	\begin{subfigure}{\linewidth}
		\begin{lstlisting}[mathescape]
			typeStack = [Nothing$^1$, Just (DList a)$^2$]
		\end{lstlisting}
	\end{subfigure}\par\medskip
\caption{The \texttt{Nothing} on top of the type stack means that the right hand side of the application of \texttt{enumerate} doesn't need to be checked. The \texttt{Nothing} can be popped off the type stack and the traversal continues upwards.}
\label{enumStateL2}
\end{figure}

The first node that the refactoring encounters when traversing the right child of the root node is another application of \texttt{++}. This is handled in the same way that its parent node was, with \texttt{DList.append} replacing \texttt{++} and the new types of its subtrees (\texttt{DList a} in this case) pushed onto the stack. In the same way that the root node was handled\DIFaddbegin \DIFadd{, }\DIFaddend the refactoring begins by traversing the left subtree. The state of the refactoring at this point is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumStateR1}.

\begin{figure}[t]
	\begin{subfigure}{\linewidth}
		\includegraphics[scale=.5]{graphVis/Chapter4/enumerateR1.png}
	\end{subfigure}\par\medskip

	\begin{subfigure}{\linewidth}
		\begin{lstlisting}[mathescape]
			typeStack = [Just (DList a)$^1$, Just (DList a)$^2$]
		\end{lstlisting}
	\end{subfigure}\par\medskip
\caption{Traversing the left child again to fix the calls to the newly inserted \texttt{DList.append}.}
\label{enumStateR1}
\end{figure}

The node that the refactoring is at is a singleton list expression and according to the goal type stack it must be re-typed to \texttt{DList a}. Fortunately the \texttt{DList} library provides a \texttt{singleton} function that produces a Hughes list of length one from a single parameter. The explicit list can be replaced with a call to \texttt{DList.singleton} and this subtree is now correctly typed and the top of the goal type stack can be popped (see \DIFdelbegin \DIFdel{figure }\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{enumStateR2}).

\begin{figure}[t]
	\begin{subfigure}{\linewidth}
		\includegraphics[scale=.4]{graphVis/Chapter4/enumerateR2.png}
	\end{subfigure}\par\medskip

	\begin{subfigure}{\linewidth}
		\begin{lstlisting}[mathescape]
			typeStack = [Just (DList a)$^1$]
		\end{lstlisting}
	\end{subfigure}\par\medskip
\caption{The subtree representing the explicit list has been replaced with a call to \texttt{DList.singleton}.}
\label{enumStateR2}
\end{figure}

The only part of the AST that hasn't been refactored is the rightmost expression ``\texttt{(enumerate right)}''. This expression will be handled in the same way that the first recursive call to \texttt{enumerate} was. The refactoring will descend to the call to \texttt{enumerate}\DIFaddbegin \DIFadd{, }\DIFaddend pop the top of the goal type stack and push \texttt{Nothing} onto the stack because none of the arguments need to change. The refactoring will ascend to the \texttt{HsApp} node and pop off the \texttt{Nothing} value\DIFaddbegin \DIFadd{, }\DIFaddend and therefore not descend to check the subtree consisting only of the \texttt{right} variable. At this point the goal type stack is empty so the refactoring is finished and can return the modified AST.

This section has given a concrete example of how an abstract syntax tree is traversed and how the goal type stack is used. The next section will go through the code that handles each of the different types of expressions.

\subsection{Traversing an expression, the implementation of \texttt{doEmbRefact'}}

Though the top level \texttt{doEmbRefact} function determines if a subtree should be changed at all, \texttt{doEmbRefact'} determines how the transformation traverses the subtree and the changes that occur based on the constructor of the current abstract syntax node. This section discusses each of the cases of \texttt{doEmbRefact'}.

\begin{figure}[t]
\begin{lstlisting}
        doEmbRefact' (GHC.L l (GHC.HsApp le re)) = do
          le' <- doEmbRefact le
          wrapWithProj <- shouldInsertProj       
          re' <- doEmbRefact re
          lift $ logm "POST RHS REFACT IN APP CASE"
          let newApp = (GHC.L l (GHC.HsApp le' re'))
          if wrapWithProj
            then do
            projRdr <- getProjFun
            let var = GHC.HsVar projRdr
            pApp <- lift $ wrapInPars newApp
            lVar <- lift $ locate var
            lift $ addAnnVal lVar
            let fullApp = GHC.HsApp lVar pApp
            lift $ locate fullApp
            else return newApp
\end{lstlisting}
\caption{The case of \texttt{doEmbRefact'} that handles function applications.}
\label{embRefactApp}
\end{figure}

The way that function applications are transformed is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{embRefactApp}. The refactoring will transform the left hand side of the application first, then it checks if the entire application needs to be wrapped with a call to the projection function because the leftmost child of the subtree does not have an equivalent function over the target type. After that the right sub tree is refactored and this refactored expression is applied to the refactored left hand expression.

If the projection function needs to be introduced at this layer then another application is wrapped around the already modified syntax tree. Otherwise the application with its modified \DIFdelbegin \DIFdel{subtrees }\DIFdelend \DIFaddbegin \DIFadd{sub trees }\DIFaddend can just be returned.

\begin{figure}[t]
\begin{lstlisting}
        doEmbRefact' (GHC.L l (GHC.OpApp le op rn re)) = do
          op' <- doEmbRefact op
          wrapWithProj <- shouldInsertProj
          lift $ addBackquotes op'
          le' <- doEmbRefact le
          re' <- doEmbRefact re
          let newOp = (GHC.L l (GHC.OpApp le' op' rn re'))
          wrapProjIfNeeded wrapWithProj newOp
\end{lstlisting}
\caption{The case of \texttt{doEmbRefact'} that handles operator applications}
\label{embRefactOp}
\end{figure}

The next case, operator application, is handled much like function application is\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{: }\DIFaddend whether or not the operator can be replaced by a target type equivalent determines if the projection function needs to be added. Otherwise the \DIFdelbegin \DIFdel{left and right hand }\DIFdelend \DIFaddbegin \DIFadd{left- and right-hand }\DIFaddend expressions can be refactored. 

The new operator may not be allowed in an infix position and in this case backquotes (\texttt{`}) are added, which in Haskell makes a prefix function into an infix version of that function. A good example of this is \texttt{DList.append}. \texttt{DList.append} is paired with \texttt{(++)} which is an operator whereas \texttt{DList.append} is a prefix function, therefore the refactoring of ``\texttt{leftList ++ rightList}'' is ``\texttt{leftListRef `DList.append` rightListRef}'' where \DIFdelbegin \DIFdel{left and right list ``}\DIFdelend \DIFaddbegin \texttt{\DIFadd{leftListRef}} \DIFadd{and }\DIFaddend \texttt{\DIFdelbegin \DIFdel{Ref}\DIFdelend \DIFaddbegin \DIFadd{rightListRef}\DIFaddend } \DIFdelbegin \DIFdel{'' }\DIFdelend are the refactored parameters to the append function.

\begin{figure}[!t]
\begin{lstlisting}
        doEmbRefact' var@(GHC.L l (GHC.HsVar rdr)) = do
          st <- get          
          let ts = typeStack st
              fs = funcs st              
          typed <- lift getRefactTyped
          mId <- lift (getIdFromVar var)
          let id = gfromJust ("Tried to get id for: " ++ SYB.showData SYB.Parser 3 rdr) mId
              currTy = GHC.idType id
              keyOcc = GHC.rdrNameOcc rdr
              mVal = (GHC.occNameString keyOcc) `M.lookup` (eqFuns fs)
          case mVal of
            Nothing -> do
              popTS
              dontSearchSubTrees currTy
              lift $ logm "Nothing case of mVal"
              printStack
              insertProjToT
              return var
            (Just (oNm, ty)) -> do
                let changedTypes = typeDifference ty currTy
                    newE = (GHC.L l (GHC.HsVar oNm))
                oldAnns <- lift fetchAnnsFinal
                case M.lookup (mkAnnKey var) oldAnns of
                  Nothing -> lift (mergeRefactAnns $ copyAnn var newE oldAnns)
                  Just v -> do
                    let dp = annEntryDelta v
                    lift $ addAnnValWithDP newE dp
                popTS
                addToTS changedTypes                
                return newE
\end{lstlisting}
\caption{The case of \texttt{doEmbRefact'} that handles variables}
\label{embRefactVar}
\end{figure}

The major changes that the refactoring makes are performed when \texttt{doEmbRefact'} encounters a variable. This logic is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{embRefactVar}. The first six lines of this case (lines 2 through 7 in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{embRefactVar}) retrieve the type of the current variable from the typed abstract syntax. 

Next (line 10 of \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{embRefactVar}) the refactoring checks if there is a possible replacement function. If a replacement \DIFdelbegin \DIFdel{isn't }\DIFdelend \DIFaddbegin \DIFadd{is not }\DIFaddend available then the transformation can pop the top element from the stack and any parameters that are applied to this value don't have to be searched. This is done through the call to \texttt{dontSearchSubTrees} which puts \texttt{Nothing} values onto the type stack. For example\DIFaddbegin \DIFadd{, }\DIFaddend if the current variable was ``\texttt{f :: a -> b -> c}'' and there was no possible replacement for \texttt{f}\DIFdelbegin \DIFdel{then }\DIFdelend \DIFaddbegin \DIFadd{, then }\\ \DIFaddend \texttt{dontSearchSubTrees} would push two \texttt{Nothing}s onto the type stack so that the subtrees of type \texttt{a} and \texttt{b} aren't modified. Finally in this case the boolean ``\texttt{insertProj}'' in the refactoring state is flipped\DIFdelbegin \DIFdel{that }\DIFdelend \DIFaddbegin \DIFadd{, which }\DIFaddend informs the refactoring that higher up the tree the application of this value needs to \DIFaddbegin \DIFadd{be }\DIFaddend wrapped with the projection function. 

When a possible replacement for the current value is found (this case begins on line 19 of \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{embRefactVar})\DIFaddbegin \DIFadd{, }\DIFaddend the refactoring first calculates which parameters of the new function have different types from the original function which is stored in the \texttt{changedTypes} variable. This variable is a list of \texttt{Maybe GHC.Type}s where the \texttt{Nothing} values mean that the type of that parameter has not changed and \DIFaddbegin \DIFadd{``}\DIFaddend \texttt{Just \_}\DIFaddbegin \DIFadd{'' }\DIFaddend values indicate the new type that \DIFdelbegin \DIFdel{that }\DIFdelend \DIFaddbegin \DIFadd{the }\DIFaddend parameter must become. This list represents each parameter of the variable from left to right order. This list of type changes that need to be handled is pushed onto the type stack in line 29 of \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{embRefactVar} after the current goal type is popped from the stack. 

The rest of this case constructs the new expression and copies over the annotation from the old piece of abstract syntax so that any formatting associated with the original call is preserved in the new source code. Finally the new expression can be returned.

\begin{figure}[t]
\begin{lstlisting}
        doEmbRefact' eLst@(GHC.L l (GHC.ExplicitList ty mSyn lst)) = do
          if (length lst) == 1
            then do
            st <- get
            let fs = funcs st
                singletonRdr = mkQualifiedRdrName (GHC.mkModuleName "DList") "singleton"
                singletonVar = (GHC.HsVar singletonRdr)
            lVar <- lift $ locate singletonVar
            lift $ addAnnVal lVar
            lift $ zeroDP lVar
            let rhs = head lst
            lift $ setDP (DP (0,1)) rhs
            lApp <- lift $ locate (GHC.HsApp lVar rhs)
            parApp <- lift $ wrapInPars lApp
            return parApp
            else do
            st <- get
            let fs = funcs st
                projRdr = projFun fs
            lVar <- lift $ locate (GHC.HsVar projRdr)
            lApp <- lift $ locate (GHC.HsApp lVar eLst)
            lift $ wrapInPars lApp
            return lApp
\end{lstlisting}
\caption{The case that handles explicit list syntax}
\label{embRefactExpLst}
\end{figure}

One of the goals that influenced the design of this system was to produce a reusable system so that more embeddable type refactorings could easily be created. A tricky situation arises with Haskell's support for explicit list syntax (e.g. ``\texttt{[1,2,3]}''). Because lists are the source type of the ``list to Hughes list'' refactoring\DIFaddbegin \DIFadd{, }\DIFaddend the explicit list syntax may need to be transformed during the refactoring, so this \DIFdelbegin \DIFdel{type specific }\DIFdelend \DIFaddbegin \DIFadd{type-specific }\DIFaddend bit of code remains in the otherwise generic function. \DIFdelbegin \DIFdel{Removing this case is a major development priority}\DIFdelend \DIFaddbegin \DIFadd{Ideally this function could remain completely type agnostic but because the ``list to Hughes list'' refactoring works over the list type it needs to descend into this type-specific constructor of the abstract syntax tree}\DIFaddend .

The case that handles explicit lists is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{embRefactExpLst} and is simple enough. If the explicit list is just a single element long (e.g. ``\texttt{[3]}'') then that value will be passed to the difference list's \texttt{singleton} function, otherwise the projection function is wrapped around longer lists. 

Finally \texttt{doEmbRefact'} has a simple catch-all case that handles all the other constructors of \texttt{HsExpr}. This case shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{doEmbCatchAll} uses the \texttt{gmapM} generic combinator to recursively call \texttt{doEmbRefact} on all of the given expression's children. 

\begin{figure}[t]
\begin{lstlisting}
doEmbRefact' ex = gmapM (SYB.mkM doEmbRefact) ex
\end{lstlisting}
\caption{The catch all case of \texttt{doEmbRefact'}}
\label{doEmbCatchAll}
\end{figure}

\section{Other enhancements made to HaRe}
\label{hareAPI}

Another contribution \DIFdelbegin \DIFdel{that was }\DIFdelend made in the process of creating the refactorings described in this thesis \DIFdelbegin \DIFdel{were }\DIFdelend \DIFaddbegin \DIFadd{was }\DIFaddend many enhancements to HaRe's API. With the switch to the GHC API and the introduction of ghc-exactprint\DIFaddbegin \DIFadd{, }\DIFaddend a large part of HaRe's original API was reimplemented but the functionality it provided was designed for aiding the development of refactorings that used \DIFdelbegin \DIFdel{Programmatica}\DIFdelend \DIFaddbegin \DIFadd{Programatica}\DIFaddend 's abstract syntax tree instead of GHC's. Additionally ghc-exactprint has a very different way of formatting source code and the current API did not help refactoring developers use this library. This section will describe contributions I made to HaRe's API that help working with both GHC's abstract syntax and ghc-exactprint.

These contributions took two forms: ``queries'' and ``transformations.'' Queries extract parts or determine properties of an AST, whereas transformations are small modifications to source code that are useful to many refactorings. This section will briefly describe some of the functionality provided by HaRe's API that I implemented while developing the refactorings described in this thesis. 

\subsection{The \texttt{Query} module}

One thing that many refactorings have in common is the need to extract the same AST elements. In an effort to reduce the amount of duplicated code\DIFaddbegin \DIFadd{, }\DIFaddend the \texttt{Query} module was created to hold functions that extract or check properties of syntax elements.

Common queries do things like retrieve the function binding based on function name, as seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{getHsBind}. Many refactorings have function names as an input argument so \texttt{getHsBind} is useful for retrieving the appropriate binding.

\begin{figure}[t]
\begin{lstlisting}
getHsBind :: (Data a) => GHC.RdrName -> a -> Maybe (GHC.HsBind GHC.RdrName)
getHsBind nm a = SYB.something  (Nothing `SYB.mkQ` isBind) a
    where
#if __GLASGOW_HASKELL__ <= 710
        isBind ((bnd@(GHC.FunBind (GHC.L _ name) _ _ _ _ _)) :: GHC.HsBind GHC.RdrName)
#else
        isBind ((bnd@(GHC.FunBind (GHC.L _ name) _ _ _ _)) :: GHC.HsBind GHC.RdrName)
#endif
            | name == nm = (Just bnd)
        isBind _ = Nothing
\end{lstlisting}
\caption{This function retrieves the function binding of the given name from the provided syntax tree.}
\label{getHsBind}
\end{figure}

Currently the \texttt{Query} module defines the following functions:

\begin{itemize}
\item \texttt{getVarAndRHS} retrieves the pattern and \DIFdelbegin \DIFdel{right hand }\DIFdelend \DIFaddbegin \DIFadd{right-hand }\DIFaddend side expression from a function match
\item \texttt{getHsBind} retrieves the binding of the function with the given name from the provided syntax tree
\item \texttt{getFunName} takes in a string representation of a function name and gets the \texttt{Name} of that function
\item \texttt{getTypedHsBind} given an \texttt{OccName} retrieves the typed syntax tree for a function binding
\item \texttt{getTypeSig} gets the type signature of a function based on the location of the function binding and its name
\item \texttt{isHsVar} given a string and an expression this function checks if the expression is a variable with the same name as the value of the string
\item \texttt{astCompare} does a rough estimate \DIFdelbegin \DIFdel{if }\DIFdelend \DIFaddbegin \DIFadd{of whether }\DIFaddend two abstract syntax trees are the same. This is done by checking that the trees are the same shape and have the same constructors\DIFaddbegin \DIFadd{, }\DIFaddend and that any names that appear in the trees are the same 
\item \texttt{lookupByLoc} retrieves the syntax element at a given location
\item \texttt{getIdFromVar} takes in a parsed expression that uses the \texttt{HsVar} constructor and retrieves the typed \texttt{Id} of that variable
\end{itemize}


\subsection{The \texttt{Transform} module}

The other major contribution made to HaRe's API is the addition of the \texttt{Transform} module. This module collects small changes to the syntax tree and annotations that are not specific to any particular refactoring \DIFaddbegin \DIFadd{and need to be performed by multiple refactorings}\DIFaddend .

Abstract syntax changes like adding an import declaration to the target module, or replacing the original binding of a function with another one \DIFdelbegin \DIFdel{.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \DIFadd{are small changes to source programs that many refactorings need to do. }\DIFaddend The other type of \DIFdelbegin \DIFdel{transformation that is common across multiple refactorings are those that }\DIFdelend \DIFaddbegin \DIFadd{transformations that refactorings all perform }\DIFaddend do not affect the abstract syntax tree but change the formatting of the program. \DIFdelbegin \DIFdel{This is done through }\DIFdelend \DIFaddbegin \DIFadd{In HaRe this formatting work is done by }\DIFaddend modifying the annotations associated with the syntax elements. 

The \texttt{Transform} module defines the following functions:

\begin{itemize}
\item \texttt{addSimpleImportDecl} adds a new import to the current module based on a string module name and an optional qualifier\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{wrapInLambda} creates a lambda expression from a pattern and \DIFdelbegin \DIFdel{right hand side expression , }\DIFdelend \DIFaddbegin \DIFadd{right-hand side expression and }\DIFaddend also creates all the necessary annotations so that the expression displays correctly\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{wrapInParsWithDPs} wraps the given expression in \DIFdelbegin \DIFdel{parenthesis }\DIFdelend \DIFaddbegin \DIFadd{parentheses }\DIFaddend and offsets the new expression based on the given ``DPs'' (delta position)\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{wrapInPars} is similar to \texttt{wrapInParsWithDPs} but defaults to a single column offset between the new expression and the syntax element that comes before it\DIFaddbegin \DIFadd{.
}\DIFaddend \item  \texttt{removePars} removes \DIFdelbegin \DIFdel{parenthesis }\DIFdelend \DIFaddbegin \DIFadd{parentheses }\DIFaddend from an expression and makes sure that the new expression is offset \DIFdelbegin \DIFdel{on }\DIFdelend \DIFaddbegin \DIFadd{one }\DIFaddend column from the previous syntax element\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{addNewLines} adds newlines before the given syntax element\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{replaceTypeSig} replaces the existing type signature of a function with the given one\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{replaceFunBind} replaces the source binding of a function with the given one\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{addBackquotes} adds backquote characters around an expression\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{constructHsVar} constructs an \texttt{HsVar} expression from a name\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{constructLHsTy} constructs a type variable from a name\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{insertNewDecl} inserts a new declaration into the current module from a given string that represents that declaration\DIFaddbegin \DIFadd{.
}\DIFaddend \item \texttt{rmFun} removes a function from the current module based on a name\DIFaddbegin \DIFadd{.
}\DIFaddend \end{itemize}


\section{Summary}

This chapter has described \DIFdelbegin \DIFdel{two of the data driven refactorings were developed for HaRe. It has also introduced how refactorings are implemented in HaReand the functionality provided by }\DIFdelend \DIFaddbegin \DIFadd{how the generalising }\texttt{\DIFadd{Maybe}} \DIFadd{and the list to Hughes list refactorings have been implemented in HaRe. In addition to the specific implementations  of these two refactorings this chapter also described the additions to HaRe's API that were made to support the refactorings developed for this thesis. In addition to the enhancements made to }\DIFaddend HaRe's \DIFdelbegin \DIFdel{API}\DIFdelend \DIFaddbegin \DIFadd{general API for refactoring, this chapter also covers the API that supports reversibly embeddable type refactorings such as the list to Hughes list refactoring}\DIFaddend .

\DIFdelbegin \DIFdel{The next chapters will continue to introduce more complex data driven refactorings, starting with the ``generalising Monads to Applicative'' refactoring}\DIFdelend \DIFaddbegin \DIFadd{Now that the basic design and implementation of data-driven refactorings have been introduced, the thesis will proceed by looking at two more refactorings, generalising monadic code to applicative functors (Chapter~\ref{chp:applicative}) and the monadification of pure code (Chapter~\ref{chp:monadification})}\DIFaddend .


\DIFdelbegin %DIFDELCMD < \renewcommand{\floatpagefraction}{.66}
%DIFDELCMD < %%%
\DIFdelend %DIF > \renewcommand{\floatpagefraction}{.66}

\chapter{Generalising \DIFdelbegin \DIFdel{Monads }\DIFdelend \DIFaddbegin \DIFadd{Monadic Code }\DIFaddend to Applicative \DIFaddbegin \DIFadd{Functors}\DIFaddend }
\DIFaddbegin \chaptermark{Generalising Monads to Applicative}
\DIFaddend \label{chp:applicative}

The previous chapter introduced the concept of a functional data refactoring and gave \DIFdelbegin \DIFdel{two examples , }\DIFdelend \DIFaddbegin \DIFadd{three examples of data driven refactorings: }\DIFaddend introducing a type synonym\DIFdelbegin \DIFdel{and generalising Maybeto MonadPlus}\DIFdelend \DIFaddbegin \DIFadd{, generalising Maybe, and list to Hughes lists}\DIFaddend . This chapter will cover another generalising refactoring\DIFdelbegin \DIFdel{in more depth, rewriting }\DIFdelend \DIFaddbegin \DIFadd{, generalising }\DIFaddend monadic functions to use applicative functors \DIFaddbegin \DIFadd{if certain conditions are met}\DIFaddend . 

\DIFaddbegin \section{Applicative Functors}

\DIFaddend In their 2008 functional pearl \DIFdelbegin \DIFdel{"Applicative programming with effects" }\DIFdelend \DIFaddbegin \DIFadd{``Applicative Programming with Effects,~\mbox{%DIFAUXCMD
\citep{mcbrideIdioms}}\hspace{0pt}%DIFAUXCMD
'' }\DIFaddend Conor McBride and Ross Paterson introduced a new \DIFdelbegin \DIFdel{typeclass }\DIFdelend \DIFaddbegin \DIFadd{type class }\DIFaddend that they called Idioms but are \DIFdelbegin \DIFdel{also }\DIFdelend \DIFaddbegin \DIFadd{more commonly }\DIFaddend known as Applicative Functors\DIFdelbegin \DIFdel{~\mbox{%DIFAUXCMD
\citep{mcbrideIdioms}}\hspace{0pt}%DIFAUXCMD
. Idioms }\DIFdelend \DIFaddbegin \DIFadd{. Applicative functors, like monads, }\DIFaddend provide a way to run \DIFdelbegin \DIFdel{effectful computations and collect them in some way. They }\DIFdelend \DIFaddbegin \DIFadd{computations within some context, and collect their results. Applicative functors }\DIFaddend are more expressive than functors but more general than \DIFdelbegin \DIFdel{Monads, further work was done in~\mbox{%DIFAUXCMD
\citep{arrowsAndIdioms} }\hspace{0pt}%DIFAUXCMD
to prove that Idioms are also less powerful than Arrows}\DIFdelend \DIFaddbegin \DIFadd{monads}\DIFaddend . 

Applicative functors \DIFdelbegin \DIFdel{were }\DIFdelend \DIFaddbegin \DIFadd{are }\DIFaddend implemented in GHC as the \DIFdelbegin \DIFdel{typeclass }\DIFdelend \DIFaddbegin \DIFadd{type class }\DIFaddend \texttt{Applicative}. \DIFdelbegin \DIFdel{An interesting part of the history of GHC is that despite }\DIFdelend \DIFaddbegin \DIFadd{Interestingly, even though }\DIFaddend McBride and Paterson \DIFdelbegin \DIFdel{proving in their original functional pearl }\DIFdelend \DIFaddbegin \DIFadd{proved }\DIFaddend that all monads are also applicative functors \DIFdelbegin \DIFdel{, however,  }\DIFdelend \DIFaddbegin \DIFadd{in their original functional pearl~\mbox{%DIFAUXCMD
\citep{mcbrideIdioms}}\hspace{0pt}%DIFAUXCMD
, }\DIFaddend GHC did not actually require instances of \DIFdelbegin \DIFdel{monad }\DIFdelend \DIFaddbegin \texttt{\DIFadd{Monad}} \DIFaddend to also be instances of \DIFdelbegin \DIFdel{Applicative }\DIFdelend \DIFaddbegin \texttt{\DIFadd{Applicative}} \DIFaddend until GHC's 7.10.1 release~\citep{ghc7.10Release}. Now that every monad must also be an applicative functor there \DIFdelbegin \DIFdel{now exists a large amount }\DIFdelend \DIFaddbegin \DIFadd{is now a large body }\DIFaddend of code which could \DIFaddbegin \DIFadd{potentially }\DIFaddend be rewritten using the applicative operators rather than the monadic ones. 

This chapter will discuss the design and implementation of a refactoring which \DIFdelbegin \DIFdel{will automatically refactor }\DIFdelend \DIFaddbegin \DIFadd{automatically refactors }\DIFaddend code written in a monadic style to use the applicative operators instead \DIFaddbegin \DIFadd{given it meets certain conditions that we will spell out}\DIFaddend . Section~\ref{sec:appOverview} is a brief overview of the \DIFaddbegin \DIFadd{operations of the }\DIFaddend \texttt{Applicative} typeclass\DIFdelbegin \DIFdel{'s operators, section~\ref{sec:appProgStyle} will discuss }\DIFdelend \DIFaddbegin \DIFadd{, Section~\ref{sec:appProgStyle} discusses }\DIFaddend the applicative programming style and, in general, how programs are constructed using the applicative operators\DIFdelbegin \DIFdel{, next, section}\DIFdelend \DIFaddbegin \DIFadd{. Next, Section}\DIFaddend ~\ref{sec:appInPractice} describes how the Haskell community uses the applicative typeclass\DIFdelbegin \DIFdel{. Then section}\DIFdelend \DIFaddbegin \DIFadd{, and Section}\DIFaddend ~\ref{sec:appRefact} describes in detail the refactoring \DIFaddbegin \DIFadd{and its preconditions}\DIFaddend . Section~\ref{sec:refacImp} discusses the \DIFdelbegin \DIFdel{refactorings }\DIFdelend \DIFaddbegin \DIFadd{refactoring's }\DIFaddend implementation in HaRe and some additional refactorings that can be used to \DIFdelbegin \DIFdel{get around the }\DIFdelend \DIFaddbegin \DIFadd{transform the code to meet the }\DIFaddend preconditions. Finally some applications that are good candidates for this refactoring are discussed in \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{sec:appApps}.

\DIFdelbegin %DIFDELCMD < \section{The applicative Typeclass}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \section{The Applicative Type Class}
\DIFaddend \label{sec:appOverview}
This section \DIFdelbegin \DIFdel{will be a quick }\DIFdelend \DIFaddbegin \DIFadd{is a short }\DIFaddend introduction to the \texttt{Applicative} \DIFdelbegin \DIFdel{typeclass and some other }\DIFdelend \DIFaddbegin \DIFadd{type class and some related }\DIFaddend functions that are commonly used to write functions in an applicative style. 
\DIFdelbegin \DIFdel{We begin by discussing a superclass of }\texttt{\DIFdel{Applicative}}%DIFAUXCMD
\DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{To discuss applicative functors we first have to cover functors. Functors originate from category theory and are a natural generalisation of maps over lists. The definition of GHC's }\DIFaddend \texttt{Functor} \DIFdelbegin \DIFdel{, shown in figure}\DIFdelend \DIFaddbegin \DIFadd{type class is shown in Figure}\DIFaddend ~\ref{functor}. The \texttt{Functor} \DIFdelbegin \DIFdel{typeclass }\DIFdelend \DIFaddbegin \DIFadd{type class }\DIFaddend defines a single function that must be implemented\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{: }\DIFaddend \texttt{fmap}.

\begin{figure}
\begin{lstlisting}
class Functor f where
	fmap :: (a -> b) -> f a -> f b
\end{lstlisting}
\caption{The \texttt{Functor} \DIFdelbeginFL \DIFdelFL{typeclass}\DIFdelendFL \DIFaddbeginFL \DIFaddFL{type class}\DIFaddendFL }
\label{functor}
\end{figure}

\DIFdelbegin \DIFdel{The }\DIFdelend \texttt{fmap} \DIFdelbegin \DIFdel{function }\DIFdelend allows for a \DIFaddbegin \DIFadd{pure }\DIFaddend function to be applied to the \DIFdelbegin \DIFdel{contents of the Functor f. The functor could be considered as a context and }\DIFdelend \DIFaddbegin \DIFadd{values inside the functor }\texttt{\DIFadd{f}}\DIFadd{. Essentially }\DIFaddend \texttt{fmap} \DIFaddbegin \DIFadd{acts }\DIFaddend as a function that \DIFdelbegin \DIFdel{allows other functions to run }\DIFdelend \DIFaddbegin \DIFadd{runs other functions }\DIFaddend within the \texttt{Functor}'s context. \DIFdelbegin \DIFdel{Consider the }\DIFdelend \DIFaddbegin \DIFadd{Functors do have a serious limitation, however. Consider Figure~\ref{gxy}, which shows the types of the }\DIFaddend function \texttt{g} and \DIFdelbegin \DIFdel{the }\DIFdelend two values \texttt{x} and \texttt{y}\DIFdelbegin \DIFdel{whose types are shown in figure~\ref{gxy}, where }\DIFdelend \DIFaddbegin \DIFadd{, which are some functor }\DIFaddend \texttt{f} \DIFdelbegin \DIFdel{is some functor }\DIFdelend \DIFaddbegin \DIFadd{that contains values of type }\texttt{\DIFadd{a}} \DIFadd{and }\texttt{\DIFadd{b}} \DIFadd{respectively}\DIFaddend . With these three \DIFdelbegin \DIFdel{things its simple to imagine a situation where its desirable to }\DIFdelend \DIFaddbegin \DIFadd{values the obvious goal would be to }\DIFaddend compute a value of type \texttt{f c}. When \texttt{g} is \DIFdelbegin \DIFdel{``fmapped'' }\DIFdelend \DIFaddbegin \DIFadd{mapped }\DIFaddend over \texttt{x}\DIFaddbegin \DIFadd{, }\DIFaddend a value of \DIFaddbegin \DIFadd{type }\DIFaddend \texttt{\DIFdelbegin \DIFdel{(}\DIFdelend f (b -> c)\DIFdelbegin \DIFdel{)}\DIFdelend } is returned.

\begin{figure}[t]
\begin{lstlisting}
g :: a -> b -> c
x :: f a
y :: f b
\end{lstlisting}
\caption{A function and \DIFdelbeginFL \DIFdelFL{some values in a functor}\DIFdelendFL \DIFaddbeginFL \DIFaddFL{two functors}\DIFaddendFL .}
\label{gxy}
\end{figure}

Unfortunately there is no way to either extract the function of type \texttt{(b -> c)} from the \DIFdelbegin \DIFdel{functor or apply the }\DIFdelend \DIFaddbegin \DIFadd{functor or apply }\DIFaddend \texttt{\DIFdelbegin \DIFdel{(}\DIFdelend f \DIFdelbegin \DIFdel{b)}%DIFDELCMD < } %%%
\DIFdel{value to the }%DIFDELCMD < \texttt{%%%
\DIFdelend (\DIFdelbegin \DIFdel{f (}\DIFdelend b -> c)\DIFdelbegin \DIFdel{)}\DIFdelend \DIFaddbegin } \DIFadd{to the values in }\texttt{\DIFadd{y}\DIFaddend } \DIFdelbegin \DIFdel{value }\DIFdelend using the \texttt{Functor} \DIFdelbegin \DIFdel{typeclass }\DIFdelend \DIFaddbegin \DIFadd{type class }\DIFaddend alone. Sequencing commands in this way requires a more powerful abstraction\DIFdelbegin \DIFdel{, applicative functors ~\mbox{%DIFAUXCMD
\cite{realWorldHaskell}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{. Applicative functors allow for this sort of sequencing of commands within a context~\mbox{%DIFAUXCMD
\citep{realWorldHaskell}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . 

In Haskell applicative functors are implemented by the \texttt{Applicative} \DIFdelbegin \DIFdel{typeclass}\DIFdelend \DIFaddbegin \DIFadd{type class}\DIFaddend . \texttt{Applicative} declares two functions, \texttt{pure} and \texttt{(<*>)}\DIFdelbegin \DIFdel{. The }\DIFdelend \DIFaddbegin \DIFadd{, the }\DIFaddend types of these two functions are shown in \DIFdelbegin \DIFdel{figure~\ref{appTypes}where }\texttt{\DIFdel{f}} %DIFAUXCMD
\DIFdel{is the applicative functor}\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{appTypes}}\DIFaddend . 

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < 

%DIFDELCMD < pure :: a -> f a
%DIFDELCMD < (<*>) :: f (a -> b) -> f a -> f b
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
class Functor f => Applicative f where
   pure :: a -> f a
   (<*>) :: f (a -> b) -> f a -> f b
\end{lstlisting}
\DIFaddendFL \caption{\DIFdelbeginFL \DIFdelFL{Applicative}\DIFdelendFL \DIFaddbeginFL \texttt{\DIFaddFL{Applicative}}\DIFaddendFL 's minimal complete definition}
\label{appTypes}
\end{figure}

The \texttt{pure} function \DIFdelbegin \DIFdel{is the equivalent of }\DIFdelend \DIFaddbegin \DIFadd{has the same type as }\DIFaddend monad's \texttt{return}\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{: }\DIFaddend it simply lifts a value into the applicative context. The other function \texttt{(<*>)}\DIFdelbegin \DIFdel{(}\DIFdelend \DIFaddbegin \DIFadd{, }\DIFaddend which is typically pronounced ``applied over'' or just ``apply''\DIFdelbegin \DIFdel{). Apply take }\DIFdelend \DIFaddbegin \DIFadd{, takes }\DIFaddend in two arguments, both of which are applicative values\DIFaddbegin \footnote{\DIFadd{That is, these values are of a type that is an instance of applicative.}}\DIFaddend . The first argument is a function, from types \texttt{a} to \texttt{b}, within the applicative context, and the second argument is an \texttt{Applicative} over type \texttt{a}. Apply will return an \texttt{Applicative} over \texttt{b}.

\subsection{Other useful functions}
\label{sec:useful}
Though \texttt{pure} and apply are the only \DIFdelbegin \DIFdel{two }\DIFdelend functions that are required to be defined to declare an instance of \DIFdelbegin \DIFdel{applicative }\DIFdelend \DIFaddbegin \texttt{\DIFadd{Applicative}} \DIFaddend there are several other useful functions that can either be derived from these two functions or come from other \DIFdelbegin \DIFdel{typeclasses }\DIFdelend \DIFaddbegin \DIFadd{type classes }\DIFaddend which will be briefly covered here. First\DIFaddbegin \DIFadd{, }\DIFaddend there are two \DIFdelbegin \DIFdel{variations }\DIFdelend \DIFaddbegin \DIFadd{special cases }\DIFaddend of apply, whose \DIFdelbegin \DIFdel{types are listed in figure~\ref{appVars} .
}\DIFdelend \DIFaddbegin \DIFadd{definitions are shown in Figure~\ref{appVars} along with the definitions of some helper functions.}\footnote{\texttt{\DIFadd{liftA2}} \DIFadd{and }\texttt{\DIFadd{(<\$)}} \DIFadd{are used in these definitions for performance reasons. The reasoning behind these decisions is described in the comments in the declaration of the }\texttt{\DIFadd{Applicative}} \DIFadd{type class, which can be found here: }\url{http://hackage.haskell.org/package/base-4.11.1.0/docs/src/GHC.Base.html\#Applicative}}
\DIFaddend 

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < (*>) :: f a -> f b -> f b
%DIFDELCMD < (<*) :: f a -> f b -> f a
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
(*>) :: Applicative f => f a -> f b -> f b
a1 *> a2 = (id <$ a1) <*> a2

(<*) :: Applicative f => f a -> f b -> f a
(<*) = liftA2 const

(<$) :: Functor f => a -> f b -> f a
(<$) = fmap . const

liftA2 :: Applicative f => (a -> b -> c) -> f a -> f b -> f c
liftA2 f x = (<*>) (fmap f x)

\end{lstlisting}
\DIFaddendFL \caption{Variations on apply.}
\label{appVars}
\end{figure}

These functions sequence actions and still perform the contextual effects of both of their arguments but discard the value of the first and second argument respectively. These functions are used when \DIFdelbegin \DIFdel{some }\DIFdelend \DIFaddbegin \DIFadd{an }\DIFaddend operation affects the applicative context but its result will not affect the overall result of the applicative expression. 

\begin{figure}[t]
\begin{lstlisting}
f :: Int -> IO Customer
f id = writeToLog id *> getCustomerById id
\end{lstlisting}
\caption{A function that logs its argument before returning the result.}
\label{logApp}
\end{figure}

The function \texttt{f}, from \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{logApp}, looks up a customer \DIFdelbegin \DIFdel{datatype }\DIFdelend \DIFaddbegin \DIFadd{data type }\DIFaddend from the \texttt{IO} context and logs all of the customer ids that are looked up. We don't really care what \texttt{writeToLog} returns but we do want the effects that it causes (writing a log file) so we compose the calls to \texttt{writeToLog} and \texttt{getCustomerById} with the \texttt{*>} operator to cause both of the functions' effects to happen but only return the value of \texttt{getCustomerById}.

Another useful function is the infix version of \texttt{fmap}, \texttt{(<\$>)}. A consequence of the applicative laws is that \DIFdelbegin \DIFdel{every applicative's functor instance }\DIFdelend \DIFaddbegin \DIFadd{the functor instance of every }\texttt{\DIFadd{Applicative}} \DIFaddend will satisfy the equation in  \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{fmapLaw}~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{control.applicative}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{control.applicative}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . This means that instead of explicitly lifting \DIFdelbegin \DIFdel{every pure function}\DIFdelend \DIFaddbegin \DIFadd{a pure function, }\DIFaddend the infix version of \texttt{fmap} can be used instead. The next section will cover how these functions can be used in an applicative style of programming.

\begin{figure}[t]
\begin{lstlisting}
f <$> x = pure f <*> x
\end{lstlisting}
\caption{All \texttt{Applicative}s have this property.}
\label{fmapLaw}
\end{figure}

 
\section{The Applicative Programming Style}
\label{sec:appProgStyle}

\begin{figure}[t]
\begin{lstlisting}
pure f <*> is_1 <*> ... <*> is_n
\end{lstlisting}
\caption{The applicative functor \DIFaddbeginFL \DIFaddFL{in }\DIFaddendFL canonical form.}
\label{canonForm}
\end{figure}

McBride and Paterson proved that any expression built from the applicative combinators can take the \DIFdelbegin \DIFdel{following }\DIFdelend canonical form in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{canonForm}~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{mcbrideIdioms}}\hspace{0pt}%DIFAUXCMD
:}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{mcbrideIdioms}}\hspace{0pt}%DIFAUXCMD
:}\DIFaddend \footnote{This notation is taken from~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{mcbrideIdioms} }\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{mcbrideIdioms} }\hspace{0pt}%DIFAUXCMD
}\DIFaddend where applicatives were called "idioms", hence the use of "\texttt{is}"\DIFaddbegin \DIFadd{.}\DIFaddend } \DIFdelbegin \DIFdel{Where the }\DIFdelend \DIFaddbegin \DIFadd{The }\DIFaddend \texttt{is}'s \DIFaddbegin \DIFadd{in Figure~\ref{canonForm} }\DIFaddend are computations within the applicative context\DIFdelbegin \DIFdel{applied to }\DIFdelend \DIFaddbegin \DIFadd{. These computations are the arguments of }\DIFaddend some pure function \DIFaddbegin \DIFadd{``}\DIFaddend \texttt{f}\DIFdelbegin \DIFdel{which }\DIFdelend \DIFaddbegin \DIFadd{'' that }\DIFaddend has been lifted into the context \DIFaddbegin \DIFadd{with the }\texttt{\DIFadd{pure}} \DIFadd{function}\DIFaddend . 

Context-free parsing is a good \DIFdelbegin \DIFdel{use case }\DIFdelend \DIFaddbegin \DIFadd{``use case'' }\DIFaddend of the applicative style and many examples in this chapter are taken from parsers defined using the parsec library~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{parsec}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{parsec}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . The first example of the applicative programming style is a function, from \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{parseMoney}, that parses money amounts of the form\DIFaddbegin \DIFadd{:
 }

 \DIFaddend \texttt{<currency symbol><whole currency amount>.<decimal amount>}
  \DIFdelbegin \DIFdel{. }\DIFdelend \DIFaddbegin 

\DIFaddend This parser would recognise "\$4.59" or "\textsterling64.56" and parse it into the \DIFdelbegin \DIFdel{data type in figure}\DIFdelend \DIFaddbegin \texttt{\DIFadd{Money}} \DIFadd{data type as defined in Figure}\DIFaddend ~\ref{parseMoney}. The \texttt{parseMoney} function is in the canonical form as defined by \DIFdelbegin \DIFdel{~\mbox{%DIFAUXCMD
\cite{mcbrideIdioms}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{McBride and Paterson}\DIFaddend . The pure function \texttt{M} is lifted into the \texttt{CharParser} context and its three arguments are provided by three smaller parsers that handle the currency symbol, the whole amount, and the decimal amount respectively. 


\begin{figure}[t]
 \begin{lstlisting}%[frame=tblr]

data Currency = Dollar
                          | Pound
                          | Euro

data Money = M Currency Integer Integer

parseMoney :: CharParser () Money
parseMoney = M <$> parseCurrency <*> readWhole <*> readDecimal
 \end{lstlisting}
 \caption{Parsing Money}
 \label{parseMoney}
 \end{figure}

The only difference between the \texttt{readWhole} and \texttt{readDecimal} is that \texttt{readDecimal} has to consume the decimal point before reading the number. Instead of duplicating that number code\DIFaddbegin \DIFadd{, }\DIFaddend a small refactoring that lifts the parsing of the decimal into the \texttt{parseMoney} function \DIFdelbegin \DIFdel{which }\DIFdelend will allow us to reuse the \texttt{readWhole} function\DIFaddbegin \DIFadd{, }\DIFaddend as seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{altParseMoney}. In this definition of \texttt{parseMoney} the parsed decimal character is discarded because of the use of \texttt{<*} rather than the normal apply.

\begin{figure}[t]
 \begin{lstlisting}
parseMoney :: CharParser () Money
parseMoney = M <$> parseCurrency <*> readWhole <* char '.' <*> readWhole
 \end{lstlisting}
 \caption{An alternate \texttt{parseMoney} definition.}
 \label{altParseMoney}
\end{figure}

 
 \begin{figure}[t] 
  \begin{lstlisting}
parseMoney :: CharParser () Money
parseMoney = M <$> parseCurrency <*> readWhole <*> char '.' *> readWhole
 \end{lstlisting}
 \caption{\DIFdelbeginFL \DIFdelFL{The }\DIFdelendFL \DIFaddbeginFL \DIFaddFL{An ill-typed }\DIFaddendFL definition of \texttt{parseMoney}\DIFdelbeginFL \DIFdelFL{is not well typed}\DIFdelendFL .}
 \label{typeParseErr}
 \end{figure}

All of the variations of apply\footnote{As in \texttt{(<*>)}, \texttt{(<*)}, and \texttt{(*>)}.} are left associative so \DIFaddbegin \DIFadd{that }\DIFaddend the definition of \texttt{parseMoney} in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{typeParseErr} causes a type error. This error can be corrected by parenthesising the subexpression\DIFdelbegin %DIFDELCMD < \\ %%%
\DIFdel{"}\DIFdelend \DIFaddbegin \DIFadd{:
}

\begin{center}
	 \DIFaddend \texttt{char \textquotesingle.\textquotesingle~*> readWhole}
\DIFdelbegin \DIFdel{".
 }\DIFdelend \DIFaddbegin \end{center}
 \DIFaddend 

The canonical style of applicative functions\DIFaddbegin \DIFadd{, as defined by~\mbox{%DIFAUXCMD
\citep{mcbrideIdioms}}\hspace{0pt}%DIFAUXCMD
, }\DIFaddend may not be the most idiomatic way to define things. The function from \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{strParse} parses strings surrounded by double quotes. \texttt{parseStr} does not match the canonical form because no lifted pure function is applied to the rest of the applicative chain. This function could be transformed to canonical form by prepending ``\texttt{id <\$>}\DIFdelbegin \DIFdel{.}\DIFdelend ''\DIFaddbegin \DIFadd{; in practice this serves no purpose, but it shows that any expression constructed using the applicative operators can be transformed to the canonical form.
}\DIFaddend 


\begin{figure}[t]
\begin{lstlisting}
parseStr :: CharParser () String 
parseStr = char '"' *> (many1 (noneOf "\"")) <* char '"'
\end{lstlisting}
\caption{A parser for string literals.}
\label{strParse}
\end{figure}


The examples covered in this section give a basic introduction to programming in an applicative style. Next we will discuss how the Haskell community has begun to use \texttt{Applicative}s in practice. 

\section{Applicative in practice}
\label{sec:appInPractice}

\DIFdelbegin \DIFdel{To inform the development of this refactoring, is is }\DIFdelend \DIFaddbegin \DIFadd{It is }\DIFaddend helpful to understand how the Haskell community has adopted and \DIFdelbegin \DIFdel{used }\DIFdelend \DIFaddbegin \DIFadd{uses }\DIFaddend the applicative interface \DIFdelbegin \DIFdel{. }\DIFdelend \DIFaddbegin \DIFadd{to help inform the development of the refactorings in this chapter. If the Haskell community has fully embraced the addition of applicative functors and uses them regularly, then a refactoring that rewrites the monadic operations to use applicative ones is not as useful.}\footnote{\DIFadd{It could still be useful for updating a legacy code base.}} 

\DIFaddend The simplest question we wanted to answer was how many projects are actually taking advantage of the \texttt{Applicative} interface versus using the suggested implementation. When \texttt{Applicative} was made a superclass of \texttt{Monad} an implementation of the \texttt{Applicative} interface was suggested\DIFdelbegin \DIFdel{, which }\DIFdelend \DIFaddbegin \DIFadd{~\mbox{%DIFAUXCMD
\citep{applicativeProposal}}\hspace{0pt}%DIFAUXCMD
. This implementation }\DIFaddend allowed programmers to get their existing \texttt{Monad} instances to re-compile successfully with minimum effort. We are interested in how many instances use this suggested implementation\DIFaddbegin \DIFadd{, }\DIFaddend which is defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{appSugImp}.

\begin{figure}[t]
\begin{lstlisting}
pure = return
(<*>) = ap
\end{lstlisting}
\caption{The suggested implementation of \texttt{Applicative}}
\label{appSugImp}
\end{figure}

\subsection{Methodology}

Hackage is the main package repository for open source Haskell code~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{hackage}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{hackage}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . The code in Hackage is broken up into \DIFdelbegin \DIFdel{10,203 packages}\DIFdelend \DIFaddbegin \DIFadd{12,601 packages}\DIFaddend \footnote{As of \DIFdelbegin \DIFdel{10 September 2016.}\DIFdelend \DIFaddbegin \DIFadd{14 May 2018.}\DIFaddend } and within these packages there are \DIFdelbegin \DIFdel{102,597 }\DIFdelend \DIFaddbegin \DIFadd{144,413 }\DIFaddend Haskell source files\DIFdelbegin \DIFdel{. We made a local mirror of all of these Hackage packages so that the source files could be easily searched.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{We searched each package in parallel. Within each package every "}\texttt{\DIFdel{.hs}}%DIFAUXCMD
\DIFdel{" file was parsed using the }\textit{\DIFdel{ghc-exactprint}} %DIFAUXCMD
\DIFdel{library.}\footnote{\DIFdel{Unfortunately }\textit{\DIFdel{ghc-exactprint}} %DIFAUXCMD
\DIFdel{cannot parse literate Haskell ("}\texttt{\DIFdel{.lhs}}%DIFAUXCMD
\DIFdel{") files so they were excluded}} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{Once we had the abstract syntax tree we could use generic traversal combinators provided by }\textit{\DIFdel{Scrap Your Boilerplate}} %DIFAUXCMD
\DIFdel{(SYB) to search the parsed files for implementations of the }\texttt{\DIFdel{Monad}} %DIFAUXCMD
\DIFdel{and }\texttt{\DIFdel{Applicative}} %DIFAUXCMD
\DIFdel{typeclasses}\DIFdelend \DIFaddbegin \DIFadd{, of which 133,246 (about 92\% of the total) were successfully parsed and searched}\DIFaddend . 

\DIFdelbegin \DIFdel{Unfortunately of all the source files, 6,590 failed to parse. These failures were due to }\DIFdelend \DIFaddbegin \DIFadd{Parsing failures were caused by }\DIFaddend two main reasons. First \textit{ghc-exactprint} cannot parse C or C++ code so any files with  \DIFaddbegin \DIFadd{embedded }\DIFaddend C/C++ \DIFdelbegin \DIFdel{codeblocks }\DIFdelend \DIFaddbegin \DIFadd{code blocks }\DIFaddend were not searched. The second major reason for failure was modules not including language pragmas at the top of each file. A feature of the Cabal build system allows turning on language extensions at the package level rather than the module level: since we are parsing individual files rather than entire projects the parser does not realise that certain language extensions may be turned on. \DIFdelbegin %DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Even with these failures we were still able to parse more than 90}\DIFdelend \DIFaddbegin \DIFadd{Parsing failures only happened in about 8}\DIFaddend \% of the \DIFdelbegin \DIFdel{Haskell source files contained in Hackage.
}\DIFdelend \DIFaddbegin \DIFadd{files so there was no great need to modify ghc-exactprint to handle these cases.
}

\DIFadd{Within each package every "}\texttt{\DIFadd{.hs}}\DIFadd{" file was parsed using the }\textit{\DIFadd{ghc-exactprint}} \DIFadd{library.}\footnote{\DIFadd{Unfortunately }\textit{\DIFadd{ghc-exactprint}} \DIFadd{cannot parse literate Haskell ("}\texttt{\DIFadd{.lhs}}\DIFadd{") files so they were excluded}} \DIFadd{Once we had the abstract syntax tree we could use generic traversal combinators provided by }\textit{\DIFadd{Scrap Your Boilerplate}} \DIFadd{(SYB) to search the parsed files for implementations of the }\texttt{\DIFadd{Monad}} \DIFadd{and }\texttt{\DIFadd{Applicative}} \DIFadd{type classes. }\DIFaddend From the successfully parsed files a total of \DIFdelbegin \DIFdel{1,810 }\DIFdelend \DIFaddbegin \DIFadd{2,376 }\DIFaddend \texttt{Applicative} and \DIFdelbegin \DIFdel{1,727 }\DIFdelend \DIFaddbegin \DIFadd{2,126 }\DIFaddend \texttt{Monad} instance declarations were found.

\subsection{Search Results}

\DIFdelbegin \DIFdel{The found }\DIFdelend \DIFaddbegin \DIFadd{Once all the }\DIFaddend \texttt{Applicative} instances \DIFdelbegin \DIFdel{in 549 cases were defined using the suggested implementation of }\DIFdelend \DIFaddbegin \DIFadd{were discovered, it was much easier to search through them to try and understand how the Haskell community has been using this type class. The }\texttt{\DIFadd{Applicative}} \DIFadd{type class is an interesting case of programming language design and management. A language feature, in this case }\texttt{\DIFadd{Monads}}\DIFadd{, was accepted and even celebrated by the language's community. Then, twenty-five years into the language's life, to continue using this popular feature, users have to modify their code. 
}

\DIFadd{The question that this search is trying to answer is: how has the Haskell community adapted to having to write }\texttt{\DIFadd{Applicative}} \DIFadd{instances for every }\texttt{\DIFadd{Monad}} \DIFadd{instance? Are programmers taking advantage of what the new type class can offer? 
}

\DIFadd{So, how has the Haskell community adapted to the forced introduction to applicative functors? The reaction seems to be mixed. Of those 2,376 }\texttt{\DIFadd{Applicative}} \DIFadd{instances the search discovered 841 directly defined apply as: }\DIFaddend \texttt{(<*>) = ap}. \DIFaddbegin \DIFadd{This is the suggested implementation for apply. 
 }\DIFaddend 

Beyond the instances of \texttt{Applicative} that \DIFdelbegin \DIFdel{literally }\DIFdelend \DIFaddbegin \DIFadd{simply }\DIFaddend define apply to be \texttt{ap} there are several other \DIFdelbegin \DIFdel{ways that effectively do the same thing. For example }\texttt{\DIFdel{Applicative}} %DIFAUXCMD
\DIFdel{instances can be automatically derived using the }\texttt{\DIFdel{GeneralizedNewtypeDeriving}} %DIFAUXCMD
\DIFdel{language extension. 
}%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < instance Monad m => Applicative (Sink i m) where
%DIFDELCMD <     pure = return
%DIFDELCMD <     af <*> s = do 
%DIFDELCMD <         f <- af
%DIFDELCMD <         v <- s
%DIFDELCMD <         return (f v)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{The applicative instance of }\texttt{\DIFdelFL{Sink}} %DIFAUXCMD
\DIFdelFL{from }\textit{\DIFdelFL{Sousit}} %DIFAUXCMD
\DIFdelFL{version 0.4}}
%DIFAUXCMD
%DIFDELCMD < \label{sousitAp}
%DIFDELCMD < \end{figure}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{There are also ways to define }\texttt{\DIFdel{(<*>)}} %DIFAUXCMD
\DIFdel{that duplicate }\DIFdelend \DIFaddbegin \DIFadd{instances that essentially copy }\DIFaddend the definition of \texttt{ap}\DIFdelbegin \DIFdel{without literally defining }\texttt{\DIFdel{(<*>) = ap}}%DIFAUXCMD
\DIFdelend . For example\DIFdelbegin \DIFdel{one definition that was found is shown in figure~\ref{sousitAp} , this }\DIFdelend \DIFaddbegin \DIFadd{, Figure~\ref{sousitAp} shows the }\texttt{\DIFadd{Applicative}} \DIFadd{definition of }\texttt{\DIFadd{Sink}} \DIFadd{from the Sousit library. This definition }\DIFaddend is an exact \DIFdelbegin \DIFdel{copy }\DIFdelend \DIFaddbegin \DIFadd{duplicate }\DIFaddend of the definition of \texttt{ap}. \DIFdelbegin \DIFdel{This means that the number of }\DIFdelend \DIFaddbegin \DIFadd{There are other ways that this functionality can be duplicated, for example, the applicative instance in Figure~\ref{appWithBind} shows another pattern that several packages used. 
}

\DIFadd{There could be other }\texttt{\DIFadd{Monad}} \DIFadd{and }\DIFaddend \texttt{Applicative} instances that \DIFdelbegin \DIFdel{actually use the suggested implementation is higher than the number found by our search }\DIFdelend \DIFaddbegin \DIFadd{this search couldn't pick up. With the }\texttt{\DIFadd{GeneralizedNewtypeDeriving}} \DIFadd{language extension }\texttt{\DIFadd{Applicative}} \DIFadd{and }\texttt{\DIFadd{Monad}} \DIFadd{instances can be automatically derived by the compiler with their most obvious implementations~\mbox{%DIFAUXCMD
\citep{genDeriveBlog}}\hspace{0pt}%DIFAUXCMD
. It is unclear how often this extension is used, however, as there seems to be some community pushback against using it because it can cause unsafe type coercions~\mbox{%DIFAUXCMD
\citep{genDeriveUnsafe}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . 
\DIFdelbegin \DIFdel{Note that by defining apply to be }\texttt{\DIFdel{ap}}%DIFAUXCMD
\DIFdel{, this instance of Applicative actually monadic.
}\DIFdelend 

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < %[frame=tblr]
%DIFDELCMD < 

%DIFDELCMD < ap :: (Monad m) => m (a -> b) -> m a -> m b
%DIFDELCMD < ap m1 m2 = m1 >>=
%DIFDELCMD <       (\x1 -> m2 >>=
%DIFDELCMD <         (\x2 -> return (x1 x2)))
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
instance Monad m => Applicative (Sink i m) where
    pure = return
    af <*> s = do 
        f <- af
        v <- s
        return (f v)
\end{lstlisting}
\DIFaddendFL \caption{The \DIFdelbeginFL \DIFdelFL{definition }\DIFdelendFL \DIFaddbeginFL \DIFaddFL{applicative instance }\DIFaddendFL of \DIFdelbeginFL \texttt{\DIFdelFL{ap}}%DIFAUXCMD
\DIFdelendFL \DIFaddbeginFL \texttt{\DIFaddFL{Sink}} \DIFaddFL{from }\textit{\DIFaddFL{Sousit}} \DIFaddFL{version 0.4.}\DIFaddendFL }
\DIFdelbeginFL %DIFDELCMD < \label{apDefn}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \label{sousitAp}
\DIFaddendFL \end{figure}

\DIFdelbegin \DIFdel{The definition of }\DIFdelend \DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
instance Applicative TypeCheckMonad where
    pure x    = TCM $ \_ _ _ -> Right x
    mf <*> mx = mf >>= \f -> fmap f mx
\end{lstlisting}
\caption{\DIFaddFL{The applicative instance of }\texttt{\DIFaddFL{TypeCheckMonad}} \DIFaddFL{from }\textit{\DIFaddFL{Hakaru}} \DIFaddFL{version 0.6.0.}}
\label{appWithBind}
\end{figure}

\DIFadd{One interesting thing that the search discovered is that only 697 instances of }\texttt{\DIFadd{Applicative}} \DIFadd{defined }\texttt{\DIFadd{pure}} \DIFadd{as }\texttt{\DIFadd{return}}\DIFadd{. This is fewer than the number of instances that define apply as }\DIFaddend \texttt{ap}\DIFdelbegin \DIFdel{(see figure~\ref{apDefn}) forces }\texttt{\DIFdel{m1}} %DIFAUXCMD
\DIFdel{to be evaluated strictly before evaluating }\texttt{\DIFdel{m2}}%DIFAUXCMD
\DIFdel{, whereas apply has no such restriction. The arguments to apply could be computed in parallel, as in~\mbox{%DIFAUXCMD
\cite{haxl}}\hspace{0pt}%DIFAUXCMD
. Even if not evaluated in parallel, using }\DIFdelend \DIFaddbegin \DIFadd{. It is surprising that implementers take the time to define a pure instance but then define apply as }\texttt{\DIFadd{ap}}\DIFadd{. Though, it's impossible to know exactly why people are doing this but it maybe has to do with the Haskell communities understanding and comfort with how to define }\texttt{\DIFadd{pure}} \DIFadd{for their types versus apply. 
}

\DIFadd{In total this search discovered 868 instances of }\texttt{\DIFadd{Applicative}} \DIFadd{where either apply was directly defined to be }\texttt{\DIFadd{ap}} \DIFadd{or }\texttt{\DIFadd{ap}}\DIFadd{'s definition was replicated in some way. This means that roughly 35\% of }\DIFaddend the \texttt{Applicative} \DIFdelbegin \DIFdel{interface rather than the }\texttt{\DIFdel{Monadic}} %DIFAUXCMD
\DIFdel{can keep static values from being re-evaluated because the two sides of the apply operator are not dependent on one another which saves heap space~\mbox{%DIFAUXCMD
\cite{errParsers}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{definitions simply use the suggested implementation. The following subsection will look at some other common patterns that go beyond the most basic }\texttt{\DIFadd{Applicative}} \DIFadd{implementation}\DIFaddend .

\subsection{Common Implementation Patterns}

In addition to searching how many instances use the standard implementation, the search also was used to discover the different ways \texttt{Applicative} instances were being implemented. Are there any general patterns that emerge?

Finding general patterns like this is a much more complex task than just searching for calls to \texttt{ap} in the definition of apply. All of the Applicative implementations were written into a report and the report was manually analysed for implementation patterns that cropped up multiple times. 

The first common implementation pattern simply pattern matches on the type's constructor to unwrap inner values and apply them to each other. The \texttt{Applicative} instance definition in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{sgplusPair} is an example of this approach. 

\begin{figure}[t]
\begin{lstlisting}
instance Applicative Pair where
 pure a = Pair (a, a)
 (<*>) (Pair (fa, fb)) (Pair (a, b)) = Pair (fa a, fb b)
\end{lstlisting}
\caption{The applicative instance from \textit{SGplus} version 1.1}
\label{sgplusPair}
\end{figure}

Another pattern that was used several times constrains an inner type to be \texttt{Applicative} as well and just passes off the work to that inner \texttt{Applicative} instance. The example in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{dockerApp} shows how this can be defined.

\begin{figure}[t]
\begin{lstlisting}
instance Applicative m => Applicative (DockerT m) where
    pure a = DockerT $ pure a
    (<*>) (DockerT f) (DockerT v) =  DockerT $ f <*> v
\end{lstlisting}
\caption{Applicative instance from \textit{Docker} version 0.3.0.0}
\label{dockerApp}
\end{figure}

\DIFaddbegin \subsection{Conclusions}

\DIFadd{The search results are inconclusive. It is hard to make firm judgements about people's familiarity with a complex type class just from the implementation of the type class alone. However, taken in combination with other results, such as the work done in~\mbox{%DIFAUXCMD
\cite{applicativeDo}}\hspace{0pt}%DIFAUXCMD
, which takes the approach that a single universal notation is easier to use, and the long history of people having trouble understanding monads~\mbox{%DIFAUXCMD
\citep{monadTutorialFallacy}}\hspace{0pt}%DIFAUXCMD
, it appears that programmers do struggle with type classes like monads and applicative functors. Additionally, the popularity of the default }\texttt{\DIFadd{Applicative}} \DIFadd{definition could suggest that the Haskell community has not completely adjusted to the type class.
}

\DIFadd{At the same time, it is possible that most monad instances are more monadic than applicative and so spending time defining unique applicative instances may only be useful for a few types. The rest of this chapter will describe this refactoring, its implementation, its preconditions, and some related refactorings that transform code so that it can pass the preconditions. The next section will describe the design of this refactoring.
}

\DIFaddend \section{Refactoring Monadic Programs to use Applicative}
\label{sec:appRefact}

This section will describe how the refactoring of monadic code to the applicative style works via a series of examples. Many of these examples are taken from \DIFaddbegin \DIFadd{case studies of }\DIFaddend the parser for money amounts and a JSON parser.\footnote{The full source code for these parsers can be found at \url{https://goo.gl/mrCdFh} and \url{https://goo.gl/ysWmSS}} 

\begin{figure}[t]
\begin{lstlisting}
parseStr :: CharParser () String
parseStr = do
	char '"'
	str <- many1 (noneOf "\"")
	char '"'
	return str
\end{lstlisting}
\caption{A string literal parser.}
\label{strLit}
\end{figure}

The first example will refactor a parser for string literals, which is defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{strLit}. This parser first consumes a double quote character (\texttt{char \textquotesingle"\textquotesingle}) then parses one or more characters that are not double quotes and assigns those characters to the variable named \texttt{str}\footnote{This line is composed of two parser combinators, \texttt{many1}, and \texttt{noneOf}. \texttt{many1} takes another parser as its argument and applies it one or more times returning a list of the results. \texttt{noneOf} takes in a list of characters and succeeds if the current character is not in the provided list; that character is then returned.}. Finally, the closing quote is consumed and \texttt{str} is returned. The refactored version of the function is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{strLit_ref}.

\begin{figure}[t]
\begin{lstlisting}
parseStr :: CharParser () String
parseStr = char '"' *> (many1 (noneOf "\"")) <* char '"'
\end{lstlisting}
\caption{The refactored string literal parser}
\label{strLit_ref}
\end{figure}

The refactoring in this case is relatively simple. Since the third and fifth lines of the original function are not bound to a variable\DIFaddbegin \DIFadd{, }\DIFaddend we know that the values returned by those functions will be ignored, hence the use of the \texttt{*>} and \texttt{<*} operators. This way the double \DIFdelbegin \DIFdel{quotes }\DIFdelend \DIFaddbegin \DIFadd{quote }\DIFaddend characters will still be parsed but the values returned by those parsers will be discarded. The original 
function \DIFdelbegin \DIFdel{just }\DIFdelend returns the variable \texttt{str}, without calling any pure functions \DIFaddbegin \DIFadd{on it}\DIFaddend . There will therefore be no pure function application to the left of the effectful functions. 

This is a \DIFdelbegin \DIFdel{fairly simple }\DIFdelend \DIFaddbegin \DIFadd{straightforward }\DIFaddend function to convert to applicative style. The next example is a little more complex. The function in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{pMoney2} is a slight modification of the money parser from Section~\ref{sec:appProgStyle}. This version of the parser makes the decimal amount optional, so it will recognize both ``\textsterling 10.35'' and ``\texteuro 4''.

\begin{figure}[t]
\begin{lstlisting}

parseMoney :: CharParser () Money
parseMoney = do
   currency <- parseCurrency 
   whole <- many1 digit
   decimal <- (option "0" (do { 
                           char '.';
                           d <- many1 digit;
                           return d}))
   return $ M currency (read whole) (read decimal)
\end{lstlisting}
\caption{\texttt{parseMoney} version 2.}
\label{pMoney2}
\end{figure}

The \texttt{parseMoney} function parses text into the \DIFdelbegin \DIFdel{same }\DIFdelend \texttt{Money} data type from Section~\ref{sec:appProgStyle}. The \texttt{option} parser allows the decimal part to be optional. The \texttt{option} combinator will \DIFaddbegin \DIFadd{first }\DIFaddend attempt to apply the parser which is given as its second argument\DIFdelbegin \DIFdel{first}\DIFdelend . If this parser fails then \texttt{option} returns the value it was given in \DIFdelbegin \DIFdel{it's }\DIFdelend \DIFaddbegin \DIFadd{its }\DIFaddend first argument.

\begin{figure}[t]
\begin{lstlisting}
parseMoney :: CharParser () Money
parseMoney = M <$> parseCurrency 
		             <*> (read <$> many1 digit) 
		             <*> (read <$> option "0" (do { 
                           char '.';
                           d <- many1 digit;
                           return d}))
\end{lstlisting}
\caption{The first attempt at refactoring \texttt{parseMoney}.}
\label{parseMon_ref1}
\end{figure}


The return statement in the monadic definition of \texttt{parseMoney} calls the \DIFdelbegin \DIFdel{function }\DIFdelend \DIFaddbegin \DIFadd{constructor }\DIFaddend \texttt{M} with the three values that come from the monadic context. Since \texttt{M} is a pure function, it is placed at the front of the chain of applicative operations, composed with \texttt{(<\$>)}. The \texttt{whole} and \texttt{decimal} variables are \DIFdelbegin \DIFdel{being applied }\DIFdelend \DIFaddbegin \DIFadd{passed as arguments }\DIFaddend to \texttt{read}, a pure function, so these values will be composed with \texttt{read} using the \texttt{(<\$>)} operator. The refactored definition of \texttt{parseMoney} is defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{parseMon_ref1}.

The inner \texttt{do} block that is passed as the second argument to \texttt{option} is also a valid target of this refactoring. The result from applying this refactoring to this \texttt{do} block is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{parseMon_ref2}.

\begin{figure}[t]
 \begin{lstlisting}%[frame=tlrb]

parseMoney :: CharParser () Money
parseMoney = 
   M <$> parseCurrency 
	   <*> (read <$> many1 digit) 
	   <*> (read <$> option "0" (char '.' *> many1 digit))
\end{lstlisting}
\caption{\texttt{parseMoney} after inner do refactoring.}
\label{parseMon_ref2}
\end{figure}

\subsection{Splitting the Applicative Chain}
As chains of applicative computations can get more complicated it is useful or indeed mandatory to parenthesize parts of the function. The \texttt{objEntry} \DIFdelbegin \DIFdel{f unction in figure}\DIFdelend \DIFaddbegin \DIFadd{function in Figure}\DIFaddend ~\ref{objEntry} parses a single entry from a JSON object. Each entry consists of a string key and a value which can be any valid JSON value, separated by a colon, and stores the key and value in a tuple. 

\begin{figure}[t]
\begin{lstlisting}
objEntry = (,) 
        <$> (spaces *> parseStr <* spaces <* char ':') 
        <*> (spaces *> parseJVal <* spaces)
\end{lstlisting}
\caption{A JSON object parser.}
\label{objEntry}
\end{figure}

When \DIFdelbegin \DIFdel{there are a large amount of computations that do not affect the final value of the function there can be }\DIFdelend \DIFaddbegin \DIFadd{the applicative chain has a number of functions composed with the }\texttt{\DIFadd{(*>)}} \DIFadd{or }\texttt{\DIFadd{(<*)}} \DIFadd{operators, there are often }\DIFaddend multiple valid ways to add \DIFdelbegin \DIFdel{parenthesis }\DIFdelend \DIFaddbegin \DIFadd{parentheses }\DIFaddend to the function. \DIFdelbegin \DIFdel{The }\DIFdelend \DIFaddbegin \DIFadd{For example, the }\DIFaddend two definitions of \texttt{objEntry} in Figure~\ref{objEntry2} are equivalent.

The refactoring will produce the first version of this function, and in general will group each of the value-producing expressions (\texttt{parseStr} and \texttt{parseJVal} in this case) with the closest non-value-producing expressions. Every group contains a single value-producing expression and zero or more non-value-producing expressions on either side of the value-producing expression. Each of these groups will then be surrounded by parentheses if it contains non-value-producing expressions, and the groups will be composed using the \texttt{<*>} operator. A more formal description of this is provided in Section~\ref{sec:refacImp}.


\begin{figure}[t]
\begin{lstlisting}%[frame=tblr]

objEntry = (,) 
        <$> (spaces *> parseStr <* spaces <* char ':') 
        <*> (spaces *> parseJVal <* spaces)

objEntry = (,) 
        <$> (spaces *> parseStr) 
        <*> (spaces *> char ':' *> spaces *> parseJVal <* spaces)
\end{lstlisting}
\caption{Different ways to separate the applicative chain.}
\label{objEntry2}
\end{figure}

\subsection{Preconditions}
\label{sec:appPrecons}

This refactoring has two \DIFdelbegin \DIFdel{fairly simple}\DIFdelend \DIFaddbegin \DIFadd{(simple) }\DIFaddend preconditions: first, the target function must be definable with the applicative interface \DIFdelbegin \DIFdel{not just the monadic interface }\DIFdelend \DIFaddbegin \DIFadd{(which will be explained below) }\DIFaddend and secondly, the order that variables are bound in the \texttt{do} block must correspond to the order in which they are used in the return statement.

\begin{figure}[t]
\begin{lstlisting}
(<*>) :: Applicative f => 
	f (a -> b) -> f a -> f b

(>>=) :: Monad m => 
   m a -> (a -> m b) -> m b
\end{lstlisting}
\caption{The types of apply and bind}
\label{appBind}
\end{figure} 

\DIFdelbegin \DIFdel{How can the first precondition be enforced}\DIFdelend \DIFaddbegin \DIFadd{What does this first precondition actually mean}\DIFaddend ? Where is the line between applicative \DIFdelbegin \DIFdel{and monadic}\DIFdelend \DIFaddbegin \DIFadd{functors and monads}\DIFaddend ? Let's start by \DIFdelbegin \DIFdel{looking at }\DIFdelend \DIFaddbegin \DIFadd{comparing }\DIFaddend the type signatures of the bind and apply functions\DIFdelbegin \DIFdel{in figure}\DIFdelend \DIFaddbegin \DIFadd{, which are shown in Figure}\DIFaddend ~\ref{appBind}. One thing to keep in mind is that these two functions take their arguments in opposite orders: the function is the first argument to apply, whereas it is the second argument to bind. The difference between bind and apply is in the type of this function argument. The second argument of bind is a function that takes in a value of type \texttt{a} and returns an \texttt{m b} whereas apply receives an applicative functor that contains a function from \texttt{a} to \texttt{b}. This means that within a monadic context bind allows access to the pure value returned by a computation in the monad, while all of the arguments to apply are fully within the applicative context.

What does this mean in practice? Take for example the function \texttt{f} from \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{parseNThings}. The function \texttt{f} first parses a number \texttt{n} then parses \texttt{n} ``things.'' This function is not definable using applicative functors because \texttt{n} is \DIFdelbegin \DIFdel{applied }\DIFdelend \DIFaddbegin \DIFadd{an argument }\DIFaddend to \texttt{parseNThings} as a pure value. \DIFdelbegin \DIFdel{This first precondition will check }\DIFdelend \DIFaddbegin \DIFadd{The implementation of the first precondition checks }\DIFaddend that any values bound in the \texttt{do} statement are not used in \DIFdelbegin \DIFdel{the }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend right hand side \DIFdelbegin \DIFdel{of any expression other than }\DIFdelend \DIFaddbegin \DIFadd{expression before }\DIFaddend the \texttt{return} statement. 


\begin{figure}[t]
\begin{lstlisting}
f = do
	n <- parseN
	xs <- parseNThings n
	return xs
\end{lstlisting} 
\caption{A context dependent parser.}
\label{parseNThings}
\end{figure}

The second precondition is a bit more intuitive to grasp. Consider the function \texttt{g} in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{outOrd}. The naive implementation of the refactoring is in \DIFdelbegin \DIFdel{lines one and two of figure~\ref{outOrd_ref}}\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{outOrd_ref1}}\DIFaddend . This version of the refactoring maintains the order of statements from the \texttt{do} block but this causes a type error. Instead, if the refactoring swaps the order of the statements (\DIFdelbegin \DIFdel{lines five and six of figure~\ref{outOrd_ref}}\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{outOrd_ref2}}\DIFaddend ) the order that the contextual effects are performed changes, and the meaning of the refactored program is no longer the same as the source program so the refactoring is not valid. 

\begin{figure}[t]
\begin{lstlisting}
g :: Monad m => m (A,B)
g = do
	b <- getB
	a <- getA
	return (a,b)
\end{lstlisting}
\caption{A function with out of order bindings}
\label{outOrd}
\end{figure}

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < g :: Applicative a => a (A,B)
%DIFDELCMD < g = (,) <*> getB <*> getA
%DIFDELCMD < 

%DIFDELCMD < g :: Applicative a => a (A,B)
%DIFDELCMD < g = (,) <*> getA <*> getB
%DIFDELCMD < \end{lstlisting} 
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
g :: Applicative a => a (A,B)
g = (,) <$> getB <*> getA
\end{lstlisting}
\DIFaddendFL \caption{\DIFdelbeginFL \DIFdelFL{Two attempts }\DIFdelendFL \DIFaddbeginFL \DIFaddFL{This attempt }\DIFaddendFL at refactoring \DIFdelbeginFL \texttt{\DIFdelFL{g}}%DIFAUXCMD
\DIFdelendFL \DIFaddbeginFL \DIFaddFL{causes a type error}\DIFaddendFL .}
\DIFdelbeginFL %DIFDELCMD < \label{outOrd_ref}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \label{outOrd_ref1}
\DIFaddendFL \end{figure}

\DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
g :: Applicative a => a (A,B)
g = (,) <$> getA <*> getB
\end{lstlisting} 
\caption{\DIFaddFL{This refactoring changes the order the effects happen in.}}
\label{outOrd_ref2}
\end{figure}

\DIFadd{These preconditions enforce subtly different things. The first precondition, that the target function needs to be expressible with the }\texttt{\DIFadd{Applicative}} \DIFadd{interface, is the obvious requirement that the target function needs to meet for the refactoring to be able to be performed successfully. The entire premise of this refactoring is to convert programs using the monadic interface to instead use the more general applicative one, so the target function must be expressible using that interface.
}

\DIFadd{The second precondition, especially when described as naively as it has been in this section, seems to also be mandatory for the refactoring to not change the behaviour of the target function. However, an additional transformation could be applied to }\texttt{\DIFadd{g}} \DIFadd{that would allow the generalisation refactoring to continue without type errors and preserve the ordering of effects: the introduction of a lambda expression into the }\texttt{\DIFadd{return}} \DIFadd{statement. The details of this transformation are described in Section~\ref{lambdaIntro}.
}

\DIFadd{The next section will discuss further refactorings that help modify potential target functions so that they can meet the preconditions discussed in this chapter. The second precondition can always be ``avoided'' if the programmer so chooses by applying an additional refactoring that wraps the expression that is applied to }\texttt{\DIFadd{return}} \DIFadd{in a lambda expression. The first precondition on the other hand cannot be simply avoided. The next section will discuss refactorings that can help maximise the amount of code that can be refactored, but code that does not pass the first precondition cannot be refactored.
}

\DIFaddend \subsection{Additional Refactorings}
\label{sec:otherRefs}
The preconditions described in the previous section, and the second one in particular, may seem overly strict and needlessly limit the scope of code to which the refactoring could be applied. Instead of developing a single monolithic refactoring that contains many separate cases\DIFdelbegin \DIFdel{the approach }\DIFdelend \DIFaddbegin \DIFadd{, the approach that }\DIFaddend HaRe takes is to develop multiple small refactorings that can be composed together. Composing refactorings together in this way is known as a composite refactoring~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{wranglerDomain}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{wranglerDomain}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend .

This section will describe a number of refactorings that will help transform (or ``prefactor'') code so that it is capable of passing the preconditions for the generalising applicative refactoring. 

\subsubsection{Extract monadic code}
\label{subSec:extract}

\begin{figure}[t]
\begin{lstlisting}%[frame=tblr]

f = do
	x <- getX
	b <- getB
	y <- if b then getY1 else getY2
	log y
	return (x,y)	
\end{lstlisting}
\caption{The function \texttt{f}, lines three through five can be extracted.}
\label{fMonad}
\end{figure}


The \texttt{f} function \DIFdelbegin \DIFdel{from figure}\DIFdelend \DIFaddbegin \DIFadd{in Figure}\DIFaddend ~\ref{fMonad} will not pass the preconditions because both \texttt{b} and \texttt{y} are extracted from the monadic context and used in a right hand side expression. However, lines three through five do not really affect the rest of the function so they could be extracted into their own function\DIFdelbegin \DIFdel{and then f could be rewritten applicatively. }\DIFdelend \DIFaddbegin \DIFadd{. The result of extracting these lines into their own function, }\texttt{\DIFadd{g}}\DIFadd{, is shown in Figure~\ref{gExtracted}. Now }\texttt{\DIFadd{f}} \DIFadd{is a valid target for generalising to the }\texttt{\DIFadd{Applicative}} \DIFadd{interface; the final version of this function is shown in Figure~\ref{fToApp}.
}\DIFaddend 

The extraction of monadic code does not have to create a top level variable; if the developer preferred the extracted definition could be in a \texttt{let} or \texttt{where} clause \DIFaddbegin \DIFadd{instead}\DIFaddend .

\DIFdelbegin %DIFDELCMD < \begin{figure}[h]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < %[frame=tblr]
%DIFDELCMD < 

%DIFDELCMD < f = (,) <$> getX <*> g
%DIFDELCMD < 

%DIFDELCMD < g = do
%DIFDELCMD < 	b <- getB
%DIFDELCMD < 	y <- if b then getY1 else getY2
%DIFDELCMD < 	log y
%DIFDELCMD < 	return y
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{figure}[t]
\begin{lstlisting}
f = do
	x <- getX
	y <- g
	return (x,y)

g = do
	b <- getB
	y <- if b then getY1 else getY2
	log y
	return y
\end{lstlisting}
\DIFaddendFL \caption{\texttt{g} was extracted from the original function \texttt{f} in Figure~\ref{fMonad}\DIFdelbeginFL \DIFdelFL{. Now }\texttt{\DIFdelFL{f}} %DIFAUXCMD
\DIFdelFL{can be rewritten using applicative.}\DIFdelendFL }\DIFaddbeginFL \DIFaddFL{.
}\label{gExtracted}
\DIFaddendFL \end{figure}
\DIFaddbegin 

\begin{figure}[t]
\begin{lstlisting}
f = (,) <$> getX <*> g

g = do
	b <- getB
	y <- if b then getY1 else getY2
	log y
	return y
\end{lstlisting}
\caption{\DIFaddFL{The final result of the refactoring with }\texttt{\DIFaddFL{f}} \DIFaddFL{rewritten using the }\texttt{\DIFaddFL{Applicative}} \DIFaddFL{interface.}}
\label{fToApp}
\end{figure}

\DIFaddend \subsubsection{Inline do blocks}

\begin{figure}[h]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < %[frame=tblr]
%DIFDELCMD < f = do
%DIFDELCMD < 	x <- result1
%DIFDELCMD < 	y <- result2
%DIFDELCMD < 	z <- result3
%DIFDELCMD < 	log z
%DIFDELCMD < 	return (x,y)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
f = do
	x <- result1
	y <- result2
	z <- result3
	log z
	return (x,y)
\end{lstlisting}
\DIFaddendFL \caption{Lines four and five can be merged into a single do block.}
\DIFaddbeginFL \label{logZFun}
\DIFaddendFL \end{figure}

Instead of extracting an entire function as in \DIFdelbegin \DIFdel{subsection~\ref{subSec:extract} }\DIFdelend \DIFaddbegin \DIFadd{the previous section, }\DIFaddend a developer may prefer just to inline a do block. This is useful if the monadic section of code is small.

\DIFdelbegin \DIFdel{Normally the }\DIFdelend \DIFaddbegin \DIFadd{Consider the function in Figure~\ref{logZFun}. Normally the }\DIFaddend variable \texttt{z} being used again in line five would prevent the function from being refactored. Merging lines four and five into an inline do block would allow this function to pass the preconditions, as seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{inlineDo}. Now the entire function can be refactored to the version \DIFdelbegin \DIFdel{defined in figure}\DIFdelend \DIFaddbegin \DIFadd{shown in Figure}\DIFaddend ~\ref{inlineDo_ref}. Additional refactoring can \DIFaddbegin \DIFadd{then }\DIFaddend take place with the inner \texttt{do} block also undergoing refactoring\DIFdelbegin \DIFdel{.
}\DIFdelend \DIFaddbegin \DIFadd{ (see Figure~\ref{zLog}).
}\DIFaddend 

\begin{figure}[t]
\begin{lstlisting}
f = do
	x <- result1
	y <- result2
	do{z <- result3; log z}
	return (x,y)
\end{lstlisting}
\caption{\texttt{f} with two lines merged into an inline do block.}
\label{inlineDo}
\end{figure}

\begin{figure}[t]
\begin{lstlisting}
f = (,) 
    <$> result1 
    <*> (result2 <* do{z <- result3; log z})
\end{lstlisting}
\caption{The inline do allows the function to be rewritten with the applicative operators.}
\label{inlineDo_ref}
\end{figure}

\DIFaddbegin \begin{figure}[t]
\begin{lstlisting}
f = (,) 
    <$> result1
    <*> (result2 <* (log <*> result3))
\end{lstlisting}
\caption{\DIFaddFL{The inline do block can also be refactored to use the applicative interface.}}
\label{zLog}
\end{figure}

\DIFaddend \subsubsection{Introduce lambdas to reorder statements}
\DIFaddbegin \label{lambdaIntro}
\DIFaddend 

The other precondition to the \DIFdelbegin \DIFdel{generalise applicative }\DIFdelend \DIFaddbegin \DIFadd{``generalise applicative'' }\DIFaddend refactoring is the condition that all bound variables need to be extracted from the monadic context in the order they appear in the return statement. \DIFdelbegin \DIFdel{Take }\DIFdelend \DIFaddbegin \DIFadd{Reconsider }\DIFaddend the example given at the end of Section~\ref{sec:appPrecons} \DIFdelbegin \DIFdel{repeated in figure~\ref{ordering}}\DIFdelend \DIFaddbegin \DIFadd{(repeated in Figure~\ref{ordering})}\DIFaddend .

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < %[frame=tblr]
%DIFDELCMD < 

%DIFDELCMD < f = do
%DIFDELCMD < 	b <- getB
%DIFDELCMD < 	a <- getA
%DIFDELCMD < 	return (a,b)
%DIFDELCMD < \end{lstlisting} 
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}

f = do
	b <- getB
	a <- getA
	return (a,b)
\end{lstlisting} 
\DIFaddendFL \caption{The order that \texttt{a} and \texttt{b} are bound fails the precondition}
\label{ordering}
\end{figure}


Since \texttt{a} appears before \texttt{b} in the return statement but \texttt{b} is bound before \texttt{a} this function would not pass the ordering precondition. If this return statement was refactored to include a lambda that flipped the arguments it could pass this precondition. \DIFaddbegin \DIFadd{The result of applying this refactoring to the function from Figure~\ref{ordering} can be seen in Figure~\ref{reordered}.
}\DIFaddend 

\DIFaddbegin \begin{figure}[t]
\DIFaddendFL \begin{lstlisting}
f = do
	b <- getB
	a <- getA
	return $ (\ b a -> (a,b)) b a
\end{lstlisting}
\DIFaddbeginFL \caption{\DIFaddFL{The lambda expression in the }\texttt{\DIFaddFL{return}} \DIFaddFL{statement allows this function to pass the preconditions.}}
\label{reordered}
\end{figure}
\DIFaddend 

\DIFdelbegin \DIFdel{The new definition of f would be generalised like so:
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \begin{figure}[t]
\DIFaddendFL \begin{lstlisting}
f = (\ b a -> (a,b)) <$> getB <*> getA
\end{lstlisting}
\DIFaddbeginFL \caption{\DIFaddFL{The final }\texttt{\DIFaddFL{Applicative}} \DIFaddFL{implementation of }\texttt{\DIFaddFL{f}}\DIFaddFL{.}}
\label{finalF}
\end{figure}
\DIFaddend 

\DIFaddbegin \DIFadd{Finally the function }\texttt{\DIFadd{f}}\DIFadd{, after applying the lambda reordering refactoring, can be fully rewritten using the }\texttt{\DIFadd{Applicative}} \DIFadd{interface. The final result of this chain of refactorings can be seen in Figure~\ref{finalF}.
}


\DIFaddend The previous three refactorings are designed to help expand the scope over which the \DIFdelbegin \DIFdel{refactoring for generalising to applicative}\DIFdelend \DIFaddbegin \DIFadd{``generalising applicative'' refactoring }\DIFaddend would work. As we said at the beginning of this section, we feel that the composite refactoring approach is better than having the ``generalise applicative'' refactoring automatically apply the refactorings described in this section. In many cases, though technically correct, the refactorings in this section can produce \DIFdelbegin \DIFdel{very }\DIFdelend verbose and complicated code: for example, \DIFdelbegin \DIFdel{imagine }\DIFdelend a lambda expression with many more than two arguments, or \DIFdelbegin \DIFdel{trying to inline more complicated }\DIFdelend \DIFaddbegin \DIFadd{an inlined complex }\DIFaddend \texttt{do} \DIFdelbegin \DIFdel{expressions}\DIFdelend \DIFaddbegin \DIFadd{expression}\DIFaddend . Producing less readable code defeats the purpose of refactoring in the first place. By taking this composite approach, a software engineer is required to be \DIFdelbegin \DIFdel{much more }\DIFdelend involved in each step of the refactoring, and so to be \DIFdelbegin \DIFdel{more aware }\DIFdelend \DIFaddbegin \DIFadd{in control }\DIFaddend of the output at each step, too. 

The composite approach also makes it much easier for the refactoring to be extended by others. If there are other similar refactorings beyond those we discussed here, it is much simpler for another programmer to \DIFdelbegin \DIFdel{just }\DIFdelend implement their own stand-alone refactoring  than to figure out where in our implementation of the generalise applicative refactoring their code should be inserted. 

\section{Implementation of the Refactoring}
\label{sec:refacImp}

The previous section informally discussed \DIFdelbegin \DIFdel{how the }\DIFdelend \DIFaddbegin \DIFadd{the ``}\DIFaddend generalise monad to applicative\DIFdelbegin \DIFdel{refactoring in general}\DIFdelend \DIFaddbegin \DIFadd{'' refactoring}\DIFaddend . In practice there are quite a few choices that implementers must make when writing this refactoring, for example how parentheses are inserted. This section will discuss \DIFdelbegin \DIFdel{more formally }\DIFdelend \DIFaddbegin \DIFadd{in more detail }\DIFaddend how HaRe implements the ``generalise to applicative'' refactoring.

\DIFdelbegin %DIFDELCMD < \subsection{Implementation of the Generalising to Applicative Refactoring}
%DIFDELCMD < %%%
\DIFdelend \begin{figure}[t]
	\begin{math}
		\begin{aligned}
\textbf{Expressions}\\
e \in Expr\ &::=\ v\qquad &Variable\\
&|\ e_1 e_2\\
&|\ \lambda p \rightarrow e\\
&|\ (e_1, \ldots, e_n) &n \geq 2\\
&|\ \textbf{do}\ l\ e\\
&|\ \ldots\\
\textbf{Patterns}\\
p \in Pat\ &::=\ v\\
&|\ (p_1, \ldots, p_n) &n \geq 2\\
&|\ \ldots \\
\textbf{Statement Sequences}\\
l \in Stmts\ &::=\ \{s_1, \ldots, s_n\} &n \geq 1\\
\textbf{Statements}\\
s \in Stmt\ &::=\ p \leftarrow e\\
&|\ e
		\end{aligned}
	\end{math}
	\caption{\DIFdelbeginFL \DIFdelFL{Syntax }\DIFdelendFL \DIFaddbeginFL \DIFaddFL{Input syntax for }\DIFaddendFL the refactoring\DIFdelbeginFL \DIFdelFL{works over.}\DIFdelendFL }
	\label{syntax}
\end{figure}

The refactoring takes two arguments: the filepath of the file to be refactored, and the row and column in the file of the start of the function to be refactored. The rest of this section will go through the checking of preconditions and building the applicative expression more thoroughly. The syntax of the target function \DIFdelbegin \DIFdel{has been }\DIFdelend \DIFaddbegin \DIFadd{is }\DIFaddend defined in Figure~\ref{syntax}.


\DIFdelbegin \subsubsection{\DIFdel{Checking the Preconditions}}
%DIFAUXCMD
\addtocounter{subsubsection}{-1}%DIFAUXCMD
\DIFdelend \DIFaddbegin \subsection{Checking the Preconditions}
\DIFaddend 

\begin{figure}[t]
\textbf{let}\ $s_1, \ldots, s_n$ be\ the\ do\ block\ to\ refactor\\
\DIFdelbeginFL \DIFdelFL{and}\DIFdelendFL \ \DIFaddbeginFL \DIFaddFL{~\ ~\ ~\ ~\       }\DIFaddendFL $s_n$\ is\ the\ return\ statement\DIFaddbeginFL \DIFaddFL{.}\DIFaddendFL \\
\DIFdelbeginFL \DIFdelFL{this}\DIFdelendFL \DIFaddbeginFL \DIFaddFL{This}\DIFaddendFL \ block\ must\ pass\ the\ following\ preconditions\\
\DIFdelbeginFL %DIFDELCMD < \hrule
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \\
\DIFaddendFL \begin{math}
\DIFdelbeginFL %DIFDELCMD < \begin{aligned}  	
%DIFDELCMD < &\textbf{Bound Variables are only used in return statement}\\
%DIFDELCMD < &\forall x.1\leq x < n.&\\
%DIFDELCMD < &\qquad bv\ s_x \cap fv \{s_y |\ y \in \{1, \ldots, n-1\}, y \neq x\} = \emptyset \\
%DIFDELCMD < \\
%DIFDELCMD < &\textbf{The order variable are bound}\\
%DIFDELCMD < &\textbf{and appear in the return statement are the same}\\
%DIFDELCMD < &\textbf{let}\ rv_1, \ldots, rv_m\ be\ the\ variables\ bound\ in\ s\ as\ they\ appear&\\
%DIFDELCMD < &\qquad from\ left\ to\ right\ in\ the\ return\ statement\ \textbf{in}&\\
%DIFDELCMD < &\forall i. 1 \leq i \leq m .&\\
%DIFDELCMD < &\qquad \{bNum\ rv_i < bNum\ rv_j\ |\ j \in \{i+1, \ldots, m\} \}&\\
%DIFDELCMD < \\
%DIFDELCMD < &--bNum\ returns\ the\ index\ of\ the\ statement\\ 
%DIFDELCMD < &--in\ which\ a\ variable\ is\ bound\ \\
%DIFDELCMD < &bNum\ v\ \{s_1, \ldots, s_n \} = y &\\
%DIFDELCMD < &\qquad \textbf{where}\ v \in bv\ s_y&\\ 
%DIFDELCMD < &bv\ \{s_1, \ldots, s_n \}\ =\ the\ bound\ variables\ of\ \{s_1, \ldots, s_n\}\\
%DIFDELCMD < &fv\ \{s_1, \ldots, s_n \}\ =\ the\ free\ variables\ of\ \{s_1, \ldots, s_n\}\\&
%DIFDELCMD < 		\end{aligned}
%DIFDELCMD < 	%%%
\DIFdelendFL \DIFaddbeginFL \begin{aligned}  	
&\textbf{Bound Variables are only used in return statement}\\
&\forall x.1\leq x < n.&\\
&\qquad bv\ s_x \cap fv \{s_y |\ y \in \{1, \ldots, n-1\}, y \neq x\} = \emptyset \\
\\
&\textbf{The order variables are bound in}\\
&\textbf{and appear in the return statement are the same}\\
&\textbf{let}\ rv_1, \ldots, rv_m\ be\ the\ variables\ bound\ in\ s\ as\ they\ appear&\\
&\qquad from\ left\ to\ right\ in\ the\ return\ statement\ \textbf{in}&\\
&\forall i. 1 \leq i \leq m .&\\
&\qquad \{bNum\ rv_i < bNum\ rv_j\ |\ j \in \{i+1, \ldots, m\} \}&\\
\\
&\textit{bNum returns the index of the statement}\\ 
&\textit{in which a variable is bound}\\
&bNum\ v\ \{s_1, \ldots, s_n \} = y &\\
&\qquad \textbf{where}\ v \in bv\ s_y&\\ 
&bv\ \{s_1, \ldots, s_n \}\ =\ the\ bound\ variables\ of\ \{s_1, \ldots, s_n\}\\
&fv\ \{s_1, \ldots, s_n \}\ =\ the\ free\ variables\ of\ \{s_1, \ldots, s_n\}\\&
		\end{aligned}
	\DIFaddendFL \end{math}
	\caption{Preconditions}
	\label{preconditions}
\end{figure}

\DIFdelbegin \DIFdel{Now that the relevant parts of the function have been extracted we can check if the preconditions pass}\DIFdelend \DIFaddbegin \DIFadd{The two arguments that the refactoring takes are enough to identify and extract the target function so that precondition checking can begin}\DIFaddend . These preconditions \DIFdelbegin \DIFdel{have been }\DIFdelend \DIFaddbegin \DIFadd{are }\DIFaddend defined in Figure~\ref{preconditions}. 

The first precondition, checking whether any of the variables bound within the do block is used in another statement, is fairly straightforward \DIFaddbegin \DIFadd{to verify}\DIFaddend : the refactoring \DIFdelbegin \DIFdel{just }\DIFdelend recurses through the list of statements and checks the right hand side of each statement to see whether a bound variable is used in it. \DIFaddbegin \DIFadd{If one of the bound variables is found in a right hand side expression then the entire do expression is not context-free and so cannot be rewritten using the applicative interface alone.
}\DIFaddend 

The second precondition is checked using a similar method. First we construct a list of the variables in the order that they are used in the return statement\DIFdelbegin \DIFdel{from left to right. Then going through the do statements we check that }\DIFdelend \DIFaddbegin \DIFadd{. Then we search through the }\texttt{\DIFadd{do}} \DIFadd{statements in order and }\DIFaddend whenever a binding statement is found \DIFdelbegin \DIFdel{that it }\DIFdelend \DIFaddbegin \DIFadd{we check that the variable being bound }\DIFaddend is the leftmost variable in the return statement that \DIFdelbegin \DIFdel{we haven't already found.
%DIF < \clearpage
}\DIFdelend \DIFaddbegin \DIFadd{has not already been processed.
}\DIFaddend 


\DIFdelbegin \subsubsection{\DIFdel{Constructing the Effectful Expression}}
%DIFAUXCMD
\addtocounter{subsubsection}{-1}%DIFAUXCMD
\DIFdelend \DIFaddbegin \subsection{Constructing the Effectful Expression}
\DIFaddend \begin{figure}[t]
\begin{math}
\begin{aligned}
&buildEffects :: Stmts \rightarrow Expr&\\
&buildEffects\ stmts = (e_1 <*> e_2 <*> \ldots <*> e_m)\\
&\quad \textbf{where}\ \{e_1, \ldots, e_m \} = map\ buildSingleExpr\ clusters\\
&\qquad \quad \quad \quad\ \ clusters = clusterStmts\ stmts\\
\\
&--buildSingleExpr\ takes\ in\ Stmts\ that\ contains\ a\ single\\
&--bind\ Stmt\ and\ makes\ an\ applicative\ expression\\
&buildSingleExpr :: Stmts \rightarrow Expr\\
&buildSingleExpr \{bf_1, \ldots, bf_n, (p \leftarrow e), af_1, \ldots, af_m\} = \\
&\quad(bf_1 *> \ldots *> bf_n *> e <* af_1 <* \ldots <* af_m)\\
\\
&--clusterStmts\ segments\ statements\ into\ multiple\ sets\ of\\
&--statements\ each\ set\ contains\ one\ bind\ statement\\
&--and\ its\ surrounding\ statements\\
&clusterStmts :: Stmts \rightarrow \{Stmts\}\\
&clusterStmts\ \{s_1, \ldots, s_n \} = map\ (\lambda is \rightarrow map\ (\lambda i \rightarrow s_i)\ is)\ cs\\
&\quad \textbf{where}\\
&\qquad indices = \{i | s_i \in \{s_1, \ldots, s_n\}, s_i = (p \leftarrow e)\}\\
&\qquad cs = cluster\ indices\ n\ 0\\
&\qquad cluster\ \{i\}\ l\ c = \{\{c, \ldots, (l-1)\}\}\\
&\qquad cluster\ \{i_1,i_2,\ldots \}\ l\ c = \textbf{let}\ b = i_1 + ((i_2-i_1)\ `div`\ 2)\ \textbf{in}\\
&\qquad \quad \{c, \ldots, b\}:(cluster\ \{i_2, \ldots \}\ l\ (b+1))
\end{aligned}
\end{math}
\caption{Building the effects}
\label{effects}
\end{figure}


Once the preconditions have passed we can start constructing the ``applicative chain'' that will make up the right hand side of the refactored function. This expression is composed of two different types of subexpressions, binding expressions that contribute to the returned value of the function and effectful expressions that are just used for their effects on the applicative context. The code that constructs this \DIFdelbegin \DIFdel{effectul }\DIFdelend \DIFaddbegin \DIFadd{effectful }\DIFaddend part of the applicative chain is \DIFdelbegin \DIFdel{contained }\DIFdelend \DIFaddbegin \DIFadd{given }\DIFaddend in Figure~\ref{effects}.

\begin{figure}[t]
\begin{lstlisting}%[frame=tblr]

objEntry :: CharParser () (String, String)
objEntry = do
  spaces
  str <- parseStr
  spaces
  char ':'
  spaces
  i <- many1 digit
  spaces
  return (str, i)
\end{lstlisting}
\caption{The \texttt{objEntry} parser}
\label{objEntry}
\end{figure}

In the \texttt{objEntry} parser in Figure~\ref{objEntry} only lines four and eight contribute to the final result. The other expressions just consume input and the returned values of those combinators are discarded. The first step in constructing the applicative chain involves building clusters of statements where each cluster contains a single binding statement (those matching $(p \leftarrow e) :: Stmt$) and its nearest effectful statements (those matching $e :: Stmt$). In the \DIFdelbegin \DIFdel{above }\DIFdelend example we would create two clusters: the first cluster consists of lines three through six and the second of lines seven through nine.\footnote{When there are an odd number of effectful expressions between binding expressions the extra expression is added to the leftmost cluster.}

Within each cluster we then decide which applicative operator goes between each expression. Since there is only one binding statement per cluster we know that the only options are \texttt{(*>)} or \texttt{(<*)}. We simply go through the list and if \DIFdelbegin \DIFdel{an }\DIFdelend \DIFaddbegin \DIFadd{a }\DIFaddend statement occurs before the binding statement we know that it will be followed by a \texttt{(*>)} operator and once the binding statement \DIFdelbegin \DIFdel{comes up }\DIFdelend \DIFaddbegin \DIFadd{occurs }\DIFaddend all the remaining expressions are composed with the \texttt{(<*)} operator. The first cluster from the \texttt{objEntry} example is: \texttt{(spaces *> parseStr <* spaces <* char ':')}

Once every cluster has been refactored separately all of the clusters are wrapped in parentheses and then composed with the full apply operator (\texttt{<*>}), as seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{effectObjEntry}.

\begin{figure}[t]
\begin{lstlisting}
    (spaces *> parseStr <* spaces <* char ':') 
<*> (spaces *> many1 digit <* spaces)
\end{lstlisting}
\caption{The effectful expressions for the refactored definition of \texttt{objEntry}}
\label{effectObjEntry}
\end{figure}


\DIFdelbegin \subsubsection{\DIFdel{Building the Pure Expression}}
%DIFAUXCMD
\addtocounter{subsubsection}{-1}%DIFAUXCMD
\DIFdelend \DIFaddbegin \subsection{Building the Pure Expression}
\DIFaddend 

\vspace{5pt}
\DIFdelbegin %DIFDELCMD < \begin{figure}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{figure}[h!]
\DIFaddendFL \begin{math}
\begin{aligned}
&--This\ takes\ the\ expression\ being\ returned\\
&--originally\ and\ the\ statements\ and\ returns\ the\ expression\\
&--to\ go\ on\ the\ front\ of\ the\ applicative\ chain\\
&buildPureExpr :: Expr \rightarrow Stmts \rightarrow Maybe\ Expr\\
&buildPureExpr\ (e_1,\ldots, e_n)\ \_\ = Just\ (,^n)\\
&buildPureExpr\ e\ stmts\ = removeBoundVars\ e\ (bv\ stmts)\\
\\
&removeBoundVars :: Expr \rightarrow \{Variable\} \rightarrow Maybe\ Expr\\
&removeBoundVars\ v\ vars =\\
&\quad |\ v \in vars =\ Nothing\\
&\quad |\ otherwise =\ Just\ v\\
&removeBoundVars\ (e\ v)\ vars =\\
&\quad |\ v \in vars =\ removeBoundVars\ e\ vars >>=\\ 
&\qquad \qquad(\lambda\ e' \rightarrow Just\ e')\\
&\quad |\ otherwise =\ removeBoundVars\ e\ vars >>=\\ 
&\qquad \qquad(\lambda\ e' \rightarrow Just\ (e'\ v))\\
&removeBoundVars\ (e_1\ e_2)\ vars =\ \textbf{do}\\
&\qquad \qquad e_1' \leftarrow removeBoundVars\ e_1\ vars\\
&\qquad \qquad e_2' \leftarrow removeBoundVars\ e_2\ vars\\
&\qquad \qquad return\ (e_1'\ e_2')\\
&removeBoundVars\ (\lambda p \rightarrow\ e)\ vars =\\
&\quad |\ (fv\ e) \cap vars \neq \emptyset\ =\ Nothing\\
&\quad |\ otherwise =\ Just\ (\lambda p \rightarrow\ e)\\
&removeBoundVars\ (\textbf{do}\ l\ e)\ vars =\\
&\quad |\ (fv\ l) \cap vars \neq \emptyset\ =\ Nothing\\
&\quad |\ (fv\ e) \cap vars \neq \emptyset\ =\ Nothing\\
&\quad |\ otherwise =\ Just\ (\textbf{do}\ l\ e)\\
&removeBoundVars\ \_\ \_ =\ Nothing\\
\end{aligned}
\end{math}
\caption{Building the pure expression}
\label{pure}
\end{figure}
Now that the effectful part of the chain has been constructed we must build the pure expression that will be attached to the front of the applicative chain. The definition in Figure~\ref{pure} builds this pure expression. 

In the \DIFdelbegin \DIFdel{following }\DIFdelend \DIFaddbegin \DIFadd{running }\DIFaddend example the returned values are wrapped in a tuple, which is a special case, and the pair constructor (\texttt{(,)}) will be added to the front of the chain with the infix \texttt{fmap} operator (\texttt{<\$>}).

\begin{figure}[t]
\begin{lstlisting}
zipperM :: [a] -> IO [(a, b)]
zipperM lst = do
  lst2 <- getOtherList
  return $ zip lst lst2
\end{lstlisting}
\caption{The \texttt{zipperM} function}
\label{zipperM}
\end{figure}

If the pure expression is \DIFdelbegin \DIFdel{just }\DIFdelend a function call such as in \texttt{zipperM} from \DIFdelbegin \DIFdel{figure~\ref{zipperM}}\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{zipperM}, }\DIFaddend then the pure part of the expression is built by extracting the expression being returned and removing all of the bound variables from this expression. In the \texttt{zipperM} example this is\DIFdelbegin \DIFdel{just}\DIFdelend ~\texttt{zip lst} after removing the \texttt{lst2} which was bound in the \texttt{do} block. Finally this expression is \DIFdelbegin \DIFdel{what is }\DIFdelend added to the front of the chain.

Once the pure expression has been added to the front of the applicative chain the new function definition can replace the \DIFdelbegin \DIFdel{source function's }\DIFdelend right hand side \DIFaddbegin \DIFadd{of the source function}\DIFaddend . The refactored \DIFdelbegin \DIFdel{functions from }\DIFdelend \DIFaddbegin \DIFadd{versions of }\DIFaddend both examples used in this section are shown in Figure~\ref{final-fns}.

\begin{figure}[t]
\begin{lstlisting}
objEntry :: CharParser () (String, String)
objEntry = (,) <$> 
		  (spaces *> parseStr <* spaces <* char ':') 
	<*> (spaces *> many1 digit <* spaces)

zipperM :: [a] -> IO [(a, b)]
zipperM lst = zip lst <$> getOtherList
\end{lstlisting}
\caption{The refactored functions from the two examples in this section}
\label{final-fns}
\end{figure}

\section{Case Studies}
\label{sec:appApps}

There are two things that make a particular application a good candidate for this refactoring. First, and most obviously, the application must be able to be defined using the applicative interface. Secondly, a good candidate will have a large corpus of code that is already written in the monadic style. If a particular library is already defined using applicative functors rather than monads then there is little work for the refactoring to do.
This section will discuss some libraries where this refactoring could be useful.

\subsection{Parsing}
Many of the examples in this chapter are parsers using parser combinator libraries. This has been a classic domain of applicative functors. The first examples of applicative-like developments come from papers on parsing~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{efficientParsing,errParsers}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{efficientParsing,errParsers}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend .

Indeed, when looking through projects on Hackage that are labeled as parsers, examples of where our refactoring could be applied are numerous. Html-tokenizer is a project that tokenizes HTML code to provide a base for HTML parsers~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{htmlTok}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{htmlTok}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . Within it there are several functions that can be generalised. These functions look very much like what we have already seen. Take \DIFdelbegin \DIFdel{for example the following function that matches }\DIFdelend \DIFaddbegin \DIFadd{the example of matching }\DIFaddend an opening tag\DIFdelbegin \DIFdel{in figure}\DIFdelend \DIFaddbegin \DIFadd{: the original definition is in Figure}\DIFaddend ~\ref{openingTag} and the same function \DIFdelbegin \DIFdel{refactoring using this refactoring in figure}\DIFdelend \DIFaddbegin \DIFadd{after it has been refactored is shown in Figure}\DIFaddend ~\ref{openingTag_ref}.

\begin{figure}[t]
\begin{lstlisting}
openingTag :: Parser OpeningTag
openingTag =
  do
    char '<'
    skipSpace
    theIdentifier <- identifier
    attributes <- many $ space *> skipSpace *> attribute
    skipSpace
    closed <- convert <$> optional (char '/')
    char '>'
    return (theIdentifier, attributes, closed)
\end{lstlisting}
\caption{The \texttt{openingTag} function.}
\label{openingTag}
\end{figure}

\begin{figure}[t]
\begin{lstlisting}
openingTag :: Parser OpeningTag
openingTag = (,,) 
   <$> (char '<' *> skipSpace *> identifier) 
   <*> ((many $ space *> skipSpace *> attribute) <* skipSpace) 
   <*> ((convert <$> optional (char '/')) <* char '>')
\end{lstlisting}
\caption{\texttt{openingTag} refactored}
\label{openingTag_ref}
\end{figure}

Another parser that has several functions that can be refactored is \DIFdelbegin \DIFdel{in }\DIFdelend the implementation of pi-forall, a simple dependently typed language created by Stephanie Weirich~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{piForall}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{piForall}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . The function that parses a module definition and its refactored equivalent are shown in \DIFdelbegin \DIFdel{figures}\DIFdelend \DIFaddbegin \DIFadd{Figures}\DIFaddend ~\ref{piParse} and~\ref{piParse_ref}.

\begin{figure}[t]
\begin{lstlisting}
moduleDef :: LParser Module
moduleDef = do
  reserved "module"
  modName <- identifier
  reserved "where"
  imports <- layout importDef (return ())
  decls <- layout decl (return ())
  cnames <- get
  return $ Module modName imports decls     cnames
\end{lstlisting}
\caption{pi-forall's module definition parser.}
\label{piParse}
\end{figure}

\begin{figure}[t]
\begin{lstlisting}
moduleDef :: LParser Module
moduleDef = Module
   <$> (reserved "module" *> identifier <* reserved "where") 
   <*> layout importDef (return ()) 
   <*> layout decl (return ()) 
   <*> get
\end{lstlisting}
\caption{The refactored module definition parser}
\label{piParse_ref}
\end{figure}


\subsection{Data Store Access}

One of the \DIFdelbegin \DIFdel{big }\DIFdelend \DIFaddbegin \DIFadd{main }\DIFaddend motivations behind adopting the applicative interface rather than using the monadic interface \DIFdelbegin \DIFdel{all the time }\DIFdelend is that the applicative operators are not inherently sequential\DIFdelbegin \DIFdel{. Essentially }\DIFdelend \DIFaddbegin \DIFadd{: }\DIFaddend the left and right hand sides of a call to apply could be \DIFdelbegin \DIFdel{executed }\DIFdelend \DIFaddbegin \DIFadd{evaluated }\DIFaddend in parallel. 

Facebook's Haxl project simplifies accessing remote data stores such as databases and web-services~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{haxl}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{haxl}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . In the background when requests to the data store are composed with the \texttt{(<*>)} operator, concurrency is implicit~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{haxl}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{haxl}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . With the generalise applicative refactoring programmers will be able to write their data queries in the do notation that they are familiar with and then refactor their code to gain the concurrency benefits. Haxl is the motivation behind the \textit{applicativeDo} project~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{applicativeDo}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{applicativeDo}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend , as discussed \DIFdelbegin \DIFdel{further }\DIFdelend \DIFaddbegin \DIFadd{earlier }\DIFaddend in Chapter~\ref{chp:related}.

\subsection{Yesod}
Another possible application of this refactoring is to parts of the code used to define Yesod webservers~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{yesod}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{yesod}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . The preferred way to handle the creation and processing of web forms uses the applicative interface~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\cite{yesodBook}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\citep{yesodBook}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . Yesod doesn't force forms to be handled applicatively because a monad instance is provided as well but it is the \textit{idiomatic} way to handle web forms. This refactoring would allow people to write in the more familiar do notation and then refactor their code to fit the community standard.

\section{Summary}

This refactoring is another example of a generalisation refactoring that rewrites code to work with a \DIFdelbegin \DIFdel{typeclass}\DIFdelend \DIFaddbegin \DIFadd{type class}\DIFaddend . The previous examples of this type of refactoring, the Maybe to MonadPlus and the Maybe to Monad refactorings, generalised a concrete type to one of its \DIFdelbegin \DIFdel{typeclasses}\DIFdelend \DIFaddbegin \DIFadd{type classes}\DIFaddend . This refactoring can accept code from any instance of a \DIFdelbegin \DIFdel{typeclass }\DIFdelend \DIFaddbegin \DIFadd{type class }\DIFaddend and generalise it to use the operations from a superclass instead (assuming, of course, that the code passes the preconditions). This refactoring opens the door for possible performance increases if the \texttt{Applicative} instance definition allows for parallel execution of the arguments to apply.

Additionally there are cases where the applicative versions of functions are cleaner and easier to read. Parsers in particular can become very descriptive in the applicative style with function calls being read from left to right in the same order as the syntax elements they are parsing appear in the input. 

In both of these cases the type of data the program is processing will drive the desire to refactor. In the first case, the parts of a program's result are computed under or extracted from a monadic context but these parts are independently computed and don't depend on the result of one part to compute other parts. This opens the opportunity for parallelism by switching from the strictly sequential monadic interface to the applicative one. With some changes to how apply is evaluated, each of its arguments can be computed in parallel if the applicative instance is defined to take advantage of this fact.\footnote{See~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\citep{haxl} }\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\cite{haxl} }\hspace{0pt}%DIFAUXCMD
}\DIFaddend for an example of this.} In the other case, the actual format of the data can inspire the refactoring. Haskell's \texttt{do} syntax has strict formatting requirements. Every statement of a \texttt{do} block must begin on the same column in the source file. The applicative operators are not as strict and would allow for parsing code to be formatted in unique ways to increase readability. 

\DIFaddbegin \DIFadd{Improving readability is a valid reason to refactor code, but can a refactoring tool be preferable over a compiler optimization? The }\texttt{\DIFadd{ApplicativeDo}}  \DIFadd{desugarer from~\mbox{%DIFAUXCMD
\cite{applicativeDo} }\hspace{0pt}%DIFAUXCMD
that was previously described in Chapter~\ref{chp:related} automatically introduces applicative operators into a }\texttt{\DIFadd{do}} \DIFadd{statement, so why would a refactoring be preferable to just letting the compiler optimize your program for you? Programmers may prefer for the semantics of their programs to be explicitly written rather than trusting the compiler to make the correct decisions for them.
}

   
\DIFaddend \chapter{Introducing Monads}
\label{chp:monadification}

Up to this point the data refactorings that have been discussed are \DIFdelbegin \DIFdel{changing }\DIFdelend \DIFaddbegin \DIFadd{making changes to }\DIFaddend abstractions that already \DIFdelbegin \DIFdel{existed }\DIFdelend \DIFaddbegin \DIFadd{exist }\DIFaddend in the source code. This chapter \DIFdelbegin \DIFdel{will explore refactorings that introduce }\DIFdelend \DIFaddbegin \DIFadd{explores refactorings that }\textit{\DIFadd{introduce}} \DIFaddend effectful abstractions into pure code. In particular this chapter \DIFdelbegin \DIFdel{will focus }\DIFdelend \DIFaddbegin \DIFadd{focuses on }\DIFaddend monadification, the process of adding monads to pure code. 

Monadification of a Haskell program is a common transformation that must be performed \DIFdelbegin \DIFdel{if a problem becomes more complicated than is easily expressed without them}\DIFdelend \DIFaddbegin \DIFadd{when a program is explicitly effectful}\DIFaddend , such as \DIFdelbegin \DIFdel{exception handling or state}\footnote{\DIFdel{I've had a colleague remark to me: "I can't count the number of times I've been in the middle of something and realized I needed some state."}}%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdelend \DIFaddbegin \DIFadd{using }\texttt{\DIFadd{Maybe}} \DIFadd{to handle failure or passing the state of a computation as a parameter}\DIFaddend . The Haskell Community's guidelines also advise that monadic and pure code should be separated if possible\DIFdelbegin \DIFdel{so }\DIFdelend \DIFaddbegin \DIFadd{, so that }\DIFaddend Haskell programs should be written as pure code until it is \DIFdelbegin \DIFdel{certain }\DIFdelend \DIFaddbegin \DIFadd{clear }\DIFaddend that monads are required~\citep{guidelines}. This makes the process of monadification a common transformation that Haskell \DIFdelbegin \DIFdel{programs undergo}\DIFdelend \DIFaddbegin \DIFadd{programmers undertake}\DIFaddend , making it a compelling refactoring target.

This chapter \DIFdelbegin \DIFdel{will describe }\DIFdelend \DIFaddbegin \DIFadd{describes }\DIFaddend a monadification refactoring. First \DIFdelbegin \DIFdel{will be a very brief definition }\DIFdelend \DIFaddbegin \DIFadd{there is a brief review }\DIFaddend of the \texttt{Monad} type class from \DIFdelbegin \DIFdel{GHC }\DIFdelend \DIFaddbegin \DIFadd{Haskell }\DIFaddend (Section~\ref{monTC}). There are multiple styles of monadification\DIFaddbegin \DIFadd{, }\DIFaddend as described in~\citep{monadSurvey}, and \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{monadStyles} will describe these styles and discuss their relative merits and usefulness to \DIFdelbegin \DIFdel{the Haskell community}\DIFdelend \DIFaddbegin \DIFadd{Haskell programmers}\DIFaddend . Finally the implementation of the monadification refactoring within \DIFdelbegin \DIFdel{the HaRe project }\DIFdelend \DIFaddbegin \DIFadd{HaRe }\DIFaddend is described in Section~\ref{monadImp}.

\DIFdelbegin %DIFDELCMD < \section{The Monad Typeclass}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \section{The Monad type class}
\DIFaddend \label{monTC}

The monad \DIFdelbegin \DIFdel{typeclass as it exists in GHC }\DIFdelend \DIFaddbegin \DIFadd{type class }\DIFaddend is defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{monadDef} along with the \texttt{Functor} and \texttt{Applicative} \DIFdelbegin \DIFdel{typeclasses }\DIFdelend \DIFaddbegin \DIFadd{type classes }\DIFaddend for reference. The two canonical monad functions are \texttt{return} and \DIFdelbegin \DIFdel{bind (}\DIFdelend \DIFaddbegin \DIFadd{``}\DIFaddend \texttt{(>>=)}\DIFdelbegin \DIFdel{)}\DIFdelend \DIFaddbegin \DIFadd{'' which is pronounced bind}\DIFaddend . \texttt{return} is the function that brings some pure value into the monadic context; the default definition is the function \texttt{pure} from the \texttt{Applicative} instance of the type. Bind allows for computation to occur within the monadic context.

\begin{figure}[t]
\begin{lstlisting}
class Functor f where
	fmap :: (a -> b) -> f a -> f b

class Functor f => Applicative f where
	pure :: a -> f a

	(<*>) :: f (a -> b) -> f a -> f b

class Applicative m => Monad m where
	(>>=) :: m a -> (a -> m b) -> m b

	(>>) :: m a -> m b -> m b
	m >> k = m >>= \_ -> k

	return :: a -> m a
	return = pure

	fail :: String -> m a
	fail s = errorWithoutStackTrace s
\end{lstlisting}
\caption{The monad \DIFdelbeginFL \DIFdelFL{typeclass}\DIFdelendFL \DIFaddbeginFL \DIFaddFL{type class}\DIFaddendFL }
\label{monadDef}
\end{figure}

The other two functions are part of the Haskell language standard for the \texttt{Monad} \DIFdelbegin \DIFdel{typeclass}\DIFdelend \DIFaddbegin \DIFadd{type class}\DIFaddend . The \texttt{(>>)} operator allows for sequential composition, when the second operation is not dependent on the outcome of the first. \texttt{fail} is called when a pattern match failure occurs inside of a \texttt{do} statement. In \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{failCall} \texttt{fail} would be called if the function \texttt{f} returns \DIFdelbegin \DIFdel{a }\DIFdelend \texttt{Nothing} rather than \DIFdelbegin \DIFdel{a }\DIFdelend \DIFaddbegin \DIFadd{(}\DIFaddend \texttt{Just \DIFaddbegin \DIFadd{n}\DIFaddend }\DIFaddbegin \DIFadd{) for some }\texttt{\DIFadd{n}}\DIFaddend .

\begin{figure}[t]
\begin{lstlisting}
do{
(Just n) <- f;
return (n+1)
}
\end{lstlisting}
\caption{A \texttt{do} statement with a possible call to \texttt{fail} on line 2.}
\label{failCall}
\end{figure}

\DIFaddbegin \DIFadd{Haskell has identified monads as the standard framework for building I/O and stateful computations. This section has described the monad type class. Despite the number of tutorials written about how to understand it, the monad type class only consists of the }\texttt{\DIFadd{return}} \DIFadd{and }\texttt{\DIFadd{(>>=)}} \DIFadd{operations. The following section will discuss the different ways that monadic programs can be constructed. 
}

\DIFaddend \section{Styles of Monadification}
\label{monadStyles}

Up to this point this thesis has \DIFdelbegin \DIFdel{been fairly vague }\DIFdelend \DIFaddbegin \DIFadd{not gone into detail }\DIFaddend about what monadification is\DIFaddbegin \DIFadd{, }\DIFaddend beyond the process of introducing monads into a previously pure type. Monadifying a single type changes it from type \texttt{a} to type \texttt{Monad m => m a}. Monadifying a function \DIFdelbegin \DIFdel{changes }\DIFdelend \DIFaddbegin \DIFadd{will always change }\DIFaddend the result type of \DIFdelbegin \DIFdel{the }\DIFdelend \DIFaddbegin \DIFadd{a target }\DIFaddend function because there is no way to extract values out of any \DIFdelbegin \DIFdel{given }\DIFdelend monadic context. Monadification can also affect any combination of parameters. Which parameters are affected depends on the \textit{style} of monadification.

The different styles of monadification can change how a function is evaluated. In~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\citep{monadSurvey} }\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\cite{monadSurvey} }\hspace{0pt}%DIFAUXCMD
}\DIFaddend five different styles of monadification are described as part of a survey \DIFdelbegin \DIFdel{on }\DIFdelend \DIFaddbegin \DIFadd{of }\DIFaddend the Haskell community's use of monads. The monadification styles are:

\begin{itemize}
\item Full Call-by-Value
\item Full Call-by-Name
\item Restricted Call-by-Name
\item Data-Directed
\item Restricted Call-by-Value \DIFaddbegin \DIFadd{(This is the style implemented in HaRe)
}\DIFaddend \end{itemize}

The rest of this \DIFdelbegin \DIFdel{chapter will describe }\DIFdelend \DIFaddbegin \DIFadd{section will describe each of }\DIFaddend these monadification styles in more detail. There are two examples used\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{: }\DIFaddend one is an implementation of \DIFdelbegin \DIFdel{mergesort }\DIFdelend \DIFaddbegin \DIFadd{merge sort }\DIFaddend that was developed for this thesis\DIFaddbegin \DIFadd{, and }\DIFaddend the other is an expression evaluator which is taken from~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\citep{monadSurvey}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\cite{monadSurvey}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . 

\subsection{Full Call-by-Value Monadification}

A call-by-value monadification \DIFaddbegin \DIFadd{of a function }\DIFaddend will ensure that all arguments are evaluated before being passed to a function and \DIFdelbegin \DIFdel{a }\DIFdelend \DIFaddbegin \DIFadd{the }\DIFaddend function will also be evaluated before having arguments passed to it. Consider the definition of mergesort in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{mergesrt}.

\begin{figure}[t]
\begin{lstlisting}
mergesort :: (Ord a) => [a] -> [a]
mergesort lst = let (l,r) = splitAt ((length lst) `div` 2) lst in
	merge (mergesort l) (mergesort r)

merge :: (Ord a) => [a] -> [a] -> [a]
merge [] r = r
merge l [] = l
merge (x:xs) (y:ys) = 
	case (x < y) of
	True  -> x:(merge xs (y:ys))
	False -> y:(merge (x:xs) ys)						
\end{lstlisting}
\caption{Mergesort}
\label{mergesrt}
\end{figure}

The full call-by-value monadification\DIFaddbegin \DIFadd{, }\DIFaddend when applied to \texttt{mergesort}\DIFaddbegin \DIFadd{, }\DIFaddend is defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{fcbv}.

\begin{figure}[t]
\begin{lstlisting}

mergesort :: (Ord a, Monad m) => m ([a] -> m [a])
mergesort = return (\ lst -> case lst of
                 [] -> return []
                 [x] -> return [x]
                 _ -> do
                   let (l,r) = splitAt ((length lst) `div` 2) lst
                   mm1 <- mergesort
                   v1 <- mm1 l
                   mm2 <- mergesort
                   v2 <- mm2 r
                   return (merge v1 v2))					
\end{lstlisting}
\caption{Full call-by-value monadification of mergesort}
\label{fcbv}
\end{figure}

\subsection{Full Call-by-Name Monadification}

The full call-by-name monadification \DIFaddbegin \DIFadd{of a function }\DIFaddend ensures arguments are unevaluated when they are passed to the function. The full call-by-name version of \texttt{mergesort} is defined in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{fcbn}.


\begin{figure}[t]
\begin{lstlisting}
mergesort :: (Ord a, Monad m) => m (m [a] -> m [a])
mergesort = return (\ mlst -> do
		lst <- mlst			
		res <- case lst of
                   [] -> return []
                   [x] -> return [x]
                   _ -> do
                     let (l,r) = splitAt ((length lst) `div` 2) lst
                     mm1 <- mergesort
                     v1 <- mm1 (return l)
                     mm2 <- mergesort
                     v2 <- mm2 (return r)
                     return (merge v1 v2)
		return res)					
\end{lstlisting}
\caption{Full call-by-name monadification of mergesort}
\label{fcbn}
\end{figure}

\DIFaddbegin \begin{figure}[t]
	\begin{lstlisting}
		> let lst = [3,2,1]
		> fcbvMergesort >>= (\f -> f lst)
		[1,2,3]
		> fcbnMergesort >>= (\f -> f (return lst))
		[1,2,3]
	\end{lstlisting}
	\caption{\DIFaddFL{Using the monadified versions of mergesort. }\texttt{\DIFaddFL{fcbvMergesort}} \DIFaddFL{is defined in Figure~\ref{fcbv} and }\texttt{\DIFaddFL{fcbnMergesort}} \DIFaddFL{is defined in Figure~\ref{fcbn}.}}
	\label{fullMonadExs}
\end{figure}

\DIFaddend The two ``full'' monadification styles monadify not only the arguments to a function but also the function itself. \DIFdelbegin \DIFdel{The remaining styles are restrictions of these full modifications where only some of the arguments are modified not the functions}\DIFdelend \DIFaddbegin \DIFadd{That is, the refactored functions return a monadic value that computes another function with, depending on the style, parameters that may be monadic. 
}

\DIFadd{The full monadification styles are a bit ``too monadified'' to be broadly useful. Consider the full call-by-value monadification of }\texttt{\DIFadd{mergesort}} \DIFadd{in Figure~\ref{fcbv}: to sort a list, the function first needs to be extracted from the monadic context, and only then can it be applied to the list. The full call-by-name version of the function takes an additional step to use: the list value also needs to be lifted into the monadic context before it can be passed to the extracted function. Examples of how to use these full monadifications are shown in Figure~\ref{fullMonadExs}.
}

\DIFadd{The remaining monadification styles are ``restricted'' versions of the two full monadification styles. None of these styles wrap the function inside of the monadic context; they differ in which of the target function's parameters are monadified}\DIFaddend . 

\subsection{Restricted Call-by-Name Monadification}
This style of monadification is where every argument is contained within the monad. Using the mergesort example again, \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{resCBN} shows the restricted call-by-name monadification.

\begin{figure}[t]
\begin{lstlisting}
mergesort :: (Ord a, Monad m) => m [a] -> m [a]
mergesort ml = do
	lst <- ml
	res <- case lst of
		[] -> return []
		[x] -> return [x]
		_ -> do 
			let (l,r) = splitAt ((length lst) `div` 2) lst
			v1 <- mergesort (return l)
			v2 <- mergesort (return r)
			return (merge v1 v2)
	return res	
\end{lstlisting}
\caption{The restricted call-by-name monadification of mergesort.}
\label{resCBN}
\end{figure}

\DIFdelbegin \DIFdel{All of the arguments }\DIFdelend \DIFaddbegin \DIFadd{The argument passed }\DIFaddend to \texttt{mergesort} in \DIFdelbegin \DIFdel{figure~\ref{resCBN} are }\DIFdelend \DIFaddbegin \DIFadd{Figure~\ref{resCBN} is }\DIFaddend within the monad so \DIFdelbegin \DIFdel{the arguments will be evaluated; hence the }\DIFdelend \DIFaddbegin \DIFadd{it will not be evaluated until it is extracted from the monadic context on line 3 of Figure~\ref{resCBN}. This gives the refactored function its }\DIFaddend call-by-name \DIFdelbegin \DIFdel{"flavour" as~\mbox{%DIFAUXCMD
\citep{monadSurvey} }\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{``flavour'' as~\mbox{%DIFAUXCMD
\cite{monadSurvey} }\hspace{0pt}%DIFAUXCMD
}\DIFaddend describes it. 

\subsection{Data-Directed Monadification}
This style refactors a certain type to be replaced by a monadic computation over that type. This style becomes clearer when working with functions that have more than a single \DIFdelbegin \DIFdel{type }\DIFdelend \DIFaddbegin \DIFadd{argument}\DIFaddend , unlike the \texttt{mergesort} example. This style will be explained using the example from~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\citep{monadSurvey}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\cite{lammelReuse}}\hspace{0pt}%DIFAUXCMD
}\DIFaddend . The example \DIFdelbegin \DIFdel{given in~\mbox{%DIFAUXCMD
\citep{monadSurvey} }\hspace{0pt}%DIFAUXCMD
}\DIFdelend is a simple arithmetic expression interpreter as seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{fig:interpreter}.

\begin{figure}[t]
\begin{lstlisting}
data Expr = Lit Int
	| BinOp Op Expr Expr

data Op = Add | Div

eval :: Expr -> Int
eval (Lit n) = n
eval (BinOp op e1 e2)
	= evalOp op (eval e1) (eval e2)

evalOp :: Op -> Int -> Int -> Int
evalOp Add v1 v2 = v1 + v2
evalOp Div v1 v2 = v1 `div` v2
\end{lstlisting}
\caption{The pure interpreter implementation}
\label{fig:interpreter}
\end{figure}

With this particular example the programmer wants the expressions to be evaluated within a monad to add effects. Since \texttt{Int} represents the result of an expression in this program this type is monadified as seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{fig:monInterp}. Importantly this refactoring makes the type of \texttt{evalOp} change to \texttt{Monad m => Op -> m Int -> m Int -> m Int} in addition to monadifying the result type of \texttt{eval}. 

\begin{figure}[t]
\begin{lstlisting}
eval :: Monad m => Expr -> m Int
eval (Lit n) = return n
eval (BinOp op e1 e2)
	= evalOp op (eval e1) (eval e2)

evalOp :: Monad m => Op -> m Int -> m Int -> m Int
evalOp Add v1 v2 = do 
	l <- v1
	r <- v2	
	return (l + r)
evalOp Div v1 v2 = do
	l <- v1
	r <- v2
	return (l `div` r)
\end{lstlisting}
\caption{A data-directed monadification of the interpreter.}
\label{fig:monInterp}
\end{figure}

Some arguments are passed \DIFdelbegin \DIFdel{unevaluated, the monadic typed ones, whereas }\DIFdelend \DIFaddbegin \DIFadd{unevaluated---those of monadic type---whereas arguments of }\DIFaddend other types are non-monadic. Therefore this style is a mixture of the call-by-name and call-by-value styles~\citep{monadSurvey}. 

Data-directed monadification is useful if a particular type can always represent the result of a computation that should be effectful. \DIFaddbegin \DIFadd{This makes it a difficult style of monadification to implement, because there is not enough information in the type system to always determine which parameters are targets of the refactoring. }\DIFaddend In the interpreter case, \texttt{Int} is always the result of an evaluation. Even in the only call to \texttt{evalOp} the second and third arguments are computed from recursive calls to \texttt{eval} so those \texttt{Int} types also represent expression evaluation. \DIFdelbegin %DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend This style of monadification would pair well with the ``introduce a type synonym'' refactoring to differentiate instances of types that should be monadified. 
\DIFdelbegin \DIFdel{In the interpreter example if it was anticipated that another }\texttt{\DIFdel{Int}} %DIFAUXCMD
\DIFdel{type that didn't represent the result of an evaluation needed to be added to the program.
}\DIFdelend 

\subsection{Restricted Call-by-Value Monadification}

The final monadification style \DIFdelbegin \DIFdel{simply }\DIFdelend monadifies the result type of the target function(s). This is the style of monadification that is produced by the algorithm described in~\DIFdelbegin \DIFdel{\mbox{%DIFAUXCMD
\citep{monadification} }\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\cite{monadification} }\hspace{0pt}%DIFAUXCMD
}\DIFaddend and that is implemented in HaRe as part of this thesis work.

Returning to the mergesort example \DIFdelbegin \DIFdel{from earlier in the section}\DIFdelend \DIFaddbegin \DIFadd{that was used earlier in this section, }\DIFaddend the restricted call-by-value monadification implementation of this can be seen in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{rCBV}.

This style of monadification meshes nicely with the bind operator which expects functions whose arguments are pure values and returns a monadic type. This style of monadification has been implemented in HaRe and the next section will go into greater detail about the implementation of this refactoring.

\begin{figure}[t]
\begin{lstlisting}
mergesort :: (Ord a, Monad m) => [a] -> m [a]
mergesort lst = do
	let (l,r) = splitAt ((length lst) `div` 2) lst
	v1 <- mergesort l
	v2 <- mergesort r
	merge v1 v2

merge :: (Ord a, Monad m) => [a] -> [a] -> m [a]
merge [] r = return r
merge l [] = return l
merge (x:xs) (y:ys) = 
	case (x < y) of
	True -> do 
		rst <- (merge xs (y:ys)) 
		return (x:rst)
	False -> do
		rst <- (merge (x:xs) ys)	
	 	return (y:rst)
\end{lstlisting}
\caption{Restricted call-by-value monadification}
\label{rCBV}
\end{figure}

\DIFaddbegin \DIFadd{This section has been a summary of the different ways that monadic programs can be written. These styles were first identified by~\mbox{%DIFAUXCMD
\cite{lammelReuse}}\hspace{0pt}%DIFAUXCMD
,~\mbox{%DIFAUXCMD
\cite{monadification}}\hspace{0pt}%DIFAUXCMD
, and~\mbox{%DIFAUXCMD
\cite{monadSurvey}}\hspace{0pt}%DIFAUXCMD
. This thesis provides an implementation of the restricted call-by-value monadification. This style fits well with bind, whose second argument is a function that takes a pure value and returns a monadic value. This style is also the obvious way to make a non-effectful computation effectful. The other styles assume that there is an already existing monadic context that either produces the arguments that are passed to the target function, in the call-by-name cases, or from which the function is extracted, as in the ``full'' monadification styles. The restricted call-by-value style of monadification fits easily into an otherwise pure program, which is what the refactoring is targeting.  
}

\DIFaddend \section{Implementation of the Monadification Refactoring}
\label{monadImp}

The implementation of Monadification in HaRe is a \DIFdelbegin \DIFdel{fairly straightforward }\DIFdelend top down transformation. The monadification refactoring takes \DIFdelbegin \DIFdel{in }\DIFdelend a list of \DIFdelbegin \DIFdel{positions }\DIFdelend \DIFaddbegin \DIFadd{target functions }\DIFaddend as input. \DIFdelbegin \DIFdel{Those positions define where the functions that should be monadified are defined}\DIFdelend \DIFaddbegin \DIFadd{In practice the refactoring receives a list of positions in a file and these positions indicate which functions should be targeted by the refactoring}\DIFaddend .

The monadification refactoring works over sets of functions simultaneously. Figure~\ref{fgMon} defines two functions \texttt{f} and \texttt{g} and the two possible monadifications (\DIFaddbegin \DIFadd{``}\DIFaddend \texttt{*\_m1}\DIFdelbegin \DIFdel{and }\DIFdelend \DIFaddbegin \DIFadd{'' and ``}\DIFaddend \texttt{*\_m2}\DIFaddbegin \DIFadd{''}\DIFaddend ). The \texttt{f\_m1} function is how \texttt{f} would be rewritten if it was the sole target of the refactoring, \DIFdelbegin \DIFdel{since the right hand side of }\DIFdelend \DIFaddbegin \DIFadd{and }\texttt{\DIFadd{f\_m2}} \DIFadd{and }\texttt{\DIFadd{g\_m2}} \DIFadd{are the definitions if both functions were a target of the refactoring.
}

\DIFadd{When only making }\DIFaddend \texttt{f} \DIFdelbegin \DIFdel{is composed from pure values and non-monadic functions }\texttt{\DIFdel{f\_m1}} %DIFAUXCMD
\DIFdel{is defined by applying }\DIFdelend \DIFaddbegin \DIFadd{effectful the refactoring can simply lift its original definition into the monad using the }\DIFaddend \texttt{return} \DIFdelbegin \DIFdel{to the original definition of }\DIFdelend \DIFaddbegin \DIFadd{function. Since }\texttt{\DIFadd{g}} \DIFadd{is a pure function it can still be used in the body of }\DIFaddend \texttt{f\DIFdelbegin %DIFDELCMD < }%%%
\DIFdel{. }%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{When the refactoring targets both }\texttt{\DIFdel{f}} %DIFAUXCMD
\DIFdel{and }\DIFdelend \DIFaddbegin \DIFadd{\_m1}} \DIFadd{without any changes. When both of the functions are targets of the refactoring }\DIFaddend \texttt{g}\DIFdelbegin \DIFdel{these functionswill be rewritten to their "}\texttt{\DIFdel{*\_m2}}%DIFAUXCMD
\DIFdel{" equivalents. In this case }\texttt{\DIFdel{g}} %DIFAUXCMD
\DIFdel{is }\DIFdelend \DIFaddbegin \DIFadd{, because it is not dependent on any other monadic functions, can just be }\DIFaddend handled in the same way \texttt{f\_m1} was \DIFdelbegin \DIFdel{defined, because it is defined entirely with non-monadic values the new right hand side is just the old one composed with }\DIFdelend \DIFaddbegin \DIFadd{by lifting its definition into the monadic context using }\DIFaddend \texttt{return}. \DIFdelbegin \DIFdel{The definition of }\texttt{\DIFdel{f\_m2}} %DIFAUXCMD
\DIFdel{is slightly more interesting, however, due to the call to the monadified function }\texttt{\DIFdel{g\_m2}}%DIFAUXCMD
\DIFdel{. The refactored version of }\DIFdelend \DIFaddbegin \DIFadd{However, }\texttt{\DIFadd{f}} \DIFadd{is dependent on the now effectful }\DIFaddend \texttt{g} \DIFdelbegin \DIFdel{in this scenario produces a monadic value of type }\DIFdelend \DIFaddbegin \DIFadd{so the expression ``}\texttt{\DIFadd{(g x) + 1}}\DIFadd{'' will now throw a type error because }\texttt{\DIFadd{(g x)}} \DIFadd{is of type ``}\DIFaddend \texttt{\DIFdelbegin \DIFdel{Monad }\DIFdelend m \DIFdelbegin \DIFdel{=> m }\DIFdelend Int}\DIFdelbegin \DIFdel{so a call to this function can no longer be passed to }\DIFdelend \DIFaddbegin \DIFadd{'', not a number, which was its previous type. Instead the call to }\texttt{\DIFadd{g\_m2}} \DIFadd{needs to be bound to the rest of }\DIFaddend \texttt{\DIFdelbegin \DIFdel{(+)}%DIFDELCMD < }%%%
\DIFdel{.
Instead that whole sub-expression is replaced with a new variable "}\texttt{\DIFdel{v1}}%DIFAUXCMD
\DIFdel{." The result of the , now monadic, computation (}\texttt{\DIFdel{g\_m2 x}}%DIFAUXCMD
\DIFdel{) is bound to this new variable via the bind (}\DIFdelend \DIFaddbegin \DIFadd{f\_m2}}\DIFadd{'s definition so that its result can be extracted from the monadic context.
}

\DIFadd{It's important to note that }\texttt{\DIFadd{g}} \DIFadd{alone is not a valid target of the refactoring. This is because }\texttt{\DIFadd{f}} \DIFadd{is dependent on }\texttt{\DIFadd{g}}\DIFadd{; if }\texttt{\DIFadd{g}} \DIFadd{becomes effectful then it can no longer be used in the pure function }\DIFaddend \texttt{\DIFdelbegin \DIFdel{>>=}\DIFdelend \DIFaddbegin \DIFadd{f}\DIFaddend }\DIFdelbegin \DIFdel{) operator and lambda expression}\DIFdelend . 

\begin{figure}[t]
\begin{lstlisting}
f :: Int -> Int
f x = (g x) + 1

g :: Int -> Int
g y = y * 2

f_m1 :: (Monad m) => Int -> m Int
f_m1 x = return ((g x) + 1)

f_m2 :: (Monad m) => Int -> m Int
f_m2 x = g_m2 x >>= (\v1 -> return (v1 + 1))

g_m2 :: (Monad m) => Int -> m Int
g_m2 y = return (y * 2)
\end{lstlisting}
\caption{Possible monadification refactorings for the \texttt{f} and \texttt{g} functions.}
\label{fgMon}
\end{figure} 

\subsection{Preconditions}

There are two preconditions that must be met before this refactoring can be applied. First, \DIFaddbegin \DIFadd{all of the call points of }\DIFaddend the target functions must be \DIFdelbegin \DIFdel{applied to sufficient arguments to be of non-functional type at all call points}\DIFdelend \DIFaddbegin \DIFadd{``fully-saturated.'' This means that every call point will have a named variable passed to the target function}\DIFaddend . This allows the refactoring to assume that the type of a subtree whose leftmost child is a monadified function is a monadic value and not something of type \texttt{Monad m => a -> m a}. 

\begin{figure}[t]
\begin{lstlisting}
f :: Int -> String -> (String, String)
f n str = (take n str, drop n str)


stringHandler :: Maybe Int -> String -> (String, String)
stringHandler mi s = 
	let g = case mi of
		Nothing -> f ((length s) `div` 2)
		(Just i) -> f i
	in g s
\end{lstlisting}
\caption{The \texttt{stringHandler} function will be rejected by the monadification refactoring.}
\label{strHan}
\end{figure}

As with many of the preconditions for the ``generalise monad to applicative'' refactoring\DIFaddbegin \DIFadd{, }\DIFaddend other refactorings can help transform programs to pass the preconditions. The \texttt{stringHandler} in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{strHan} has calls to \texttt{f} that are not fully saturated. Fortunately $\eta$-expansion can be used to fully saturate the calls to \texttt{f} (see \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{etaHan}) allowing \texttt{stringHandler} and \texttt{f} to be monadified.\footnote{The original definition of \texttt{stringHandler} could be monadified alone.} \DIFaddbegin \DIFadd{This expansion involves introducing an abstraction over a function: given some function }\texttt{\DIFadd{f :: a -> b -> c}} \DIFadd{which is used in the expression }\texttt{\DIFadd{f x}}\DIFadd{, this can be $\eta$-expanded to }\texttt{\DIFadd{(\textbackslash y -> f x y)}} \DIFadd{so that all of }\texttt{\DIFadd{f}}\DIFadd{'s parameters at this call site are named variables. 
}\DIFaddend 

\begin{figure}[t]
\begin{lstlisting}
stringHandler :: Maybe Int -> String -> (String, String)
stringHandler mi s = 
	let g = case mi of
		Nothing -> (\x -> f ((length s) `div` 2) x)
		(Just i) -> (\x -> f i x)
	in g s
\end{lstlisting}
\caption{The new version of \texttt{stringHandler} can be monadified.}
\label{etaHan}
\end{figure}

The second precondition that needs to hold before monadification can take place is that \DIFdelbegin \DIFdel{none of the target functions can be called by non-monadic functions.
This is due to the fact that a target function}\DIFdelend \DIFaddbegin \DIFadd{functions that are outside the scope of the refactoring cannot call any of the target functions.
}

\DIFadd{In other words, the target functions cannot be called outside the scope of the refactoring. Once a function has been monadified, it}\DIFaddend s result \DIFdelbegin \DIFdel{type will change from some pure value }\texttt{\DIFdel{a}} %DIFAUXCMD
\DIFdel{to a monadic computation over }\texttt{\DIFdel{a}} %DIFAUXCMD
\DIFdelend \DIFaddbegin \DIFadd{cannot be used within a pure function any longer because }\DIFaddend there is no \DIFdelbegin \DIFdel{function that retrieves values out of any given monad so the target function, once it is refactored, will be unusable by non-monadic functions}\DIFdelend \DIFaddbegin \DIFadd{way to retrieve the result of a monadic computation from the monadic context}\DIFaddend .

 \DIFdelbegin \DIFdel{Because this precondition can't be avoided for the refactoring to happen, the set of target functions will have to be changed. Either all functions that reference a target function become target functions as well or the dependent function is dropped from the }\DIFdelend \DIFaddbegin \DIFadd{In the }\texttt{\DIFadd{stringHandler}} \DIFadd{example from Figure~\ref{etaHan}, }\texttt{\DIFadd{stringHandler}} \DIFadd{alone and }\texttt{\DIFadd{stringHandler}} \DIFadd{and }\texttt{\DIFadd{f}} \DIFadd{are valid target sets of the refactoring, but trying to monadify }\texttt{\DIFadd{f}} \DIFadd{alone is not because it is used outside the scope of the }\DIFaddend refactoring. 

\subsection{The Transformation}

The simplest case of this refactoring is when the target function is defined using only pure functions; in that case the refactored version of the function is \DIFdelbegin \DIFdel{just }\DIFdelend \DIFaddbegin \DIFadd{simply }\texttt{\DIFadd{return}} \DIFadd{applied to }\DIFaddend the original body \DIFdelbegin \DIFdel{applied to }\texttt{\DIFdel{return}}%DIFAUXCMD
\DIFdelend \DIFaddbegin \DIFadd{of the function}\DIFaddend . Otherwise the refactoring works over the body of a target function in \DIFdelbegin \DIFdel{three }\DIFdelend \DIFaddbegin \DIFadd{four }\DIFaddend steps:

\begin{enumerate}
\item \DIFdelbegin \DIFdel{Replace any right hand sub-trees }\DIFdelend \DIFaddbegin \DIFadd{The refactoring begins by extracting any sub-expressions }\DIFaddend that are calls to \DIFdelbegin \DIFdel{functions that are targeted in the refactoring with variables in }\DIFdelend \DIFaddbegin \DIFadd{a target function. The extraction happens in }\DIFaddend a top-down left to right manner\DIFdelbegin \DIFdel{. For example the body of }\texttt{\DIFdel{(f x = f (g x))}} %DIFAUXCMD
\DIFdel{becomes }\texttt{\DIFdel{(f v1)}} %DIFAUXCMD
\DIFdel{assuming }\texttt{\DIFdel{f}} %DIFAUXCMD
\DIFdel{and }\texttt{\DIFdel{g}} %DIFAUXCMD
\DIFdel{are targets of the refactoring.
}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{If the expression is not }\DIFdelend \DIFaddbegin \DIFadd{, when a sub-expression is found that is }\DIFaddend a call to one of the \DIFdelbegin \DIFdel{target functions then it }\DIFdelend \DIFaddbegin \DIFadd{refactoring's target functions it is replaced with a variable, so  }\texttt{\DIFadd{(f x = f (g x))}} \DIFadd{would become }\texttt{\DIFadd{(f v1)}}\DIFadd{.
}

\item \DIFadd{Next a bind expression }\DIFaddend needs to be \DIFdelbegin \DIFdel{applied to return. }\DIFdelend \DIFaddbegin \DIFadd{constructed from the extracted monadic sub-expressions and the expressions they were extracted from. The extracted sub-expressions become the first argument to bind and a newly constructed lambda expression makes up the second argument. The lambda expression is made from the expression that had the monadic expression replaced with some variable; the only argument of the lambda expression is that variable. }\DIFaddend From the example in \DIFdelbegin \DIFdel{step one, the body of }\texttt{\DIFdel{f}} %DIFAUXCMD
\DIFdel{had become }\DIFdelend \DIFaddbegin \DIFadd{the previous step: }\texttt{\DIFadd{(g x)}} \DIFadd{is the extracted monadic sub-expression and }\DIFaddend \texttt{(f v1)} \DIFaddbegin \DIFadd{is the expression in which it was replaced with the }\texttt{\DIFadd{v1}} \DIFadd{variable. In this example the constructed bind expression is defined as }\texttt{\DIFadd{(g x) >>= (\textbackslash v1 -> f v1)}}\DIFaddend .
\DIFdelbegin \DIFdel{Since }\texttt{\DIFdel{f}} %DIFAUXCMD
\DIFdel{is }\DIFdelend \DIFaddbegin 

\item \DIFadd{If the right hand side expression of }\DIFaddend one of the \DIFdelbegin \DIFdel{target functions this expression is monadic and doesn't need to be returned. }%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{In the order that the monadic sub-trees were extracted in. Bind those expressions to a lambda that accepts a single argument, }\DIFdelend \DIFaddbegin \DIFadd{lambda expressions is not a monadic value then }\texttt{\DIFadd{return}} \DIFadd{will have to be applied to this expression. In }\DIFaddend the \DIFdelbegin \DIFdel{variable that replaced that expression in the original body. The example expression becomes }\DIFdelend \DIFaddbegin \DIFadd{example from the previous step, if }\texttt{\DIFadd{f}} \DIFadd{is a target of the refactoring then the bind statement does not need to be modified further. If }\texttt{\DIFadd{f}} \DIFadd{is not one of the target functions then the bind expression needs to be rewritten to: }\DIFaddend \texttt{(g x) >>= (\DIFdelbegin %DIFDELCMD < \\%%%
\DIFdelend \DIFaddbegin \DIFadd{\textbackslash }\DIFaddend v1 -> \DIFaddbegin \DIFadd{return (}\DIFaddend f v1)\DIFaddbegin \DIFadd{)}\DIFaddend }.
\DIFaddbegin 

\DIFaddend \item \DIFdelbegin \DIFdel{Replace }\DIFdelend \DIFaddbegin \DIFadd{Finally }\DIFaddend the body of the target function \DIFdelbegin \DIFdel{with the generated expressionfrom step three. The final result of the refactoring is }\texttt{\DIFdel{f x = (g x) >>= (}%DIFDELCMD < \\%%%
\DIFdel{v1 -> f v1)}}
%DIFAUXCMD
\DIFdelend \DIFaddbegin \DIFadd{can be replaced with the constructed bind expression. This target function has now been successfully refactored. These four steps need to be performed over the body of each of the target functions. 
}\DIFaddend \end{enumerate}

A more \DIFdelbegin \DIFdel{in depth example given in figure}\DIFdelend \DIFaddbegin \DIFadd{concrete example is shown in Figure}\DIFaddend ~\ref{simpleMon}. The functions \texttt{f}, \texttt{g}, and \texttt{h} are all targets of the refactoring.

\begin{figure}[t]
\begin{lstlisting}
f x y = f (h x) (g y)
\end{lstlisting}
\caption{A simple target function for monadification}
\label{simpleMon}
\end{figure}  

\begin{figure}[t]
	\begin{center}
		\includegraphics[scale=.5]{graphVis/Chapter6/simpF.png}
	\end{center}
	\caption{The syntax tree of \texttt{f} from \DIFdelbeginFL \DIFdelFL{figure}\DIFdelendFL \DIFaddbeginFL \DIFaddFL{Figure}\DIFaddendFL ~\ref{simpleMon}}
	\label{simpF}
\end{figure}

The syntax tree of the right hand side of \texttt{f} is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{simpF}. The \DIFdelbegin \DIFdel{transformation starts by top down checking the right subtrees for monadic computations. It will first check }\DIFdelend \DIFaddbegin \DIFadd{first step of the refactoring begins by extracting all the monadic sub-expressions of the body of }\texttt{\DIFadd{f}} \DIFadd{and replacing them with variables. This step works in a top down left to right manner so }\DIFaddend the call to \texttt{\DIFdelbegin \DIFdel{g}\DIFdelend \DIFaddbegin \DIFadd{h}\DIFaddend } \DIFdelbegin \DIFdel{(shaded with grey in figure~\ref{simpF}) then it will check }\DIFdelend \DIFaddbegin \DIFadd{is discovered first, the relevant sub-tree is outlined in Figure~\ref{simpF}. Next }\DIFaddend the call to \texttt{\DIFdelbegin \DIFdel{h}%DIFDELCMD < } %%%
\DIFdel{(the corresponding nodes are inside the box). Both of these subtrees are monadic calls so they will be replaced in the tree with unique generated variables. The refactoring implementation }\DIFdelend \DIFaddbegin \DIFadd{g}} \DIFadd{will also be extracted. The implementation of the refactoring }\DIFaddend contains a queue \DIFdelbegin \DIFdel{where }\DIFdelend \DIFaddbegin \DIFadd{that keeps track of }\DIFaddend the monadic expressions \DIFdelbegin \DIFdel{are paired with }\DIFdelend \DIFaddbegin \DIFadd{and }\DIFaddend the variables that \DIFdelbegin \DIFdel{replace themand then stored. After this step the expression is }\texttt{\DIFdel{f v2 v1}} %DIFAUXCMD
\DIFdel{and the }\DIFdelend \DIFaddbegin \DIFadd{replaced them. At this point in the refactoring, after step one, the }\DIFaddend queue contains two elements\DIFaddbegin \DIFadd{: }\DIFaddend \texttt{[(v1,\DIFdelbegin \DIFdel{"}\DIFdelend (\DIFdelbegin \DIFdel{g y}\DIFdelend \DIFaddbegin \DIFadd{h x}\DIFaddend )\DIFdelbegin \DIFdel{"}\DIFdelend ), (v2,\DIFdelbegin \DIFdel{"}\DIFdelend (\DIFdelbegin \DIFdel{h x}\DIFdelend \DIFaddbegin \DIFadd{g y}\DIFaddend )\DIFdelbegin \DIFdel{"}\DIFdelend )]} \DIFdelbegin \DIFdel{. The final step checks if the left most child of the current expression is a call to a monadic function. In this case it is because }\DIFdelend \DIFaddbegin \DIFadd{and the body of }\DIFaddend \texttt{f} \DIFdelbegin \DIFdel{is a target of the refactoring but if it wasn't then the expression would be composed with }\DIFdelend \DIFaddbegin \DIFadd{has become: }\DIFaddend \texttt{\DIFdelbegin \DIFdel{return}\DIFdelend \DIFaddbegin \DIFadd{f v1 v2}\DIFaddend }.

\DIFdelbegin \DIFdel{At this point the expression is ready to be part of the right hand side of a lambda expression. The first element is popped from }\DIFdelend \DIFaddbegin \DIFadd{Next in step two the bind expression will be constructed from the contents of the queue and the rewritten body of }\texttt{\DIFadd{f}}\DIFadd{. The refactoring begins by popping each expression off of }\DIFaddend the queue, \DIFdelbegin \DIFdel{in this case it 's }\texttt{\DIFdel{(v1,"(g y)")}}%DIFAUXCMD
\DIFdel{, the variable name becomes the argument in a lambda expression and the expression is the left argument to bind, }\DIFdelend \DIFaddbegin \DIFadd{composing it with bind and creating  a lambda expression from the corresponding variable name. This process has been illustrated in Figure~\ref{queueProc}.
}

\begin{figure}[t]
\begin{lstlisting}
-- Initial state of the queue: [(v1,(h x)), (v2,(g y))]
-- Begin by popping (v1, (h x)) off
(h x) >>= (\ v1 -> ...)
-- The right hand side of the lambda is constructed from what remains in the queue
-- Pop (v2, (g y)) off
(h x) >>= (\ v1 -> (g y) >>= (\ v2 -> ...))
-- The queue is now empty so the top level expression is inserted here
-- The final result of step 2: 
(h x) >>= (\ v1 -> (g y) >>= (\ v2 -> f v1 v2))
\end{lstlisting}
\caption{\DIFaddFL{Working through the queue of monadic expressions.}}
\label{queueProc}
\end{figure}

\DIFadd{Once the bind expression has been produced as shown in line 9 of Figure~\ref{queueProc} the third step of the refactoring checks if its innermost expression needs to be lifted into the monadic context with }\DIFaddend \texttt{\DIFdelbegin \DIFdel{(g y) >>=(}%DIFDELCMD < \v1 %%%
\DIFdel{-> f v2 v1)}\DIFdelend \DIFaddbegin \DIFadd{return}\DIFaddend }. In this \DIFdelbegin \DIFdel{manner the refactoring keeps popping elements off of the queue wrapping the previous expression in a lambda with the variable name as its argument and composing this lambda with the expression from the queue using bind until the queue is empty}\DIFdelend \DIFaddbegin \DIFadd{particular case the innermost expression is a call to a target function, so this step does not affect the final product of the refactoring}\DIFaddend .

\DIFaddbegin \DIFadd{Now the binding of }\texttt{\DIFadd{f}} \DIFadd{can be replaced with the bind expression. }\DIFaddend The final result of refactoring \texttt{f} is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{simpF_ref}. \DIFaddbegin \DIFadd{After rewriting }\texttt{\DIFadd{f}} \DIFadd{the refactoring would continue by refactoring the bodies of }\texttt{\DIFadd{g}} \DIFadd{and }\texttt{\DIFadd{h}}\DIFadd{.}
\DIFaddend 

\begin{figure}[t]
\DIFdelbeginFL %DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < f x y = (g x) >>= (\ v2 -> (g y) >>= (\v1 -> f v2 v1))
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
\DIFdelendFL \DIFaddbeginFL \begin{lstlisting}
f x y = (h x) >>= (\ v1 -> (g y) >>= (\v2 -> f v1 v2))
\end{lstlisting}
\DIFaddendFL \caption{The final refactored version of \texttt{f}}
\label{simpF_ref}
\end{figure}

\DIFaddbegin \DIFadd{At this point the refactoring is capable of transforming simple function application. Haskell, obviously, has more expressive expressions beyond just function applications. The next section will describe how this implementation handles let expressions.
}

\DIFaddend \section{Let Expressions}

The previous section details how the refactoring works over simple function applications. This section will describe how a function with a let expression is refactored. This section will use the expression evaluator in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{evalExpr} as a motivating example.

\begin{figure}[t]
\begin{lstlisting}
data Expr = Var Char
          | N Int
          | Add Expr Expr
          | Sub Expr Expr
          | Assign Char Expr
          deriving Show

type Env = [(Char,Int)]

eval :: Expr -> Env -> (Int,Env)

eval (Var v) env = (head [val | (x,val) <- env, x==v], env)

eval (N n) env = (n,env)

eval (Add e1 e2) env = let (v1,env1) = eval e1 env
                                    (v2,env2) = eval e2 env1 
                           in (v1+v2,env2)
eval (Sub e1 e2) env = let (v1,env1) = eval e1 env
                                    (v2,env2) = eval e2 env1 
                           in (v1-v2,env2)
eval (Assign x e) env = let (v,env1) = eval e env 
	in (v, (x,v):env1)
\end{lstlisting}
\caption{An expression evaluator.}
\label{evalExpr}
\end{figure}

The idea is that at this point in the project the explicit passing of the state is unwanted, so \texttt{eval} is to be refactored to become monadic so that the \texttt{Env} can be passed around using the \texttt{State} monad instead. The first two cases that handle the \texttt{Var} and \texttt{N} constructors can simply be wrapped with \texttt{return} because no recursive calls to \texttt{eval} happen in those cases. The three other cases contain let bindings that all call (the now monadic) \texttt{eval}. 

In those cases the local bindings can be used as the variables inside of a lambda expression. This seems simple enough\DIFdelbegin \DIFdel{but what order should the bindings be processed in}\DIFdelend \DIFaddbegin \DIFadd{, but in which order should the bindings be processed}\DIFaddend ? Consider the let binding in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{intLet}, assuming that the \texttt{f} and \texttt{x} variables are in scope\DIFdelbegin \DIFdel{, due }\DIFdelend \DIFaddbegin \DIFadd{. Due }\DIFaddend to lazy evaluation this is a valid Haskell let binding despite \texttt{v1} being referenced on line one before it \DIFdelbegin \DIFdel{'s }\DIFdelend \DIFaddbegin \DIFadd{is }\DIFaddend defined on line two. However if the let bindings are processed in the obvious way from top to bottom\footnote{In this case: \texttt{g v1 >>= ($\backslash$v2 -> f x >>= ($\backslash$v1 -> ...))}} \texttt{v1} will not be in scope when it \DIFdelbegin \DIFdel{'s }\DIFdelend \DIFaddbegin \DIFadd{is }\DIFaddend passed to \texttt{g}.  

\begin{figure}[t]
\begin{lstlisting}
let v2 = g v1
     v1 = f x
    in ...
\end{lstlisting}
\caption{An interesting let binding}
\label{intLet}
\end{figure}

Fortunately GHC offers a solution\DIFdelbegin \DIFdel{, }\DIFdelend \DIFaddbegin \DIFadd{: }\DIFaddend the renamer of GHC\DIFaddbegin \DIFadd{, }\DIFaddend which checks for lexical errors\DIFdelbegin \DIFdel{also does }\DIFdelend \DIFaddbegin \DIFadd{, also performs }\DIFaddend dependency analysis on local bindings and orders them so that later bindings may depend on earlier ones but not vice versa \DIFaddbegin \DIFadd{when there are no mutually recursive bindings}\DIFaddend ~\citep{ghcApi}. This renamed ordering is used to look up the parsed bindings in dependency-analysed order.

Returning to the evaluator example the monadified version is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{monEval}.\footnote{Line breaks have been added to aid readability in this setting but the refactoring would not actually do this.} The let bindings in \texttt{eval} can be processed from top to bottom. In the \texttt{Add} case this means that the expression bound on line 16 in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{evalExpr} makes up the outermost call to bind and the pattern it was originally bound to becomes the pattern that matches the argument passed to the first lambda expression. 
\begin{figure}[t]
\begin{lstlisting}
eval :: Monad m => Expr -> Env -> m (Int,Env)

eval (Var v) env = return (head [val | (x,val) <- env, x==v], env)

eval (N n) env = return (n,env)

eval (Add e1 e2) env = eval e1 env >>= 
	(\(v1,env1) -> eval e2 env1 >>= 
		(\(v2,env2) -> return (v1+v2,env2)))

eval (Sub e1 e2) env = eval e1 env >>= 
	(\(v1,env1) -> eval e2 env1 >>= 
		(\(v2,env2) -> return (v1-v2,env2)))

eval (Assign x e) env = eval e env >>= 
	(\(v,env1) -> return (v, (x,v):env1))
\end{lstlisting}
\caption{The monadified expression evaluator.}
\label{monEval}
\end{figure}

If there are pure expressions on the right hand side of the let binding those bindings will remain in a let expression. For example the function starting on line one in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{mixLet} contains two let bindings: \texttt{pureF} is a pure function and so it remains inside \DIFdelbegin \DIFdel{of }\DIFdelend a let expression whereas \texttt{monF} is a target of the refactoring and it is lifted from the let and passed to bind. 

\begin{figure}[t]
\begin{lstlisting}
f x = let y = pureF x
             z = monF y
		     in z+x

f_m x = let y = pureF x in
		monF y >>= (\z -> return (z+x))
\end{lstlisting}
\caption{A function with a mixture of pure and monadic function calls in a let expression.}
\label{mixLet}
\end{figure}

\DIFaddbegin \DIFadd{With this section the refactoring supports the transformation of the core of Haskell, the typed lambda calculus with polymorphic let expressions. However, this refactoring outputs monadic code composed with the bind operator, which is probably not the most popular way to write monadic code. To better support monadic programming Haskell gives monads their own syntactic structure known as do-syntax. The next section will discuss a separate refactoring for introducing this syntax into the target function.
}

\DIFaddend \section{Adding Syntactic Sugar}

The refactored program from \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{monEval} is rewritten in a restricted call-by-value monadification style. However, idiomatic monadic Haskell is not often written in this style. Haskell also supports a piece of syntactic sugar called \texttt{do} notation. This notation allows for a series of bind computations to be written in a more compact and easier to read style. Figure~\ref{doEx} shows how the bind expression on line one can be sugared into the do statement on lines four through six.

\begin{figure}[t]
\begin{lstlisting}
m_expr >>= (\x -> return (f x))
--Sugars to

do
	x <- m_expr
	return (f x)
\end{lstlisting}
\caption{An example of how binds can sugar to \texttt{do} notation.}
\label{doEx}
\end{figure}

\textbf{\texttt{do}} notation is highly used when writing monadic Haskell programs and some texts even introduce it before explaining what monads are (\DIFdelbegin \DIFdel{see \mbox{%DIFAUXCMD
\cite{realWorldHaskell}}\hspace{0pt}%DIFAUXCMD
and }\DIFdelend \DIFaddbegin \DIFadd{\mbox{%DIFAUXCMD
\cite{realWorldHaskell}}\hspace{0pt}%DIFAUXCMD
; \mbox{%DIFAUXCMD
\cite{haskellCraft}}\hspace{0pt}%DIFAUXCMD
; }\DIFaddend \cite{learnYou}). Supporting this syntax is an important feature for a monadification refactoring to be practical. 

The sugaring of bind syntax into \texttt{do} syntax is a separate refactoring in HaRe. This allows for the programmer to choose exactly which functions should be written with \texttt{do} syntax and when functions are clearer remaining as binds. The refactoring takes in a single parameter, the position where the target function is declared. The sugared version of \texttt{eval} from \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{monEval} is shown in \DIFdelbegin \DIFdel{figure}\DIFdelend \DIFaddbegin \DIFadd{Figure}\DIFaddend ~\ref{finEval}. The first two cases weren't bind expressions so they remain just calls to return. The three other cases have been refactored into the equivalent \texttt{do} statements.

\begin{figure}[t]
\begin{lstlisting}
eval :: Monad m => Expr -> Env -> m (Int,Env)

eval (Var v) env = return (head [val | (x,val) <- env, x==v], env)

eval (N n) env = return (n,env)

eval (Add e1 e2) env = do
	(v1,env1) <- eval e1 env
	(v2,env2) <- eval e2 env1
	return (v1+v2,env2)

eval (Sub e1 e2) env = do
	(v1,env1) <- eval e1 env 
	(v2,env2) <- eval e2 env1
	return (v1-v2,env2)

eval (Assign x e) env = do
	(v,env1) <- eval e env
	return (v, (x,v):env1)
\end{lstlisting}
\caption{The sugared evaluator.}
\label{finEval}
\end{figure}

\DIFaddbegin \DIFadd{Introducing the do syntactic sugar into the target program is the final step of this two-step composite refactoring. Do syntax has become synonymous with monads in the Haskell language and a refactoring that supports them should also support this syntax.
}

\section{Summary}

\DIFaddend Monads are a challenging and characteristic feature of Haskell and, anecdotally \DIFaddbegin \DIFadd{at least}\DIFaddend , infamous amongst those trying to learn the language. Monadification is an important and common transformation that Haskell code undergoes. This chapter has described two refactorings that automate this transformation. The monadification refactoring transforms a set of functions into restricted call-by-value monad style, where only the result type of a function is made monadic. The second refactoring sugars the binds produced by the monadification into \texttt{do} notation, a common structure for monadic Haskell functions.

%DIF < %%%%%%%%%%%%%%%%%%%%%%%%%
\DIFdelbegin \chapter{\DIFdel{Related Work}}
%DIFAUXCMD
\addtocounter{chapter}{-1}%DIFAUXCMD
%DIFDELCMD < \label{chp:related}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{This chapter reviews current work in refactoring and other areas of the literature. The chapter is divided into several sections. This chapter will begin with a brief description of refactoring tools in general. The second section (\ref{funcTools}) will describe work done in the area of tooling for functional languages such as HLint~\mbox{%DIFAUXCMD
\citep{hlint} }\hspace{0pt}%DIFAUXCMD
a tool that suggests improvements to Haskell code and Wrangler~\mbox{%DIFAUXCMD
\citep{wrangler} }\hspace{0pt}%DIFAUXCMD
a refactoring tool for Erlang. The next section (\ref{otherTools}) will discuss novel, non-programming language refactoring of wikis and spreadsheets. Section~\ref{refacParallel} discusses automated refactoring tools that introduce parallelism into their target programs. Then Section~\ref{applicativeDo} will describe another approach to using applicative functors in Haskell.  Finally section~\ref{typeTrans} covers work done on type changing program transformations, including the work of Erwig and Ran on monadification~\mbox{%DIFAUXCMD
\citep{monadification} }\hspace{0pt}%DIFAUXCMD
and the type and transform system developed at the University of Utrecht~\mbox{%DIFAUXCMD
\citep{typeAndTransform}}\hspace{0pt}%DIFAUXCMD
.
}%DIFDELCMD < 

%DIFDELCMD < \section{Refactoring Tools in Modern IDEs}
%DIFDELCMD < %%%
\DIFdel{Refactoring tools have become a standard feature in integrated development environments. The four most popular IDEs for object-oriented languages, Eclipse}\footnote{%DIFDELCMD < \url{https://www.eclipse.org/}%%%
}%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{, NetBeans}\footnote{%DIFDELCMD < \url{https://netbeans.org/}%%%
}%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{, Intellij}\footnote{%DIFDELCMD < \url{https://www.jetbrains.com/idea/}%%%
}%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{, and Visual Studio}\footnote{%DIFDELCMD < \url{https://www.visualstudio.com/}%%%
} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{all come with refactoring tools for their primary language~\mbox{%DIFAUXCMD
\citep{ides}}\hspace{0pt}%DIFAUXCMD
. These refactoring tools support some general (renaming, method extraction) and object-oriented specific refactorings (pushing/pulling methods up/down the object hierarchy). 
}%DIFDELCMD < 

%DIFDELCMD < \section{Refactoring Tools for functional languages}%DIFDELCMD < \label{funcTools}%%%
%DIFDELCMD < %%%
\DIFdel{A reason that often used to be given to explain why functional languages are not in widespread use in industry is the lack of a robust tooling ecosystem~\mbox{%DIFAUXCMD
\citep{wadlerTools}}\hspace{0pt}%DIFAUXCMD
. This is no longer the case as functional language ecosystems have undergone a great deal of development in recent years and, maybe coincidently, use in industry has gone up substantially in the last five years. This section will cover some notable refactoring and code smell tools for functional programming languages.
}%DIFDELCMD < 

%DIFDELCMD < \subsection{HLint}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{HLint is a ``code smell'' tool for Haskell. Poorly designed code often produces ``smells,'' apparently superficial problems that indicate deeper design issues~\mbox{%DIFAUXCMD
\citep{fowler}}\hspace{0pt}%DIFAUXCMD
. The most common of these smells is duplicated code. A code smell tool  suggests changes to a code base such as alternative functions to use, how to simplify code, and redundancies~\mbox{%DIFAUXCMD
\citep{hlint}}\hspace{0pt}%DIFAUXCMD
.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Code smell and refactoring tools are very closely related. Simplistically a code smell tool detects problems in a code base and a refactoring tool fixes them. If a tool can detect a problem why can't the same program fix them? HLint has a }\texttt{\DIFdel{-refactor}} %DIFAUXCMD
\DIFdel{flag that will automatically apply the suggestions. However a single piece of code could have multiple smells, how would HLint choose which one to apply? Also once a transformation was applied other hints may no longer be applicable.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{One of the powerful features of HLint is its customizability. An HLint (called }\texttt{\DIFdel{hlint.yaml}}%DIFAUXCMD
\DIFdel{) configuration file added to the root of a project   will be detected by HLint and it will suggest both the default hints as well as the custom hints from that file. Hints are very simple to write.
}%DIFDELCMD < 

%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < [caption={A simple hint from~\citep{hlint}}, label=lstHint]
%DIFDELCMD < - hint: {lhs: x !! 0, rhs: head x}
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Listing~\ref{lstHint} contains the definition of a hint that detects the list index operator is being used to look up the 0th element of a list and suggests using }\texttt{\DIFdel{head}} %DIFAUXCMD
\DIFdel{instead. The }\texttt{\DIFdel{lhs}} %DIFAUXCMD
\DIFdel{tag is the code HLint will search for. If code matching }\texttt{\DIFdel{lhs}} %DIFAUXCMD
\DIFdel{is found  HLint will suggest the code be replaced with the }\texttt{\DIFdel{rhs}} %DIFAUXCMD
\DIFdel{code. HLint assumes any single character variable is a substitution parameter. Given the hint from listing~\ref{lstHint} and the following code:
}%DIFDELCMD < 

%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < f list = list !! 0
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{HLint produces the following output.
}%DIFDELCMD < 

%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD <  Suggestion: Use head
%DIFDELCMD < Found:
%DIFDELCMD <   list !! 0
%DIFDELCMD < Why not:
%DIFDELCMD <   head list
%DIFDELCMD < 

%DIFDELCMD < 1 hint
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < 

%DIFDELCMD < \subsection{Haskell Tools Refact}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{HaRe isn't the only refactoring tool for Haskell. In late 2016 Haskell Tools Refact was announced and is currently at version 0.7~\mbox{%DIFAUXCMD
\citep{haskellTools}}\hspace{0pt}%DIFAUXCMD
. The Haskell Tools project is a GHC based developer tool kit~\mbox{%DIFAUXCMD
\citep{haskellToolsGit}}\hspace{0pt}%DIFAUXCMD
. There are six refactorings currently supported.
}%DIFDELCMD < 

%DIFDELCMD < \begin{itemize}
\begin{itemize}%DIFAUXCMD
%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Rename
}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Generate type signature
}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Generate exports
}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Extract binding
}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Inline binding
}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Organize imports
}
\end{itemize}%DIFAUXCMD
%DIFDELCMD < \end{itemize}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Haskell-tools has implemented  its own abstract syntax tree. The AST of haskell-tools is generated using information from all of GHC's compiler stages. Each node represents the same language elements it just includes additional information that is spread across the different stages of the GHC~\mbox{%DIFAUXCMD
\citep{haskellTools}}\hspace{0pt}%DIFAUXCMD
.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The haskell-tools refactorer is currently integrated into the Atom editor}\footnote{%DIFDELCMD < \url{https://atom.io/}%%%
} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{with Sublime Text}\footnote{%DIFDELCMD < \url{https://www.sublimetext.com}%%%
} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{support planned for the near future~\mbox{%DIFAUXCMD
\citep{haskellTools}
}\hspace{0pt}%DIFAUXCMD
}%DIFDELCMD < 

%DIFDELCMD < \subsection{Wrangler}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Wrangler is a refactoring and code inspection tool for Erlang~\mbox{%DIFAUXCMD
\citep{wrangler}}\hspace{0pt}%DIFAUXCMD
. Erlang is a functional programming language designed to be massively scalable and highly fault tolerant~\mbox{%DIFAUXCMD
\citep{erlang}}\hspace{0pt}%DIFAUXCMD
. It was originally developed in 1986 by Joe Armstrong, Robert Virding, and Mike Williams at the Computer Science Laboratory at Ericsson Telecom AB~\mbox{%DIFAUXCMD
\citep{erlangHistory}}\hspace{0pt}%DIFAUXCMD
. Erlang's core design tenets include lightweight processes, that communicate through message passing. Erlang also boasts a ``let it fail" error handling architecture, where processes either succeed or fail and other specialised processes handle the error~\mbox{%DIFAUXCMD
\citep{armstrongThesis}}\hspace{0pt}%DIFAUXCMD
.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Wrangler is accessible from the command line and has been integrated into both Emacs and Eclipse and currently supports a large library of refactorings, code smells, as well as other program analysis tools such as clone detection and automatic API migration~\mbox{%DIFAUXCMD
\citep{wrangler}}\hspace{0pt}%DIFAUXCMD
. Additionally Wrangler supports a template based API and a domain specific language which allow users to define their own refactorings and script composite refactorings~\mbox{%DIFAUXCMD
\citep{wranglerDomain}}\hspace{0pt}%DIFAUXCMD
.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The template-based API of Wrangler allows users to define program analyses and transformations using Erlang concrete syntax. Wrangler templates consist of fragments of Erlang syntax that may contain meta-variables or meta-atoms that can stand for any language element. Meta-variables/atoms are variables or atoms that end with the "}\texttt{\DIFdel{@}}%DIFAUXCMD
\DIFdel{" character, this meta-variable/atom will then stand for the next language element. Meta-variables/atoms that end with "}\texttt{\DIFdel{@@}}%DIFAUXCMD
\DIFdel{" are list meta-variables/atoms that match a sequence of language elements as long as they are of same sort~\mbox{%DIFAUXCMD
\citep{letsUser}}\hspace{0pt}%DIFAUXCMD
.}\footnote{\DIFdel{Things like the arguments to a function or a sequence of expressions in a function body are the same "sort."}}
%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < [language=erlang]
%DIFDELCMD < ?T("erlang:spawn(Arg@)")
%DIFDELCMD < 

%DIFDELCMD < ?T("erlang:spawn(Arg@@)")
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{Some Wrangler templates}}
%DIFAUXCMD
%DIFDELCMD < \label{templates}
%DIFDELCMD < \end{figure}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The first template in figure~\ref{templates} matches applications of }\texttt{\DIFdel{erlang:spawn}} %DIFAUXCMD
\DIFdel{when it is called with one argument whereas the second template will match the same function with any number of arguments. }%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Composite refactorings are refactorings that are made up of multiple refactorings run, in sequence, one after the other. It can be challenging to develop composite refactorings if they are not explicitly handled by the refactoring tool. The naive solution just chains refactorings together with the output from one refactoring in a composite refactoring becoming the input to the next refactoring. However, what if the second refactoring fails in a chain of four? composite refactoring definitions, without tool support, become filled with error handling code to manage the situation when one of the component refactorings fail. Wrangler defines a domain specific language that helps describe the various facets of a composite refactoring.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The Wrangler DSL supports the creation of a composite refactoring through a variety of features. First, Wrangler extends every primitive refactoring with a }\textit{\DIFdel{refactoring command generator}}%DIFAUXCMD
\DIFdel{. A command generator allows the extended refactoring to accept not just concrete values but also structures that specify how the parameter should be generated; each parameter of a command generator accepts either a concrete value, a condition that checks if a value is satisfactory, or a generator for creating the parameter based on the previous parameters. A refactoring for renaming functions named with the format }\texttt{\DIFdel{camelCase}} %DIFAUXCMD
\DIFdel{to }\texttt{\DIFdel{camel\_case}} %DIFAUXCMD
\DIFdel{would accept three arguments the target filename, the name of the target function, and the desired new name. This command generator's first parameter is a condition that always returns true because any file is a valid target for renaming. The second parameter is another condition that checks if the function name matches is in camel case format (e.g. "}\texttt{\DIFdel{aFunName}}%DIFAUXCMD
\DIFdel{"). The final parameter is generated by taking the second parameter and modifying it so that the name is in the corresponding "snake case" format (e.g. "}\texttt{\DIFdel{a\_fun\_name}}%DIFAUXCMD
\DIFdel{"). 
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The DSL also allows decision making to occur during the execution of a composite refactoring. Composite refactorings are transactional and can be either atomic or non-atomic. Atomic composite refactorings require each component refactoring to be successfully applied before continuing onto the next refactoring. If a single refactoring fails inside of an atomic composite refactoring the entire refactoring fails and the program remains unchanged. When a single refactoring fails inside of a non-atomic composite refactoring, correspondingly, the entire refactoring will not fail and continue by trying the next refactoring in the sequence. The Wrangler DSL allows for refactorings to described as atomic and non-atomic sections at each level.  
}%DIFDELCMD < 

%DIFDELCMD < \subsection{ROTOR}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Another newcomer to the refactoring tools for functional language space is ROTOR}\footnote{\textbf{\DIFdel{R}}%DIFAUXCMD
\DIFdel{eliable }\textbf{\DIFdel{O}}%DIFAUXCMD
\DIFdel{Caml-base }\textbf{\DIFdel{T}}%DIFAUXCMD
\DIFdel{ool for }\textbf{\DIFdel{O}}%DIFAUXCMD
\DIFdel{Caml }\textbf{\DIFdel{R}}%DIFAUXCMD
\DIFdel{efactoring}} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{the first refactoring tool to target OCaml~\mbox{%DIFAUXCMD
\citep{rotor}}\hspace{0pt}%DIFAUXCMD
. Language features of OCaml provide some unique challenges for a refactoring tool. In OCaml one module may be included in another so that, for example, when renaming the function }\texttt{\DIFdel{f}} %DIFAUXCMD
\DIFdel{in module }\texttt{\DIFdel{A}} %DIFAUXCMD
\DIFdel{but }\texttt{\DIFdel{A}} %DIFAUXCMD
\DIFdel{is included in module }\texttt{\DIFdel{B}} %DIFAUXCMD
\DIFdel{the both }\texttt{\DIFdel{A.f}} %DIFAUXCMD
\DIFdel{and }\texttt{\DIFdel{B.f}} %DIFAUXCMD
\DIFdel{will need to be renamed. 
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{An opportunity of the ROTOR project is that it has an partner in industry, Jane Street Capital. ROTOR is using the core library}\footnote{%DIFDELCMD < \url{https://github.com/janestreet/core}%%%
} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{an "industrial strength" version of the OCaml standard library as a test bed for testing the refactoring tool.
 }%DIFDELCMD < 

%DIFDELCMD < \section{Novel applications of Refactoring}%DIFDELCMD < \label{otherTools}%%%
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Though automated refactoring was originally developed for the object-oriented language Smalltalk~\mbox{%DIFAUXCMD
\citep{refactoringBrowser} }\hspace{0pt}%DIFAUXCMD
there has been recent work on building tools that refactor things that are not traditional programming languages. This section with briefly talk about two novel applications of refactoring, spreadsheets and wikis.
}%DIFDELCMD < 

%DIFDELCMD < \subsection{Spreadsheet refactoring}
%DIFDELCMD < %%%
\DIFdel{Spreadsheets are a widely used tool in industry, with 13 million end users that are estimated to be "programming" with spreadsheets or databases in 2012; this figure dwarfs 3 million professional programmers at that time~\mbox{%DIFAUXCMD
\citep{endUsers}}\hspace{0pt}%DIFAUXCMD
. Spreadsheets can also exhibit "bad smells" much like the code smells described by Fowler. End-users are also able to detect "spreadsheet smells" and agree that these patterns should be avoided~\mbox{%DIFAUXCMD
\citep{spreadsheetSmells}}\hspace{0pt}%DIFAUXCMD
. This indicates that even though it isn't a traditional form of programming, spreadsheets are a valid target for refactoring.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{BumbleBee is a refactoring tool whose grammar is built from the grammer for Excel formulas~\mbox{%DIFAUXCMD
\citep{bumblebee}}\hspace{0pt}%DIFAUXCMD
. Many spreadsheet users have no formal programming training so creating a language that would be familiar to them is important. The grammar of BumbleBee consists of two Excel formulas associated by a "$\leftrightarrow$" operator that indicates the two formula can be transformed into each other.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\begin{displaymath} \DIFdel{A1+A2+A3 \leftrightarrow SUM(A1:A3) }\end{displaymath}
%DIFAUXCMD
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The previous example means that adding together the cells A1, A2, and A3 could be replaced by a call to the $SUM$ function. BumbleBee also has metavariables that stand in for certain constructs from the Excel language. A "$C$" represents a cell, "$R$" a range, and "$P$" a constant. The following example transforms a sum over any range divided by the length of that range into a call to the average function over the same range.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\begin{displaymath} \DIFdel{SUB(R)/COUNT(R) \leftrightarrow AVERAGE(R) }\end{displaymath}
%DIFAUXCMD
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Finally BumbleBee allows for cells to be referenced in the form ${i,j}$ and cells can be connected into groups with the "$...$" operator. This enables the following transformation rule that replaces a chain of sums to be replaced with a single call to $SUM$.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\begin{displaymath} \DIFdel{\{i,j\} + ... + \{m,n\} \leftrightarrow SUM(\{i,j\}:\{m,n\}) }\end{displaymath}
%DIFAUXCMD
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The goal of this syntax is to allow user familiar with Excel but not another programming language to be able to create their own translations. BumbleBee is implemented as an add on for Excel so users can work with it in a familiar environment.
}%DIFDELCMD < 

%DIFDELCMD < \subsection{Wiki refactoring}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Another increasingly popular type of ``end user programming'' is writing Wikis. A wiki is essentially just a user-editable graph of articles and categories. Articles store written information and categories are keywords that characterize articles. As a community adds content to a wiki the articles that make up the wiki require refactoring. This may involve splitting an article into multiple articles, or moving an article from one category to another. Currently wiki engines (e.g. MediaWiki the engine for Wikipedia) are entirely focused on content creation and offer little to no support for refactoring.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Refactoring a wiki is a surprisingly difficult task. Beyond just the content of an article there is also the history of changes to that article and the "talk" page where users can discuss about the content of an article. A refactoring needs to preserve two key properties of the wiki, authorship independence and readership independence. Authorship independence means that a refactoring will preserve the attribution of content to the original author(s). Readership independence implies that a refactoring will not alter the content of the wiki, a refactoring will only change how content is distributed throughout the wiki.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Splitting an article into two is a common refactoring that needs to take place, common enough that Wikipedia documents the process.}\footnote{%DIFDELCMD < \url{http://en.wikipedia.org/wiki/Wikipedia:Splitting}%%%
} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{Article splitting occurs when an article describes multiple things that would be better served by having two dedicated articles. The new article that is created takes all of its content from the source article. The split refactoring maintains authorship independence by adding a note in the talk and recent changes pages of the new article indicating that the new article was originally a section of the source article. Additionally similar changes need to be added to the talk and recent changes pages of the source article as well. These changes allow for authorship to be tracked both through the source and new articles.
}%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < 	\begin{center}
%DIFDELCMD < 		\includegraphics[scale=.4]{images/wikiwhirl_mindmap.png}
%DIFDELCMD < 	\end{center}
%DIFDELCMD < 	%%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{A screen shot of WikiWhirl taken from }%DIFDELCMD < \url{http://www.onekin.org/portal/wikiwhirl}%%%
}
	%DIFAUXCMD
%DIFDELCMD < \label{wikiwhirl}
%DIFDELCMD < \end{figure}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Readership independence is maintained during an article split by adding a new section to the source article that summarises the content that was moved to the newly created article. This allows people reading the source article to still understand what the split of section contained. This process is tedious and poorly supported by the wiki engine which makes it an ideal target for automated refactoring.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{WikiWhirl is a visual DSL for refactoring wikis~\mbox{%DIFAUXCMD
\citep{wikiWhirl}}\hspace{0pt}%DIFAUXCMD
. WikiWhirl is built on top of FreeMind an open source mind mapping tool~\mbox{%DIFAUXCMD
\citep{freeMind}}\hspace{0pt}%DIFAUXCMD
. Mind mapping is a method of visualizing the relationships between pieces of  hierarchical information. WikiWhirl represents a wiki as a mind map and users can then refactor the wiki using this view (see figure~\ref{wikiwhirl}). }%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{WikiWhirl currently supports eight refactorings. They are:
}%DIFDELCMD < 

%DIFDELCMD < \begin{itemize}
\begin{itemize}%DIFAUXCMD
%DIFDELCMD < 	\item %%%
\item%DIFAUXCMD
\DIFdel{Create an article/category
	}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Categorize an article
	}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Uncategorize an article
	}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Rename an article/category
	}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Delete an article/category
	}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Merge two articles/categories
	}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Split an article/category
	}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Move a section from one article to another
}
\end{itemize}%DIFAUXCMD
%DIFDELCMD < \end{itemize}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Both wikis and spreadsheets are common data formats that deal with the same "smell" and quality issues that software projects experience. The difference between software development and these applications is that the users of the latter will not necessarily be technology professionals. Refactoring tools for wikis and spreadsheets have to be usable by laypeople not just software engineers. 
}%DIFDELCMD < 

%DIFDELCMD < \section{Refactoring to introduce parallelism}%DIFDELCMD < \label{refacParallel}%%%
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The reasons to refactor source code have also expanded beyond code quality. This section will describe two different projects that have developed refactorings to change the execution of a program from single to multi-threaded. Functional programming languages are well suited to parallel execution due to immutability by default and in some languages (such as Erlang) first class concurrency features. This section will first describe the ``ParaForming'' which uses refactoring to introduce parallel abstractions into Haskell code, then it will describe work done to refactor Erlang code to introduce algorithmic skeletons.
}%DIFDELCMD < 

%DIFDELCMD < \subsection{ParaForming}  
%DIFDELCMD < %%%
\DIFdel{ParaForming is an approach to construct parallel programs from an existing program using software refactoring~\mbox{%DIFAUXCMD
\citep{paraforming}}\hspace{0pt}%DIFAUXCMD
. The ParaForming work targets Glasgow parallel Haskell (GpH), an extension to Haskell, and is implemented in HaRe. Parallelism is added to programs in GpH using strategies (see figure~\ref{strategy}). 
}%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < type Strategy a = a -> Eval a
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{The strategy type}}
%DIFAUXCMD
%DIFDELCMD < \label{strategy}
%DIFDELCMD < \end{figure}   
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{A strategy takes its argument and determines how it will be evaluated inside of the }\texttt{\DIFdel{Eval}} %DIFAUXCMD
\DIFdel{monad. The }\texttt{\DIFdel{rpar}} %DIFAUXCMD
\DIFdel{strategy introduces parallelism by "sparking" its argument. A sparks are tasks that are collected into a pool which is managed by the runtime. The spark pool is a source of work that GHC can pull from when there are idle processors. Sparks may be evaluated in parallel or not at all depending on the availability of spare cores.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The simplest parallel refactoring is to introduce data parallelism. This refactoring is applied to an expression works over a list and evaluates each member of that list in a spark. A sequential function that sums the Euler totient function is in figure~\ref{eulerSeq} and the refactored program is in figure~\ref{eulerPar1}
}%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < sumEulerSeq :: Int -> Int
%DIFDELCMD < sumEulerSeq n = sum (map euler (mkList n))
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{A sequential calculation that sums the Euler totient function}}
%DIFAUXCMD
%DIFDELCMD < \label{eulerSeq}
%DIFDELCMD < \end{figure} 
%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < sumEulerPar1 :: Int -> Int
%DIFDELCMD < sumEulerPar1 n = sum (map euler (mkList n) `using` parList rdeepseq)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{A refactored version of the function from figure~\ref{eulerSeq}}}
%DIFAUXCMD
%DIFDELCMD < \label{eulerPar1}
%DIFDELCMD < \end{figure} 
%DIFDELCMD <   

%DIFDELCMD < %%%
\DIFdel{This refactoring evaluates the calculation of }\texttt{\DIFdel{map euler (mkList n)}} %DIFAUXCMD
\DIFdel{using}\footnote{\DIFdel{The (}\texttt{\DIFdel{using :: a -> Strategy a -> a}}%DIFAUXCMD
\DIFdel{) function just evaluates some expression with the given strategy.}} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdelend \DIFaddbegin \DIFadd{Unlike the other refactorings developed for this thesis this transformation has a long history in }\DIFaddend the \DIFdelbegin \texttt{\DIFdel{parList rdeepseq}} %DIFAUXCMD
\DIFdel{strategy. The }\texttt{\DIFdel{parlist}}%DIFAUXCMD
\footnote{\texttt{\DIFdel{parlist :: Strategy a -> Strategy }%DIFDELCMD < [%%%
\DIFdel{a}%DIFDELCMD < ]%%%
}%DIFAUXCMD
} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{function evaluates each element of a list in parallel according to a given strategy and }\texttt{\DIFdel{rdeepseq}} %DIFAUXCMD
\DIFdel{is the strategy the fully evaluates its argument. 
  }%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The refactored program in figure~\ref{eulerPar1} is highly parallel but not very efficient because the parallelism is too fine grained. Another refactoring can help in this case instead of sparking every element of a list another strategy can be introduced, one that separates the list into "chunks" and each of the chunks of the list is executed in parallel. This refactoring adds an additional argument to the function that determines how many chunks the list will be split into, as seen in figure~\ref{eulerChunk}.
  }%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < sumEulerChunk :: Int -> Int -> Int
%DIFDELCMD < sumEulerChunk c n = sum (map euler (mkList n) `using` parListChunk c rdeepseq)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{A "chunked" version of the function from figure~\ref{eulerSeq}}}
%DIFAUXCMD
%DIFDELCMD < \label{eulerChunk}
%DIFDELCMD < \end{figure}  
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{These two refactorings are both a way of introducing data parallelism with varying degrees of granularity. The other form of parallelism is known as task parallelism. Where data parallelism is focused on computing different parts of a data structure in parallel (the elements of a list in the previous case), task parallelism instead focuses on having different ``tasks'' excecuted in parallel. The work done in~\mbox{%DIFAUXCMD
\citep{paraforming} }\hspace{0pt}%DIFAUXCMD
outlines a refactoring that can make recursive calls happen in parallel.
}%DIFDELCMD < 

%DIFDELCMD < \subsection{Cost-Directed Parallel Refactoring}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The previous section touched on one of the big challenges of parallel programming, determining the correct level of parallelism to achieve maximum performance. \mbox{%DIFAUXCMD
\citep{parallelErl} }\hspace{0pt}%DIFAUXCMD
describes a methodology to introduce algorithmic skeletons into Erlang programs using the Erlang refactoring tool Wrangler. In addition to introducing a skeleton this work provides cost models that estimate the performance of the program after adding each skeleton. This estimate helps a programmer make an informed decision about what parallelisation strategy is the best for a particular program. 
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{An algorithmic skeleton is a common parallel pattern. A skeleton is implemented as a higher order function that takes in a sequential function and any parameters that the skeleton requires. \mbox{%DIFAUXCMD
\citep{parallelErl} }\hspace{0pt}%DIFAUXCMD
discusses the four most common and useful skeletons. For example, the map skeleton works by breaking up the target data into pieces that can be operated on in parallel. Finally the results from the the parallel computations are combined back into a single image. One of the examples presented in~\mbox{%DIFAUXCMD
\citep{parallelErl} }\hspace{0pt}%DIFAUXCMD
is that of an image processing system that denoises images. Denoising a section of an image can be done independently from processing the other sections of the same image. The introducing the map skeleton would break the image into pieces to be denoised in parallel then the outputted sections can be stitched back together again
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Skeletons are simple to understand in theory but it can be difficult to know which to apply in practice. This is when the cost models of each skeleton become useful to help make an informed decision about which skeleton should cause the greatest speed up. In~\mbox{%DIFAUXCMD
\citep{parallelErl} }\hspace{0pt}%DIFAUXCMD
an initial benchmark of the program can 
be used to estimate the speed up that different skeletons could provide.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Parallelisation can be tedious and difficult to do which makes it a good candidate for tool assistance. A refactoring tool can guide a programmer through the process of parallelisation. Much like how the data-driven refactorings have multiple small changes are required before the entire process can be considered ``finished'' changing a program to run in parallel is also a sequence of several smaller changes.
}%DIFDELCMD < 

%DIFDELCMD < \section{ApplicativeDo}%DIFDELCMD < \label{applicativeDo}%%%
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Applicative functors, as discussed in Chapter~\ref{applicative}, are a fairly recent addition to GHC and their being made a superclass of }\texttt{\DIFdel{Monad}} %DIFAUXCMD
\DIFdel{has made many more instances of the typeclass than there were before. However, compiler changes can't force a community to change its practices and }\texttt{\DIFdel{Applicative}} %DIFAUXCMD
\DIFdel{remains underutilised compared to }\texttt{\DIFdel{Monad}}%DIFAUXCMD
\DIFdel{.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \DIFadd{Haskell programming community. }\DIFaddend The \DIFdelbegin \DIFdel{under-utilisation of applicatives in Haskell has not gone un-noticed; \mbox{%DIFAUXCMD
\citep{applicativeDo}}\hspace{0pt}%DIFAUXCMD
, reimplement the way Haskell desugars do notation so that applicatives will be supported by do notation as well. 
}%DIFDELCMD < 

%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < f = do
%DIFDELCMD < 	x1 <- A
%DIFDELCMD < 	x2 <- B x1
%DIFDELCMD < 	x3 <- C
%DIFDELCMD < 	return (x2,x3)
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The function }\texttt{\DIFdel{f}} %DIFAUXCMD
\DIFdel{would normally desugar into the following:
}%DIFDELCMD < 

%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < f = A >>=
%DIFDELCMD < 	(\x1 -> B x1 >>=
%DIFDELCMD < 		(\x2 -> C >>=
%DIFDELCMD < 			(\x3 -> return (x2,x3))))
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{With applicative do activated }\texttt{\DIFdel{f}} %DIFAUXCMD
\DIFdel{would now desugar to:
}%DIFDELCMD < 

%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < f = (\x2 x3 -> (x2, x3))
%DIFDELCMD <         <$> (A >>= (\x1 -> B x1))
%DIFDELCMD <         <*> C                
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The }\textit{\DIFdel{ApplicativeDo}} %DIFAUXCMD
\DIFdel{algorithm will attempt to insert as many applies into the expression as possible. When the implementation of }\DIFdelend \DIFaddbegin \DIFadd{design space for this refactoring had already been extensively covered in~(\mbox{%DIFAUXCMD
\cite{lammelReuse}}\hspace{0pt}%DIFAUXCMD
;~\mbox{%DIFAUXCMD
\cite{monadification}}\hspace{0pt}%DIFAUXCMD
;~\mbox{%DIFAUXCMD
\cite{clausMonadResponse}}\hspace{0pt}%DIFAUXCMD
). Rather than producing an additional style of monadification the contribution of this chapter is the implementation of a practical monadification refactoring based on }\DIFaddend the \DIFdelbegin \texttt{\DIFdel{Applicative}} %DIFAUXCMD
\DIFdel{instance evaluates the two arguments of apply in parallel, better performance can be achieved by adding more applies. 
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{There could be multiple ways to desugar a particular function. The }\textit{\DIFdel{applicativeDo}} %DIFAUXCMD
\DIFdel{algorithm first assumes that every expression have an identical time cost, from this assumption the algorithm heuristically determines the desugaring with the shortest execution time.
}%DIFDELCMD < 

%DIFDELCMD < \section{Program transformations}%DIFDELCMD < \label{typeTrans}%%%
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Refactoring is a type of program transformation but it does not constitute the whole field. A major difference between refactoring and other types of program transformations is scope that a refactoring must be aware of. Program transformations typically focus on just the algorithm whereas refactorings must take into account the broader effects a transformation has on a codebase and the context that programs exist in. Additionally the target program of a refactoring needs to be readable, maintainable, and keep proper layout and user comments. Other types of program transformation don't typically have these concerns. This section will describe some of the program transformation work most relevant to this thesis. First it will describe the type and transform system developed by~\mbox{%DIFAUXCMD
\citep{typeAndTransformSemantics}}\hspace{0pt}%DIFAUXCMD
. Next there will be a discussion of the previous methods of monadificationfound in the literature. 
}%DIFDELCMD < 

%DIFDELCMD < \subsection{Type and transform systems}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The type-and-transform system described in~\mbox{%DIFAUXCMD
\citep{typeAndTransformSemantics} }\hspace{0pt}%DIFAUXCMD
is a system for a semantics preserving and type changing program transformations over the typed lambda calculus with let polymorphism. The type-and-transform system is limited to isomorphic types, there must be a way to convert between the two types and back again as described in figure~\ref{transformIso}.
}%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < rep :: A -> R
%DIFDELCMD < abs :: R -> A
%DIFDELCMD < 

%DIFDELCMD < rep . abs = id
%DIFDELCMD < abs . rep = id
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{The properties that must hold for the type-and-transform system to work over types }\texttt{\DIFdelFL{A}} %DIFAUXCMD
\DIFdelFL{and }\texttt{\DIFdelFL{R}}%DIFAUXCMD
}
%DIFAUXCMD
%DIFDELCMD < \label{transformIso}
%DIFDELCMD < \end{figure} 
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{The type-and-transform system supports type-changing rewrites through typed rewrite rules that insert conversions between the source and target types as appropriate. To handle the fact that there are multiple ways to retype a program each rewrite rule is weighted to maximize the use of the target type, introduce the target type as soon as possible in the program, and delay the conversion back to the source type as late as possible.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{This work emphasises formalisation and its correctness and the work is done in the context of the lambda calculus rather than a full programming language. There is a Haskell implementation of their system but it is only a prototype though they state that they want to expand this work to work with Haskell however this has not been published yet.
}%DIFDELCMD < 

%DIFDELCMD < \subsection{Automatic Monadification}%DIFDELCMD < \label{erwigMonad}%%%
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Monadification is not a new problem and various solutions have been presented in the literature. In~\mbox{%DIFAUXCMD
\citep{lammelReuse} }\hspace{0pt}%DIFAUXCMD
monadification is performed in two steps. First the program is transformed into A-normal form}\footnote{\DIFdel{This is also known as sequencing}}%DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{, which flattens applications into let expressions. The first line of figure~\ref{anormal} shows a normal expression and line 3 of the same figure shows that expression in A-normal form.
}%DIFDELCMD < 

%DIFDELCMD < \begin{figure}[t]
%DIFDELCMD < \begin{lstlisting}%DIFDELCMD < 
%DIFDELCMD < f (g x) (h y)
%DIFDELCMD < 

%DIFDELCMD < let x1 = g x in
%DIFDELCMD < 	let x2 = h y in
%DIFDELCMD < 		f x1 x2
%DIFDELCMD < \end{lstlisting}
%DIFDELCMD < %%%
%DIFDELCMD < \caption{%
{%DIFAUXCMD
\DIFdelFL{A-normal form converstion}}
%DIFAUXCMD
%DIFDELCMD < \label{anormal}
%DIFDELCMD < \end{figure}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdel{Once the program has been converted into A-normal a let expression of the form $ let x = t1 in t2 $ is transformed into $ t1 >>= \lambda x. t2 $. If the right hand side of the lambda isn't already a monadic type then }\texttt{\DIFdel{return}} %DIFAUXCMD
\DIFdel{will be introduced, e.g. $ t1 >>= \lambda x. return~~t2 $. The full transformation is given by inference rules in~\mbox{%DIFAUXCMD
\citep{lammelReuse}}\hspace{0pt}%DIFAUXCMD
}\DIFdelend \DIFaddbegin \DIFadd{styles of monadification, from~\mbox{%DIFAUXCMD
\cite{clausMonadResponse}}\hspace{0pt}%DIFAUXCMD
, and algorithms for introducing them provided by (\mbox{%DIFAUXCMD
\cite{lammelReuse}}\hspace{0pt}%DIFAUXCMD
;~\mbox{%DIFAUXCMD
\cite{monadification}}\hspace{0pt}%DIFAUXCMD
)}\DIFaddend .

\DIFdelbegin \DIFdel{Monadification is developed further by~\mbox{%DIFAUXCMD
\citep{monadification}}\hspace{0pt}%DIFAUXCMD
. This work provides an algorithm for restricted call-by-value monadification as opposed to the semantics style inference rules defined in~\mbox{%DIFAUXCMD
\citep{lammelReuse}}\hspace{0pt}%DIFAUXCMD
. This work targets the lambda calculus extended with case and let expressions. The algorithm from~\mbox{%DIFAUXCMD
\citep{monadification} }\hspace{0pt}%DIFAUXCMD
is very similar to the one implemented in HaRe. It has the same precondition where every call to a monadified function must be fully saturated and it produces the same style of monadification. A prototype implementation of this method was produced.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend \chapter{Conclusion}
\label{chp:conc}

\DIFdelbegin %DIFDELCMD < \section{Summary of Contributions}
%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend This thesis has explored a new category of refactoring for functional programming languages. Data-driven refactorings are transformations that are \textit{driven} by the data types a program uses. The generalisation refactorings, for example, are motivated by a desire to allow \DIFdelbegin \DIFdel{programs to be used }\DIFdelend \DIFaddbegin \DIFadd{code to be re-used }\DIFaddend in more places (e.g. the ``Maybe to MonadPlus'' refactoring) or even to generalise the way a program is evaluated as in the ``monad to applicative'' refactoring where the \texttt{Applicative} \DIFdelbegin \DIFdel{typeclass doesn't }\DIFdelend \DIFaddbegin \DIFadd{type class does not }\DIFaddend force sequential evaluation of the arguments to apply (\texttt{<*>}) like bind (\texttt{>>=}), the monad operation, does.

\DIFaddbegin \DIFadd{These refactorings are all designed to help programmers redesign the data their programs use and manipulate. It has been shown in the literature that as programmers make structural decisions about the implementation of their programs, technical debt can build up, and this debt can only be paid through rewriting and/or refactoring~(\mbox{%DIFAUXCMD
\cite{techDebt} }\hspace{0pt}%DIFAUXCMD
and~\mbox{%DIFAUXCMD
\cite{fowler}}\hspace{0pt}%DIFAUXCMD
). The core idea behind this thesis is that the data representation decisions that are made during program development also accrue debt, and that refactoring is a valid and simple way to pay off this type of technical debt.  
}

\section{Summary of Contributions}

\DIFaddend The primary artifacts of this \DIFdelbegin \DIFdel{work }\DIFdelend \DIFaddbegin \DIFadd{thesis }\DIFaddend are my contributions to HaRe and its API. In particular the following contributions have been made:

\begin{itemize}
\item The design and implementation of a set of data-driven refactorings in HaRe. These refactorings are:
	\begin{itemize}
		\item Introduce a Type Synonym, \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{introSyn}
		\item Generalise Maybe to MonadPlus/Monad, \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{maybeMonadPlus} and \DIFdelbegin \DIFdel{chapter~\ref{generalImp}
		}\DIFdelend \DIFaddbegin \DIFadd{Chapter~\ref{chp:generalImp}
		}\DIFaddend \item List to Hughes List, \DIFdelbegin \DIFdel{section}\DIFdelend \DIFaddbegin \DIFadd{Section}\DIFaddend ~\ref{listToDlist} and Chapter~\DIFdelbegin \DIFdel{\ref{generalImp}
		}\DIFdelend \DIFaddbegin \DIFadd{\ref{chp:generalImp}
		}\DIFaddend \item Generalise Monad to Applicative, \DIFdelbegin \DIFdel{chapter~\ref{chp:applicative}		
	}%DIFDELCMD < \item %%%
\item%DIFAUXCMD
\DIFdel{Monadification	, chapter~\ref{chp:monadification}	
}\DIFdelend \DIFaddbegin \DIFadd{Chapter~\ref{chp:applicative}		
	}\DIFaddend \end{itemize}
\item \DIFaddbegin \DIFadd{The implementation of Monadification, Chapter~\ref{chp:monadification}	
}\item \DIFaddend An API for the creation of embeddable types refactorings, Chapter~\DIFdelbegin \DIFdel{\ref{generalImp}
}\DIFdelend \DIFaddbegin \DIFadd{\ref{chp:generalImp}
}\DIFaddend \item Enhancements and additions to HaRe's API, see \DIFdelbegin \DIFdel{chapter~\ref{generalImp}}\DIFdelend \DIFaddbegin \DIFadd{Chapter~\ref{chp:generalImp}}\DIFaddend . The API was extended while developing the refactorings mentioned above.
\begin{itemize}
	\item Functions that perform high level, common transformations, that many refactorings must do (e.g. wrapping a syntax element in \DIFdelbegin \DIFdel{parenthesis}\DIFdelend \DIFaddbegin \DIFadd{parentheses}\DIFaddend )
	\item Functions for retrieving particular syntax elements from the \DIFaddbegin \DIFadd{abstract syntax }\DIFaddend tree of an entire module such as getting the body of a function based on a position. 
	\item Functions that aid in working with ghc-exactprint and the \DIFdelbegin \DIFdel{annotations }\DIFdelend \DIFaddbegin \DIFadd{parsed GHC abstract syntax tree's annotations. These functions provide functionality }\DIFaddend such as changing the location of a syntax element relative to the prior syntax element or adding new annotations to a syntax element.
\end{itemize}	  
\end{itemize}
\DIFaddbegin 

\section{The GHC Tooling Ecosystem}

\DIFadd{The focus of the refactorings described in this thesis are the changes they make to the data that the target programs use; any structural changes that occur during the transformation are incidental. Despite not being the focus of this thesis, a great deal of effort had to go into understanding the state of the GHC tooling ecosystem and how to rewrite GHC Haskell to produce it. This section will summarize my experience with the current state of the program transformation ecosystem of GHC.
}

\DIFadd{I began working on HaRe when the latest version of GHC was 7.6 (at the time of writing it is 8.4.3). At this point all comments, whitespace information, and the positioning of many special characters were completely eliminated from the parsed abstract syntax. To preserve the layout of source files HaRe had to generate a separate tree structure that contained all the tokens that were not represented by the AST and some whitespace information. When it came time to print the modified source the AST and this token tree needed to be combined for the output to be formatted correctly. 
}

\DIFadd{A major addition to the utility of the GHC API came with the release of GHC 7.10. With this release the GHC parser would now return the location of all comments and tokens that had been previously discarded, in a separate structure as part of the parsed syntax tree. This implementation allowed compiler stages that depended on the parsed AST to remain unchanged because the new structure could be ignored. This addition to the GHC also coincided with the release of ghc-exactprint,}\footnote{\DIFadd{This is not coincidental. Alan Zimmerman is the programmer behind both the modification to the GHC and the ghc-exactprint library.}} \DIFadd{which was described in Section~\ref{ghcExactprint}. This library made transforming Haskell code much easier because it allowed the formatting to be done with relative positioning instead of absolute locations. Ghc-exactprint also greatly simplified the development of the common transformation library that was added to HaRe as a part of this thesis work. 
}

\DIFadd{In the short time that I have been a part of the Haskell tooling community it has grown and changed a great deal. The GHC has been changed specifically to better support tool builders, notable examples of this are~\mbox{%DIFAUXCMD
\cite{apiAnns} }\hspace{0pt}%DIFAUXCMD
and~\mbox{%DIFAUXCMD
\cite{treesThatGrow}}\hspace{0pt}%DIFAUXCMD
. There is also an initiative to create a pluggable editor interface for Haskell, haskell-ide-engine,}\footnote{\url{https://github.com/haskell/haskell-ide-engine}} \DIFadd{so that a Haskell environment can be easily incorporated into many different editors and IDEs and tool developers only have to target the haskell-ide-engine plugin system. The future of Haskell tooling is looking very bright! And in particular this makes adding HaRe to other IDEs that much easier. 
}

\subsection{Challenges of Working with the GHC API}

\DIFadd{Working with the GHC API is not without its challenges, however. The structure of the abstract syntax is very complex and spread across several modules. The core types of the tree are represented by many constructors; the expression type, for example, }\texttt{\DIFadd{HsExpr}}\DIFadd{, contains 51 different constructors as of GHC 8. This amount of complexity makes a generic programming library a mandatory prerequisite for working with the abstract syntax tree. 
}

\DIFadd{Fortunately for tool builders the number of types and constructors that represent the vast majority of ``standard'' Haskell code is much more limited. However, determining which constructors you need to target for any given transformation can be very tedious: in the author's experience this process involves looking at numerous printouts of abstract syntax trees. 
}

\DIFadd{This situation could be improved by centralising information for GHC tool builders. Currently the Haskell wiki hosts a single page ``GHC/As a library'' that gives a few examples of how to run the various stages over a single file~\mbox{%DIFAUXCMD
\citep{ghcAsALibrary}}\hspace{0pt}%DIFAUXCMD
. This is a fine general introduction to working with the GHC API but it does not contain many details about working with the abstract syntax or projects. The GHC developer wiki}\footnote{\url{https://ghc.haskell.org/trac/ghc/}} \DIFadd{does contain a great deal of detail about the inner workings of GHC, but it is not well organised or easy to find, and a lot of the material there is not directly relevant to tool builders. A centralised source of Haskell tool building information would help guide people new to the field greatly.
}

\DIFaddend \section{Future Work}
This work and HaRe can be extended in multiple ways. Development on HaRe is ongoing and will continue for the foreseeable future. 

\textit{\textbf{Reimplement Refactorings}} - When HaRe was updated to work with GHC the original implementations of the refactorings were no longer valid using the new back-end. An important task in the near future will be to go back and reimplement the refactorings that are currently not supported in HaRe.  

\DIFdelbegin \textit{\textbf{\DIFdel{haskell-ide-engine}}%DIFAUXCMD
} %DIFAUXCMD
\DIFdel{- Alan Zimmerman a major contributor to HaRe is currently working on a new project called the }\textit{\DIFdel{haskell-ide-engine}}%DIFAUXCMD
\footnote{%DIFDELCMD < \url{https://github.com/haskell/haskell-ide-engine}%%%
} %DIFAUXCMD
\addtocounter{footnote}{-1}%DIFAUXCMD
\DIFdel{which will allow Haskell tools to be integrated into IDEs via the Language Server Protocol a standard method of communication between tools and development environments~\mbox{%DIFAUXCMD
\citep{lsp}}\hspace{0pt}%DIFAUXCMD
. This project will allow for HaRe to be easily integrated into multiple environments including, Eclipse, Visual Studio Code, Sublime Text, Atom, and Emacs.
}%DIFDELCMD < 

%DIFDELCMD < %%%
\DIFdelend \textit{\textbf{Interactive refactorings}} - Many of the more complex refactorings would benefit from becoming more interactive. For example, after the ``introduce type synonym'' refactoring has finished, HaRe could highlight the different instances of the type that the synonym was introduced for and ask the user whether each instance of the type should be renamed using the new synonym or not. Another possible interactive feature of a refactoring would be to offer to refactor some code if it failed the preconditions for another refactoring. For example, if, when trying to refactor a \texttt{do} block to use applicative operations, the block fails the precondition requiring that all left hand side variables not be used in a right hand side expression, HaRe could suggest extracting the monadic code into its own function and then performing the original refactoring on the modified function.

\textit{\textbf{Template language for refactoring}} - A powerful feature of Wrangler is its template language~\citep{letsUser}. This allows for Wrangler refactorings to be defined using concrete Erlang syntax rather than the abstract syntax, lowering the difficulty in implementing refactorings significantly for programmers not familiar with the Erlang backend. A similar feature for HaRe would allow refactorings to be written without having to understand the syntax tree of GHC. Template Haskell is a template language for Haskell and is built into GHC. \DIFaddbegin \DIFadd{A significant amount of time while developing this thesis was spent exploring whether Template Haskell and its related feature, Quasiquotation~\mbox{%DIFAUXCMD
\citep{quasi}}\hspace{0pt}%DIFAUXCMD
, could be used to develop a template language for refactoring Haskell. }\DIFaddend Unfortunately, Template Haskell uses its own abstract syntax for Haskell code, which makes it difficult to use for refactoring GHC Haskell. However, there is a plan to change this, though it is not finished yet~\citep{ghcTreesGrow}\DIFaddbegin \DIFadd{.
}

\textit{\textbf{\DIFadd{Data-Driven refactorings for other languages}}} \DIFadd{- The data-driven refactorings presented in this thesis lean heavily on the types that Haskell provides and how they are used by the Haskell community. Data-driven refactorings for other programming languages would be very different from the refactorings presented in this thesis. The refactorings related to Applicative Functors and Monads are much less useful for other languages as they are a fairly unique feature of Haskell.}\footnote{\DIFadd{Haskell's }\textit{\DIFadd{purity}} \DIFadd{requires some way to handle effects, and monads are the primary way this is handled in Haskell. However, nothing prevents monads from being adopted in any language that supports parameterised types; they are just less useful in an effectful language.}} \DIFadd{Data-driven refactorings for other statically typed functional languages such as OCaml or Scala would be an interesting topic to explore further because of how different these languages are from Haskell.
}

\DIFadd{Every programming language requires developers to make decisions about the representation of data. These representations will accrue technical debt just like any other part of a program. Programs must be rewritten to pay back this debt; therefore, the data representations of a program will need to be rewritten throughout a program's development. This thesis has presented several refactorings that help Haskell programmers change how the data is represented within their programs}\DIFaddend . 


\bibliography{main}

% This index section is optional, use cleardoublepage and phantomsection to make the links work in your contents page. Uses makeidx package.
\cleardoublepage
\phantomsection
\label{index}
\printindex

\end{document}