Skip to content

Commit 5ac54d2

Browse files
Add Repartition vs Coalesce
1 parent 1a80d6a commit 5ac54d2

File tree

1 file changed

+35
-35
lines changed

1 file changed

+35
-35
lines changed

chapters/spark-operations.tex

+35-35
Original file line numberDiff line numberDiff line change
@@ -177,41 +177,41 @@ \subsection{Narrow and Wide Transformations}\label{subsec:narrow-and-wide-transf
177177
\end{itemize}
178178
\end{frame}
179179

180-
%\subsection{Repartition vs. Coalesce}\label{subsec:repartition-vs-coalesce}
181-
%\begin{frame}
182-
% \frametitle{Repartition vs. Coalesce}
183-
% \begin{itemize}
184-
% \item In Apache Spark, repartition and coalesce are two methods used to change the number of partitions in an RDD (Resilient
185-
% Distributed Dataset).
186-
% \end{itemize}
187-
%\end{frame}
188-
%
189-
%\begin{frame}
190-
% \frametitle{Repartition vs. Coalesce}
191-
%
192-
% \begin{table}[h!]
193-
% \centering
194-
% \resizebox{\textwidth}{!}{%
195-
% \begin{tabular}{|p{2cm} |p{6cm} |p{6cm} |}
196-
% \hline
197-
% \rowcolor{Gray}
198-
% \hline
199-
% \textbf{Aspect} & \textbf{Repartition} & \textbf{Coalesce} \\
200-
% \hline
201-
% \textbf{Purpose} & \textcolor{blue}{Increases or decreases} the number of partitions. & \textcolor{blue}{Decreases} the number of partitions. \\
202-
% \hline
203-
% \textbf{Mechanism} & \textcolor{blue}{Shuffles all} the data across the network to create a new set of partitions. & \textcolor{blue}{Merges existing} partitions \textcolor{blue}{without} a full data shuffle. \\
204-
% \hline
205-
% \textbf{Use Case} & Ideal for increasing the number of partitions or significantly \textcolor{blue}{changing the distribution} of data. & Efficient for \textcolor{blue}{reducing the number} of partitions when the target number is less than the current number. \\
206-
% \hline
207-
% \textbf{Cost} & Expensive due to the \textcolor{blue}{full data shuffle}. & Less expensive than \texttt{repartition} as it \textcolor{blue}{minimizes data movement}. \\
208-
% \hline
209-
% \end{tabular}
210-
% }
211-
% \caption{Comparison of Repartition and Coalesce in Apache Spark}\label{tab:rerepartition-coalesce}
212-
% \end{table}
213-
%\end{frame}
214-
%
180+
\subsection{Repartition vs. Coalesce}\label{subsec:repartition-vs-coalesce}
181+
\begin{frame}
182+
\frametitle{Repartition vs. Coalesce}
183+
\begin{itemize}
184+
\item In Apache Spark, \texttt{repartition} and \texttt{coalesce} are two methods used to change the number of partitions in an RDD (Resilient
185+
Distributed Dataset).
186+
\end{itemize}
187+
\end{frame}
188+
189+
\begin{frame}
190+
\frametitle{Repartition vs. Coalesce}
191+
192+
\begin{table}[h!]
193+
\centering
194+
\resizebox{\textwidth}{!}{%
195+
\begin{tabular}{|p{2cm} |p{6cm} |p{6cm} |}
196+
\hline
197+
\rowcolor{Gray}
198+
\hline
199+
\textbf{Aspect} & \textbf{Repartition} & \textbf{Coalesce} \\
200+
\hline
201+
\textbf{Purpose} & \textcolor{blue}{Increases or decreases} the number of partitions. & \textcolor{blue}{Decreases} the number of partitions. \\
202+
\hline
203+
\textbf{Mechanism} & \textcolor{blue}{Shuffles all} the data across the network to create a new set of partitions. & \textcolor{blue}{Merges existing} partitions \textcolor{blue}{without} a full data shuffle. \\
204+
\hline
205+
\textbf{Use Case} & Ideal for increasing the number of partitions or significantly \textcolor{blue}{changing the distribution} of data. & Efficient for \textcolor{blue}{reducing the number} of partitions when the target number is less than the current number. \\
206+
\hline
207+
\textbf{Cost} & Expensive due to the \textcolor{blue}{full data shuffle}. & Less expensive than \texttt{repartition} as it \textcolor{blue}{minimizes data movement}. \\
208+
\hline
209+
\end{tabular}
210+
}
211+
\caption{Comparison of Repartition and Coalesce in Apache Spark}\label{tab:repartition-coalesce}
212+
\end{table}
213+
\end{frame}
214+
215215
%\begin{frame}[fragile]
216216
% \frametitle{High-Level Code: Repartition}
217217
% \begin{lstlisting}[language=scala,label={lst:rep-partition},caption={Repartition Code}]

0 commit comments

Comments
 (0)