multiphysicsL8.tex

%
% Copyright © 2014  Ahmed Dorrah, Peeter Joot.  All Rights Reserved.
% Licenced as described in the file LICENSE under the root directory of this GIT repository.
%
%\input{../blogpost.tex}
%\renewcommand{\basename}{multiphysicsL8}
%\renewcommand{\dirname}{notes/ece1254/}
%\newcommand{\keywords}{ECE1254H}
%\input{../peeter_prologue_print2.tex}
%
%\beginArtNoToc
%\generatetitle{ECE1254H Modeling of Multiphysics Systems.  Lecture 8: Conjugate gradient method.  Taught by Prof.\ Piero Triverio}
%\chapter{Conjugate gradient method}
\label{chap:multiphysicsL8}
%
%\section{Disclaimer}
%
%Peeter's lecture notes transcribed from notes
%
\section{Recap: Summary of Gradient method.}
\index{gradient method}
%
The gradient method allowed for a low cost iterative solution of a linear system
%
\begin{equation}\label{eqn:multiphysicsL8:20}
M \Bx = \Bb,
\end{equation}
%
without a requirement for complete factorization.
The residual was defined as the difference between \( \Bb \) and the application of \( M \) to any trial solution \( \By \)
%
\begin{equation}\label{eqn:multiphysicsL8:40}
\Br = \Bb - M \By.
\end{equation}
%
An energy function was introduced
%
\begin{equation}\label{eqn:multiphysicsL8:60}
\Psi(\By) = \inv{2} \By^\T M \By - \By^\T \Bb,
\end{equation}
%
which has an extremum at the point of solution.  The goal was to attempt to
follow the direction of steepest descent \( \Br^{(k)} = - \spacegrad \Psi \) with the hope of finding the minimum of the energy function.  That iteration is described by
%
\begin{equation}\label{eqn:multiphysicsL8:80}
\Bx^{(k + 1 )} =
\Bx^{(k  )} + \alpha_k \Br^{(k)},
\end{equation}
%
and illustrated in \cref{fig:lecture8:lecture8Fig1}.
%
\imageFigure{../figures/ece1254-multiphysics/lecture8Fig1}{Gradient descent.}{fig:lecture8:lecture8Fig1}{0.3}
%
The problem of the \textAndIndex{gradient method} is that it introduces multiple paths as sketched in \cref{fig:lecture8:lecture8Fig2}.
%
\imageFigure{../figures/ece1254-multiphysics/lecture8Fig2}{Gradient descent iteration.}{fig:lecture8:lecture8Fig2}{0.15}
%
This lengthens the total distance that has to be traversed in the iteration.
%
\section{Conjugate gradient method.}
\index{conjugate gradient}
%
The Conjugate gradient method makes the residual at step \( k \) orthogonal to all previous search directions
%
\begin{enumerate}
\item \( \Br^{(1)} \perp \Bd^{(0)} \)
\item \( \Br^{(2)} \perp \Bd^{(0)}, \Bd^{(1)} \)
\item \( \cdots \)
\end{enumerate}
%
After \( n \) iterations, the residual will be zero.
%
\paragraph{First iteration}
%
Given an initial guess \( \Bx^{(0)} \), proceed as in the gradient method
%
\begin{equation}\label{eqn:multiphysicsL8:100}
\Bd^{(0)} = - \spacegrad \Psi(\Bx^{(0)}) = \Br^{(0)},
\end{equation}
%
\begin{equation}\label{eqn:multiphysicsL8:120}
\Bx^{(1)} = \Bx^{(0)} + \alpha_0 \Br^{(0)},
\end{equation}
%
with
%
\begin{equation}\label{eqn:multiphysicsL8:140}
\alpha_0 = \frac
{ \lr{\Br^{(0)}}^\T \Br^{(0)} }
{ \lr{\Br^{(0)}}^\T M \Br^{(0)} },
\end{equation}
%
so that the residual is
\begin{equation}\label{eqn:multiphysicsL8:160}
\begin{aligned}
\Br^{(1)}
&= \Bb - M \Bx^{(1)}
\\ &= \Bb - M \Bx^{(0)} - \alpha_0 M \Br^{(0)}
\\ &= \Br^{(0)} - \alpha_0 M \Br^{(0)}.
\end{aligned}
\end{equation}
%
An orthogonality condition \( \Br^{(1)} \perp \Bd^{(0)} \) is desired.
%
\paragraph{Proof:}
%
\begin{equation}\label{eqn:multiphysicsL8:180}
\begin{aligned}
\innerprod{\Bd^{(0)}}{ \Br^{(1)}}
&=
\innerprod{\Bd^{(0)}}{ \Br^{(0)}}
-\alpha_0
\innerprod{\Bd^{(0)}}{ M \Br^{(0)}}
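\\ &=
\innerprod{\Br^{(0)}}{ \Br^{(0)}}
-\alpha_0
\innerprod{\Br^{(0)}}{ M \Br^{(0)}}
\\ &= 0,
\end{aligned}
\end{equation}
%
where the final step follows from \( \Bd^{(0)} = \Br^{(0)} \) and the choice of \( \alpha_0 \) in \cref{eqn:multiphysicsL8:140}.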
%
\paragraph{Second iteration}
%
\begin{equation}\label{eqn:multiphysicsL8:200}
\Bx^{(2)} = \Bx^{(1)} + \alpha_1 \Bd^{(1)}.
\end{equation}
%
The conditions to satisfy are
\begin{subequations}
\begin{equation}\label{eqn:multiphysicsL8:220}
\Bd^{(0)} \perp \Br^{(2)}
\end{equation}
\begin{equation}\label{eqn:multiphysicsL8:240}
\Bd^{(1)} \perp \Br^{(2)}
\end{equation}
\end{subequations}
%
Observe that the orthogonality condition of \cref{eqn:multiphysicsL8:220} is satisfied
%
\begin{equation}\label{eqn:multiphysicsL8:260}
\begin{aligned}
\innerprod{\Bd^{(0)}}{ \Br^{(2)}}
&=
\innerprod{\Bd^{(0)}}{ \Bb - M \Bx^{(2)}}
\\ &=
\innerprod{\Bd^{(0)}}{ \Bb - M \Bx^{(1)} - \alpha_1 M \Bd^{(1)}}
\\ &=
\mathLabelBox
[
   labelstyle={xshift=2cm},
   linestyle={out=270,in=90, latex-}
]
{
\innerprod{\Bd^{(0)}}{\Bb - M \Bx^{(1)}}
}
{\(= 0\) because \( \Bd^{(0)} \perp \Br^{(1)}\) }
-\alpha_1
\innerprod{\Bd^{(0)}}{ M \Bd^{(1)}}.
\end{aligned}
\end{equation}
%
This will be zero if an \( M \) orthogonality or conjugate zero condition can be imposed
%
\begin{equation}\label{eqn:multiphysicsL8:280}
\innerprod{\Bd^{(0)}}{M \Bd^{(1)}} = 0.
\end{equation}
%
To find a new search direction \( \Bd^{(1)} = \Br^{(1)} - \beta_0 \Bd^{(0)} \),
\cref{eqn:multiphysicsL8:280} is now imposed.
The \( \Br^{(1)} \) is the term from the standard gradient method, and the \( \beta_0 \Bd^{(0)} \) term is the conjugate gradient correction.  This gives \( \beta_0 \)
%
\begin{equation}\label{eqn:multiphysicsL8:300}
\beta_0 = \frac
{
\innerprod{\Bd^{(0)}}{M \Br^{(1)}}
}
{
\innerprod{\Bd^{(0)}}{M \Bd^{(0)}}
}
\end{equation}
%
Imposing \cref{eqn:multiphysicsL8:240} gives
%
\begin{equation}\label{eqn:multiphysicsL8:320}
\alpha_1 = \frac
{
\innerprod{\Bd^{(1)}}{\Br^{(1)}}
}
{
\innerprod{\Bd^{(1)}}{M \Bd^{(1)}}
}.
\end{equation}
%
\paragraph{Next iteration}
%
\begin{equation}\label{eqn:multiphysicsL8:340}
\begin{aligned}
\Bx^{(k + 1)} &= \Bx^{(k )} + \alpha_k \Bd^{(k)} \\
\Bd^{(k )} &= \Br^{(k )} - \beta_{k-1} \Bd^{(k-1)}
\end{aligned}
\end{equation}
%
The conditions to impose are
%
\begin{equation}\label{eqn:multiphysicsL8:360}
\begin{aligned}
	\Bd^{(0)} & \perp \Br^{(k + 1)} \\
	\vdots & \vdots \\
	\Bd^{(k)} & \perp \Br^{(k + 1)}
\end{aligned}
\end{equation}
%
However, there are only \( 2 \) degrees of freedom \( \alpha, \beta \), despite having many conditions to impose.
%
Impose the following to find \( \beta_{k-1} \)
%
\begin{equation}\label{eqn:multiphysicsL8:380}
	\Bd^{(k-1)} \perp \Br^{(k + 1)}
\end{equation}
%
(See slides for more)
%
\section{Full Algorithm.}
%
\input{conjugateGradientAlgorithm.tex}
%
% http://www.personal.ceu.hu/tex/breaking.htm
% the table below was getting interleaved with the algorithm above.
\clearpage
\section{Order analysis.}
\index{conjugate gradient!order analysis}
%
Note that for \( \Bx, \By \) in \R{n}, \( \innerprod{\Bx}{\By} = \Bx^\T \By \) is \( O(n) \), and \( M \Bx \) is \( O(n^2) \).
%
A conjugate gradient and LU comparison is given in \cref{tab:multiphysicsL8:400}, where \( k < n \)
%
\captionedTable{LU vs Conjugate gradient order.}{tab:multiphysicsL8:400}{
\begin{tabular}{|l|l|l|}
	\hline
                   & Full     & Sparse         \\ \hline
LU                 & \( O(n^3) \)   & \( O(n^{1.2-1.8}) \) \\ \hline
Conjugate gradient & \( O(k n^2) \) & \( O(k n)      \) \\ \hline
\end{tabular}
}
%
\paragraph{Final comments}
%
\begin{enumerate}
\item How to select \( \Bx^{(0)} \) ?  An initial rough estimate can often be found by solving a simplified version of the problem.
\item Is it necessary to reserve memory space to store \( M \)?  No.  If \( \By = M \Bz \), the product can be calculated without physically storing the full matrix \( M \).
\end{enumerate}
%
%\EndArticle
%\EndNoBibArticle