Compare the Steepest Descent and Conjugate Gradient methods

clear
rng(1,'twister');
%close all

Generate a random symmetric positive definite matrix

n = 10;
B = rand(n,n);
%symmetrize to get a random symmetric matrix
B = 0.5*(B + B');
[V,D] = eig(B);
%make all n eigenvalues positive by drawing integers uniformly from [1, n^2]
Dp = randi(n^2,n,1);
%Dp = [1:n];
%Dp = [1,1,1,5,5,5,5,10,10,10];
A = V * diag(Dp) * V';
%prepare a column vector b for use in the quadratic functional
b_orig = rand(n,1);
b = b_orig + 0.00 * rand(n,1); %increase the 0.00 factor to perturb b with noise
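An optional sanity check: since A = V*diag(Dp)*V' with orthonormal V and positive Dp, it should be symmetric (up to round-off) and positive definite. A minimal sketch to confirm this:

%optional sanity check: A should be symmetric positive definite (up to round-off)
disp(['symmetry error of A     : ' num2str(norm(A - A','fro'))]);
disp(['smallest eigenvalue of A: ' num2str(min(eig(0.5*(A + A'))))]); %should be >= 1 by construction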

Prepare the quadratic functional and simulation parameters

f = @(x) 0.5 * x' * A * x - b' * x;
gradf = @(x) A * x - b;
%compute the true solution
x_true = A\b_orig;
%pick a random starting point
x0 = rand(n,1);
%sim params
max_iter = 50;
tol = 1e-6; %stop once the gradient norm falls below this
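Optionally, the analytic gradient A*x - b can be checked against a central finite-difference approximation of f at the starting point. A small sketch (the step h = 1e-6 and the names g_fd, e_i are introduced only for this check):

%optional check: compare gradf against central finite differences at x0
h = 1e-6;                        %finite-difference step (arbitrary choice)
g_fd = zeros(n,1);
for i = 1:n
    e_i = zeros(n,1); e_i(i) = 1;
    g_fd(i) = (f(x0 + h*e_i) - f(x0 - h*e_i)) / (2*h);
end
disp(['max |analytic - FD gradient| = ' num2str(max(abs(gradf(x0) - g_fd)))]);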

Solve using steepest descent with exact line search

%x = x - alpha * gf, where alpha = grad(f)' * grad(f)/(grad(f)'*A*grad(f))
x = x0;
x_SD = [x];              %log of iterates
gf = gradf(x);
gf_log = [norm(gf)];     %log of gradient norms
%initialize iteration counter
k = 0;
while norm(gf) > tol && k < max_iter
    alpha = gf'*gf/(gf'*A*gf);   %exact line-search step length along -gf
    x_new = x + alpha * (-gf);   %move
    gf = gradf(x_new);           %gradient for next step
    gf_log = [gf_log, norm(gf)];
    x = x_new;
    k = k+1;
    disp(['SD: k=' num2str(k) ', gf=' num2str(norm(gf))]);
    x_SD = [x_SD, x];
end
SD: k=1, gf=19.7954 SD: k=2, gf=6.1186 SD: k=3, gf=2.4806 SD: k=4, gf=1.2997 SD: k=5, gf=0.66802 SD: k=6, gf=0.50509 SD: k=7, gf=0.42454 SD: k=8, gf=0.43518 SD: k=9, gf=0.38779 SD: k=10, gf=0.40117 SD: k=11, gf=0.35776 SD: k=12, gf=0.37014 SD: k=13, gf=0.3301 SD: k=14, gf=0.34152 SD: k=15, gf=0.30457 SD: k=16, gf=0.31511 SD: k=17, gf=0.28102 SD: k=18, gf=0.29074 SD: k=19, gf=0.25929 SD: k=20, gf=0.26826 SD: k=21, gf=0.23925 SD: k=22, gf=0.24752 SD: k=23, gf=0.22075 SD: k=24, gf=0.22838 SD: k=25, gf=0.20368 SD: k=26, gf=0.21073 SD: k=27, gf=0.18793 SD: k=28, gf=0.19443 SD: k=29, gf=0.1734 SD: k=30, gf=0.1794 SD: k=31, gf=0.15999 SD: k=32, gf=0.16553 SD: k=33, gf=0.14762 SD: k=34, gf=0.15273 SD: k=35, gf=0.13621 SD: k=36, gf=0.14092 SD: k=37, gf=0.12568 SD: k=38, gf=0.13003 SD: k=39, gf=0.11596 SD: k=40, gf=0.11997 SD: k=41, gf=0.10699 SD: k=42, gf=0.1107 SD: k=43, gf=0.098722 SD: k=44, gf=0.10214 SD: k=45, gf=0.091089 SD: k=46, gf=0.094241 SD: k=47, gf=0.084046 SD: k=48, gf=0.086954 SD: k=49, gf=0.077548 SD: k=50, gf=0.080231
k_SD = k;
figure;
subplot(2,2,1);
plot(log(gf_log),'b','LineWidth',2); grid;
xlabel('iters');ylabel('Log($\|\nabla(f)\|$)','Interpreter','latex');title('Convergence: SD vs CG');
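The step length in the loop above is the exact line-search minimizer: with g = gradf(x), minimizing phi(alpha) = f(x - alpha*g) over alpha gives alpha = (g'*g)/(g'*A*g). An optional sketch that checks this closed form against a 1-D numerical minimization with fminbnd at the starting point (g0, phi, alpha_exact, alpha_num are names introduced only for the check):

%optional: verify the exact line-search step length at x0
g0 = gradf(x0);
alpha_exact = (g0'*g0)/(g0'*A*g0);   %closed-form minimizer of phi
phi = @(a) f(x0 - a*g0);             %objective restricted to the steepest descent ray
alpha_num = fminbnd(phi, 0, 10);     %numerical 1-D minimization over [0, 10]
disp(['alpha exact = ' num2str(alpha_exact) ', alpha via fminbnd = ' num2str(alpha_num)]);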

Optimize the same function using the conjugate gradient method

%x = x + alpha p, where alpha = r'*r/(p'*A*p)
x = x0;
r = gradf(x);            %initial residual (equals the gradient of f)
p = -r;                  %initial search direction = steepest descent direction
x_CG = [x];
gf = gradf(x);
gf_log = [norm(gf)];
k = 0;
while norm(gf) > tol && k < max_iter
    alpha = r'*r/(p'*A*p);       %exact step length along p
    x_new = x + alpha * p;
    %new x found, update params for next iter
    r_new = r + alpha * A * p;   %updated residual
    beta = r_new'*r_new/(r'*r);  %standard linear-CG beta
    p = -r_new + beta * p;       %next A-conjugate search direction
    r = r_new;
    x = x_new;
    %book keeping
    gf = gradf(x_new);
    gf_log = [gf_log, norm(gf)];
    k = k+1;
    disp(['CG: k=' num2str(k) ', gf=' num2str(norm(gf))]);
    x_CG = [x_CG, x];
end
CG: k=1, gf=19.7954 CG: k=2, gf=4.8087 CG: k=3, gf=1.6629 CG: k=4, gf=0.49969 CG: k=5, gf=0.45627 CG: k=6, gf=0.72266 CG: k=7, gf=0.35569 CG: k=8, gf=0.1212 CG: k=9, gf=0.002795 CG: k=10, gf=3.8083e-14
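In exact arithmetic, linear CG on an n-by-n symmetric positive definite system terminates in at most n steps, which matches the k = 10 iterations above for n = 10. As an optional cross-check, the hand-rolled loop can be compared with MATLAB's built-in pcg (note that pcg uses a relative residual tolerance rather than the absolute gradient-norm tol above):

%optional cross-check against MATLAB's built-in conjugate gradient solver
[x_pcg, flag, relres, iter_pcg] = pcg(A, b, 1e-10, max_iter, [], [], x0);
disp(['pcg: flag=' num2str(flag) ', iters=' num2str(iter_pcg) ...
      ', ||x_pcg - x_CG(:,end)|| = ' num2str(norm(x_pcg - x_CG(:,end)))]);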
hold;
Current plot held
plot(log(gf_log),'r','LineWidth',2);
legend(['SD ' num2str(k_SD) ' iters'],['CG ' num2str(k) ' iters']);
subplot(2,2,2);
if n==2
    plot(x0(1),x0(2),'ok','MarkerFaceColor','k')
    hold on;
    plot(x_true(1),x_true(2),'og','MarkerFaceColor','g')
    plot(x_SD(1,:),x_SD(2,:),'b-x')
    plot(x_CG(1,:),x_CG(2,:),'r-x')
    grid; title('Trajectory, SD (blue) vs CG (red)');
else
    plot(sort(eig(A),'descend'),'x-','LineWidth',2); grid;
    title('Eigenvalues of A');
end

See how the error evolved

subplot(2,2,3);
plot(log(vecnorm(x_SD-repmat(x_true,[1,1+k_SD]))),'b-x','LineWidth',2); grid;
ylabel('Log($\|x_k-x^*\|$)','Interpreter','latex'); xlabel('iters'); title('SD');
subplot(2,2,4);
plot(log(vecnorm(x_CG-repmat(x_true,[1,1+k]))),'r-x','LineWidth',2); grid;
ylabel('$\|x_k-x^*\|$','Interpreter','latex'); xlabel('iters'); title('CG');
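The error plots above use the Euclidean norm. The standard convergence bounds for both methods are stated in the A-norm (energy norm), ||e||_A = sqrt(e'*A*e), so it can also be instructive to log that quantity. A minimal sketch reusing x_SD, x_CG and x_true from above (errA_SD, errA_CG are names introduced only here):

%optional: energy-norm error ||x_k - x*||_A along both trajectories
E_SD = x_SD - repmat(x_true,[1,1+k_SD]);
E_CG = x_CG - repmat(x_true,[1,1+k]);
errA_SD = sqrt(sum(E_SD .* (A*E_SD), 1));   %row vector of A-norm errors, SD
errA_CG = sqrt(sum(E_CG .* (A*E_CG), 1));   %row vector of A-norm errors, CG
figure;
plot(log(errA_SD),'b-x','LineWidth',2); hold on;
plot(log(errA_CG),'r-x','LineWidth',2); grid;
ylabel('Log($\|x_k-x^*\|_A$)','Interpreter','latex'); xlabel('iters');
legend('SD','CG'); title('Energy-norm error');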