Python QR algorithm without Numpy for finding eigenvalues

#python

The practically important problem in computational mathematics is computing the eigenvalues of a matrix.

Computing the characteristic equation of a matrix involves computing a determinant, which is expensive, and should be avoided if possible.

Computing the roots to a polynomial equation is also a difficult problem. The standard algorithm for computing eigenvalues is called the QR-algorithm.This involves the QR-factorization of the matrix.

So let's start:

import math  #for sqrt function
from decimal import Decimal  # to be more precise in arithmetics instead of floats
import copy


initial_matrix = [[1, 2, 3, 4],
                     [4, 3, 2, 1],
                     [90, -90, 50, -50],
                     [-100, 500, -90, 45]]

def convert_matrix_to_decimals(matrix):
    for i in range(len(matrix)):
        for j in range(len(matrix[0])):
            matrix[i][j] =  Decimal(matrix[i][j])
    return matrix


n = len(initial_matrix)

initial_matrix = convert_matrix_to_decimals(initial_matrix)

And since we are working without Numpy let's add some functions to work with matrices:

def generate_null_matrix(dimension):
    elementary_matrix = [[] for i in range(dimension)]
    for i in range(dimension):
        for j in range(dimension): 
            elementary_matrix[i].append(Decimal(0.0))
    return elementary_matrix


def square_matrix_multiplication(matrix_a, matrix_b):
    result_matrix = generate_null_matrix(len(matrix_a))
    for i in range(n):
        for j in range(n):
            for k in range(n):
                result_matrix[i][j] += matrix_a[i][k] * matrix_b[k][j]
    return result_matrix


def transpose(matrix):
    result = generate_null_matrix(len(matrix))
    for i in range(len(matrix)):
        for j in range(i, len(matrix)):
            a = matrix[j][i]
            result[j][i] = matrix[i][j]
            result[i][j] = a
    return result

def round_matrix(matrix):
    temp = []
    for i in range(len(matrix)):
        temp.append(matrix[i].copy())

    for i in range(len(temp)):
        for j in range(len(temp)):
            temp[i][j] = float(round(temp[i][j], 2))
    return temp

def column(matrix, j):
    result = []
    for i in range(len(matrix)):
        result.append(Decimal(matrix[i][j]))
    return result

def row(matrix, i):
    return matrix[i].copy()



def set_column(matrix, j, vector):
    for i in range(len(matrix)):
        matrix[i][j] = Decimal(vector[i])
    return matrix

def vec1_minus_vec2(vec1, vec2):
    return [Decimal(vec1[i]) - Decimal(vec2[i]) for i in range(len(vec1))]

Now we are ready to do QR decomposition - usual way is to do it via Gram-Schmidt process:

So, let's code it:

def dot_product(vector1, vector2):
    result = Decimal(0)
    for i in range(len(vector1)):
        result += Decimal(vector1[i]) * Decimal(vector2[i])
    return result

def project_1_on_2(vector1, vector2):
    s = Decimal(dot_product(vector1, vector2)) / Decimal(dot_product(vector2, vector2))
    return [Decimal(i) * s for i in  vector2]

def Gram_Schmidt_orthogonalization(matrix):
    V = generate_null_matrix(len(matrix))
    for i in range(len(matrix)):
        v_i = column(matrix, i)
        for k in range(i):
            v_i = vec1_minus_vec2(v_i, project_1_on_2(column(matrix, i), column(V, k)))
        set_column(V, i, v_i)
    return V

def Gram_Schmidt_orthonormalization(matrix):
    V = Gram_Schmidt_orthogonalization(matrix)
    for i in range(len(V)):
        set_column(V, i, [Decimal(k) / Decimal(math.sqrt(dot_product(column(V, i), column(V, i)))) for k in column(V, i) ])
    return V

def Gram_Schmidt_R(matrix, Q): 
    R = generate_null_matrix(len(matrix))
    for col in range(len(matrix)):
        for row in range(col + 1):
            R[row][col] = dot_product(column(Q, row), column(matrix, col))
    return R

#QR decomposition
Q = Gram_Schmidt_orthonormalization(initial_matrix)
R = Gram_Schmidt_R(initial_matrix, Q)

And now we are ready to proceed with the simplest form of QR algorithm:

def eigenvalues_QR_algorithm(matrix):
    A_k = copy.deepcopy(matrix)
    U_k = generate_null_matrix(len(A_k))
    for i in range(len(U_k)):
        U_k[i][i] = Decimal(1.0)

    for k in range(100000):
        Q_k_plus1 = Gram_Schmidt_orthonormalization(A_k)
        R_k_plus1 = Gram_Schmidt_R(A_k, Q_k_plus1)
        A_k = square_matrix_multiplication(R_k_plus1, Q_k_plus1)
        U_k = square_matrix_multiplication(U_k, Q_k_plus1)
    eigenvalues = []
    for i in range(len(A_k)):
        eigenvalues.append(A_k[i][i])
    return eigenvalues, A_k, U_k

eigenvalues, A, Transformation = eigenvalues_QR_algorithm(initial_matrix)

Now we have eigenvalues, but the convergence of the algorithm is slow. In fact it can be arbitrarily slow if eigenvalues are very close to each other. Even if we assume that the number of steps is proportional to n we would get an O(n^4) complexity.

We can use Hessenberg QR algorithm to increase the speed, Hessenberg matrix is upper-diagonal matrix with one extra diagonal below the main one:

Now the QR algorithm using Givens rotations which are an instrument to get zero at the necessary cell:

The implementation might be found below:

def GivensMatrix(row1, row2, column, matrix):
    G = generate_null_matrix(len(matrix))
    length = Decimal(math.sqrt(math.pow(Decimal(matrix[row1][column]), 2) + math.pow(Decimal(matrix[row2][column]), 2)))
    for i in range(len(G)):
        if(i == row1 or i == row2):
            G[i][i] = Decimal(matrix[row1][column]) / length
        else:
            G[i][i] = Decimal(1.0)
    G[row1][row2] = Decimal(matrix[row2][column]) / length
    G[row2][row1] = Decimal(-matrix[row2][column]) / length
    return G

#Get upper Hessenberg form
def Upper_Hessenberg(matrix):
    H = copy.deepcopy(matrix)
    PG = generate_null_matrix(len(H))
    for i in range(len(H)):
        PG[i][i] = Decimal(1.0)

    for col in range(len(matrix) - 2):
        for row in range(col + 2, len(matrix)):
            G = GivensMatrix(col + 1, row, col, H)
            H = square_matrix_multiplication(square_matrix_multiplication(G, H),transpose(G))
            PG = square_matrix_multiplication(G, PG)
    return H, PG

def QR_decomposition_Givens_for_Hessenberg(matrix):
    R = copy.deepcopy(matrix)
    Q = generate_null_matrix(len(R))
    for i in range(len(R)):
        Q[i][i] = Decimal(1.0)
    for i in range(len(R)-1):
        G = GivensMatrix(i, i+1, i, R)
        Q = square_matrix_multiplication(G, Q)
        R = square_matrix_multiplication(G, R)
    return(transpose(Q),R)

H, PG = Upper_Hessenberg(initial_matrix)
Q,R = QR_decomposition_Givens_for_Hessenberg(H)

def HessenbergQR(matrix):
    A = copy.deepcopy(matrix)
    U_k = generate_null_matrix(len(A))
    for i in range(len(U_k)):
        U_k[i][i] = Decimal(1.0)

    H_k, PG = Upper_Hessenberg(A)

    for i in range(100):
        inverse_pg, R_k = QR_decomposition_Givens_for_Hessenberg(H_k)
        H_k = square_matrix_multiplication(R_k, inverse_pg)
        U_k = square_matrix_multiplication(U_k, inverse_pg)
    eigenvalues = []
    for i in range(len(H_k)):
        eigenvalues.append(H_k[i][i])
    U_k = square_matrix_multiplication(transpose(PG), U_k)
    return(eigenvalues, H_k, U_k)

eigenvalues_h, H_k, Transformation_h = HessenbergQR(initial_matrix)

This approach is O(N^3) and is much more stable than the first one. One can also enhance the algorithm by using Householder rotations instead of Givens and use so called spectral shifts or Raylaigh shifts to enhance convergence but it is a topic for another post.