Linear Systems - Python Tutorial

Python Tutorial

II. Linear Systems of Algebraic Equations


1. Matrices and Vectors

Understanding the relationship between matrices and vectors is fundamental to linear algebra:

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Define two 2-D vectors for the demonstrations below
v1 = np.array([3, 2])
v2 = np.array([1, 4])

# Vector operations: elementwise add/subtract, scalar multiple, dot product
v_sum = v1 + v2
v_diff = v1 - v2
scalar_mult = 2 * v1
dot_product = np.dot(v1, v2)

print("Vector v1:", v1)
print("Vector v2:", v2)
print("v1 + v2 =", v_sum)
print("v1 - v2 =", v_diff)
print("2 * v1 =", scalar_mult)
print("v1 · v2 =", dot_product)

# Visualize vectors
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Left plot: Vector operations (arrows drawn from the origin, in data units)
ax1.quiver(0, 0, v1[0], v1[1], angles='xy', scale_units='xy', scale=1, color='blue', width=0.01)
ax1.quiver(0, 0, v2[0], v2[1], angles='xy', scale_units='xy', scale=1, color='red', width=0.01)
ax1.quiver(0, 0, v_sum[0], v_sum[1], angles='xy', scale_units='xy', scale=1, color='green', width=0.01)

# Add labels at each arrow's midpoint
ax1.text(v1[0]/2, v1[1]/2, 'v1', fontsize=12, ha='center')
ax1.text(v2[0]/2, v2[1]/2, 'v2', fontsize=12, ha='center')
ax1.text(v_sum[0]/2, v_sum[1]/2, 'v1+v2', fontsize=12, ha='center')

# Parallelogram for vector addition
ax1.plot([v1[0], v_sum[0]], [v1[1], v_sum[1]], 'k--', alpha=0.5)
ax1.plot([v2[0], v_sum[0]], [v2[1], v_sum[1]], 'k--', alpha=0.5)

ax1.set_xlim(-1, 5)
ax1.set_ylim(-1, 7)
ax1.grid(True, alpha=0.3)
ax1.set_aspect('equal')
ax1.set_title('Vector Addition')
ax1.set_xlabel('x')
ax1.set_ylabel('y')

# Right plot: Matrix transformation of vectors
ax2.set_title('Matrix Transformation')

# Define transformation matrix
A = np.array([[2, 1], [-1, 2]])

# Create multiple vectors to transform: 20 samples around the unit circle
# (linspace includes both 0 and 2*pi, so the first/last points coincide)
theta = np.linspace(0, 2*np.pi, 20)
unit_vectors = np.array([np.cos(theta), np.sin(theta)])

# Transform vectors: each column of unit_vectors is mapped by A
transformed = A @ unit_vectors

# Plot original and transformed
ax2.plot(unit_vectors[0], unit_vectors[1], 'b-', linewidth=2, label='Original unit circle')
ax2.plot(transformed[0], transformed[1], 'r-', linewidth=2, label='After transformation A')

# Show some individual transformations (every 5th sample)
for i in range(0, 20, 5):
    ax2.arrow(0, 0, unit_vectors[0, i], unit_vectors[1, i], 
              head_width=0.1, head_length=0.05, fc='blue', ec='blue', alpha=0.5)
    ax2.arrow(0, 0, transformed[0, i], transformed[1, i], 
              head_width=0.1, head_length=0.05, fc='red', ec='red', alpha=0.5)

ax2.set_xlim(-3, 3)
ax2.set_ylim(-3, 3)
ax2.grid(True, alpha=0.3)
ax2.set_aspect('equal')
ax2.legend()
ax2.set_xlabel('x')
ax2.set_ylabel('y')

plt.tight_layout()
plt.show()

# 3D vectors example
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

# Define 3D vectors
v1_3d = np.array([1, 2, 3])
v2_3d = np.array([2, -1, 1])
v3_3d = np.cross(v1_3d, v2_3d)  # Cross product (perpendicular to both v1_3d and v2_3d)

# Plot vectors
ax.quiver(0, 0, 0, v1_3d[0], v1_3d[1], v1_3d[2], color='blue', arrow_length_ratio=0.1)
ax.quiver(0, 0, 0, v2_3d[0], v2_3d[1], v2_3d[2], color='red', arrow_length_ratio=0.1)
ax.quiver(0, 0, 0, v3_3d[0], v3_3d[1], v3_3d[2], color='green', arrow_length_ratio=0.1)

# Labels
ax.text(v1_3d[0], v1_3d[1], v1_3d[2], 'v1', fontsize=12)
ax.text(v2_3d[0], v2_3d[1], v2_3d[2], 'v2', fontsize=12)
ax.text(v3_3d[0], v3_3d[1], v3_3d[2], 'v1×v2', fontsize=12)

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title('Cross Product in 3D')

# Set equal aspect ratio
# NOTE(review): equal axis *limits* only; older mpl 3D axes do not enforce a
# truly equal aspect ratio — confirm rendering if exact proportions matter.
max_range = 6
ax.set_xlim([-max_range, max_range])
ax.set_ylim([-max_range, max_range])
ax.set_zlim([-max_range, max_range])

plt.show()

print("\nCross product v1 × v2 =", v3_3d)
print("Magnitude of cross product:", np.linalg.norm(v3_3d))

2. Solving Linear Systems in Python

Multiple methods exist for solving Ax = b. Let's explore and compare them:

import numpy as np
import matplotlib.pyplot as plt
import time

# Define a linear system Ax = b (3 equations, 3 unknowns)
A = np.array([[3, 1, -1],
              [2, -2, 4],
              [-1, 0.5, -1]])

b = np.array([1, -2, 0])

print("System of equations:")
print("3x + y - z = 1")
print("2x - 2y + 4z = -2")
print("-x + 0.5y - z = 0")
print("\nMatrix form: Ax = b")
print("A =")
print(A)
print("\nb =", b)

# Method 1: Using np.linalg.solve (most efficient; solves directly
# without forming A^-1)
print("\n=== Method 1: np.linalg.solve ===")
x1 = np.linalg.solve(A, b)
print("Solution x =", x1)
print("Verification: Ax =", A @ x1)
print("Should equal b =", b)

# Method 2: Using inverse matrix (educational, but less efficient
# and less numerically accurate than solve)
print("\n=== Method 2: Using inverse matrix ===")
A_inv = np.linalg.inv(A)
x2 = A_inv @ b
print("A^(-1) =")
print(A_inv)
print("Solution x = A^(-1)b =", x2)

# Method 3: Using LU decomposition
print("\n=== Method 3: LU Decomposition ===")
from scipy.linalg import lu

# scipy.linalg.lu returns P, L, U with the convention A = P @ L @ U,
# i.e. L @ U = P^T A (P is a permutation matrix, so P^{-1} = P^T).
P, L, U = lu(A)
print("L =")
print(L)
print("\nU =")
print(U)
print("\nVerification: LU =")
print(L @ U)
# Bug fix: LU equals P^T A, not P A. The original compared against P @ A,
# which only agrees when P happens to be its own inverse (identity or a
# single row swap) — true for this example but wrong in general.
print("Should equal P^T A =")
print(P.T @ A)

# Solve Ax = b via P L U x = b:
# First solve Ly = P^T b (forward substitution)
y = np.linalg.solve(L, P.T @ b)
# Then solve Ux = y (back substitution)
x3 = np.linalg.solve(U, y)
print("\nSolution x =", x3)

# Compare performance for larger systems
print("\n=== Performance Comparison ===")
sizes = [10, 50, 100, 200, 500]
times_solve = []
times_inv = []

for n in sizes:
    # Generate random system; adding n*I makes the matrix diagonally
    # dominant, hence well conditioned and invertible
    A_large = np.random.rand(n, n) + n * np.eye(n)  # Ensure good conditioning
    b_large = np.random.rand(n)
    
    # Time np.linalg.solve (single wall-clock sample; includes Python overhead)
    start = time.time()
    x = np.linalg.solve(A_large, b_large)
    times_solve.append(time.time() - start)
    
    # Time inverse method (forms A^-1 explicitly, then multiplies)
    start = time.time()
    x = np.linalg.inv(A_large) @ b_large
    times_inv.append(time.time() - start)

# Plot performance comparison (log scale on the time axis)
plt.figure(figsize=(10, 6))
plt.semilogy(sizes, times_solve, 'bo-', label='np.linalg.solve', linewidth=2)
plt.semilogy(sizes, times_inv, 'ro-', label='Inverse method', linewidth=2)
plt.xlabel('Matrix size (n×n)')
plt.ylabel('Time (seconds)')
plt.title('Performance Comparison: solve() vs inverse method')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Visualizing solution for 2D system
print("\n=== Visualizing 2D Linear System ===")
# Simple 2D system for visualization
A_2d = np.array([[2, 1], [1, 3]])
b_2d = np.array([5, 7])
x_2d = np.linalg.solve(A_2d, b_2d)

print(f"2D System: 2x + y = 5, x + 3y = 7")
print(f"Solution: x = {x_2d[0]:.2f}, y = {x_2d[1]:.2f}")

# Plot the lines; their intersection is the solution of the system
fig, ax = plt.subplots(figsize=(8, 8))
x_range = np.linspace(-1, 4, 100)

# Line 1: 2x + y = 5 => y = 5 - 2x
y1 = 5 - 2*x_range
# Line 2: x + 3y = 7 => y = (7 - x)/3
y2 = (7 - x_range) / 3

ax.plot(x_range, y1, 'b-', linewidth=2, label='2x + y = 5')
ax.plot(x_range, y2, 'r-', linewidth=2, label='x + 3y = 7')
ax.plot(x_2d[0], x_2d[1], 'go', markersize=10, label=f'Solution ({x_2d[0]:.2f}, {x_2d[1]:.2f})')

ax.set_xlim(-1, 4)
ax.set_ylim(-1, 4)
ax.grid(True, alpha=0.3)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Graphical Solution of Linear System')
ax.legend()
ax.set_aspect('equal')

plt.show()

3. Elementary Row Operations

Elementary row operations are the foundation of Gaussian elimination:

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

def print_augmented_matrix(A, b, title=""):
    """Pretty-print the augmented matrix [A|b] under a title banner.

    Assumes A is square (iterates columns up to A.shape[0]).
    """
    print(title)
    print("-" * 40)
    n_rows = A.shape[0]
    for i in range(n_rows):
        coeffs = "".join(f"{A[i,j]:7.2f} " for j in range(n_rows))
        print("[ " + coeffs + f"| {b[i]:7.2f} ]")
    print()

# Initial system (floats so the elimination updates stay exact in dtype)
A = np.array([[2., 1., -1.],
              [4., 5., -3.],
              [-2., 5., -2.]], dtype=float)

b = np.array([8., 20., -2.], dtype=float)

print("Elementary Row Operations Demo")
print("==============================\n")
print("Original System:")
print("2x + y - z = 8")
print("4x + 5y - 3z = 20")
print("-2x + 5y - 2z = -2\n")

print_augmented_matrix(A, b, "Initial Augmented Matrix:")

# Create copies for manipulation (keep A, b pristine for the plots below)
A_work = A.copy()
b_work = b.copy()

# Row Operation 1: R2 = R2 - 2*R1 (zero out A[1,0])
print("Row Operation 1: R2 = R2 - 2*R1")
A_work[1] = A_work[1] - 2 * A_work[0]
b_work[1] = b_work[1] - 2 * b_work[0]
print_augmented_matrix(A_work, b_work, "After R2 = R2 - 2*R1:")

# Row Operation 2: R3 = R3 + R1 (zero out A[2,0])
print("Row Operation 2: R3 = R3 + R1")
A_work[2] = A_work[2] + A_work[0]
b_work[2] = b_work[2] + b_work[0]
print_augmented_matrix(A_work, b_work, "After R3 = R3 + R1:")

# Row Operation 3: R3 = R3 - 2*R2 (zero out A[2,1] -> upper triangular)
print("Row Operation 3: R3 = R3 - 2*R2")
A_work[2] = A_work[2] - 2 * A_work[1]
b_work[2] = b_work[2] - 2 * b_work[1]
print_augmented_matrix(A_work, b_work, "After R3 = R3 - 2*R2:")

# Back substitution: solve from the last row upward
print("Back Substitution:")
print("-" * 40)
z = b_work[2] / A_work[2, 2]
y = (b_work[1] - A_work[1, 2] * z) / A_work[1, 1]
x = (b_work[0] - A_work[0, 1] * y - A_work[0, 2] * z) / A_work[0, 0]

print(f"From row 3: z = {b_work[2]:.2f} / {A_work[2,2]:.2f} = {z:.2f}")
print(f"From row 2: y = ({b_work[1]:.2f} - {A_work[1,2]:.2f}*{z:.2f}) / {A_work[1,1]:.2f} = {y:.2f}")
print(f"From row 1: x = ({b_work[0]:.2f} - {A_work[0,1]:.2f}*{y:.2f} - {A_work[0,2]:.2f}*{z:.2f}) / {A_work[0,0]:.2f} = {x:.2f}")

print(f"\nSolution: x = {x:.2f}, y = {y:.2f}, z = {z:.2f}")

# Verify solution by plugging back into the original equations
print("\nVerification:")
print(f"2({x:.2f}) + {y:.2f} - {z:.2f} = {2*x + y - z:.2f} (should be 8)")
print(f"4({x:.2f}) + 5({y:.2f}) - 3({z:.2f}) = {4*x + 5*y - 3*z:.2f} (should be 20)")
print(f"-2({x:.2f}) + 5({y:.2f}) - 2({z:.2f}) = {-2*x + 5*y - 2*z:.2f} (should be -2)")

# Visualization of row operations effect
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Function to plot matrix as heatmap
def plot_matrix(ax, matrix, title):
    """Draw `matrix` as an annotated heatmap on `ax` and return the image.

    Tick labels are hard-coded for a 3x4 augmented system [A|b]
    (3 equations in x, y, z), so this helper is specific to this demo.
    """
    im = ax.imshow(matrix, cmap='RdBu', aspect='equal', vmin=-10, vmax=10)
    ax.set_title(title)
    
    # Add text annotations
    for i in range(matrix.shape[0]):
        for j in range(matrix.shape[1]):
            ax.text(j, i, f'{matrix[i, j]:.1f}', ha='center', va='center', fontsize=12)
    
    ax.set_xticks(range(matrix.shape[1]))
    ax.set_yticks(range(matrix.shape[0]))
    ax.set_xticklabels(['x', 'y', 'z', 'b'])
    ax.set_yticklabels(['Eq 1', 'Eq 2', 'Eq 3'])
    return im

# Plot original matrix
augmented_original = np.column_stack([A, b])
im1 = plot_matrix(axes[0, 0], augmented_original, 'Original System')

# Plot after first operation (recomputed from the pristine A, b)
A1 = A.copy()
b1 = b.copy()
A1[1] = A1[1] - 2 * A1[0]
b1[1] = b1[1] - 2 * b1[0]
augmented1 = np.column_stack([A1, b1])
im2 = plot_matrix(axes[0, 1], augmented1, 'After R2 = R2 - 2*R1')

# Plot after second operation
A2 = A1.copy()
b2 = b1.copy()
A2[2] = A2[2] + A2[0]
b2[2] = b2[2] + b2[0]
augmented2 = np.column_stack([A2, b2])
im3 = plot_matrix(axes[1, 0], augmented2, 'After R3 = R3 + R1')

# Plot final upper triangular form (A_work/b_work hold the end state)
augmented_final = np.column_stack([A_work, b_work])
im4 = plot_matrix(axes[1, 1], augmented_final, 'Final Upper Triangular Form')

plt.tight_layout()
plt.show()

# Interactive row operation calculator
print("\n" + "="*50)
print("Row Operation Calculator")
print("Try different row operations!")
print("="*50)

def apply_row_operation(A, operation_type, row1, row2=None, scalar=1):
    """Apply an elementary row operation to matrix A and return the result.

    Parameters
    ----------
    A : np.ndarray
        Matrix to operate on. Not modified; a new array is returned.
    operation_type : str
        One of 'swap' (exchange row1 and row2), 'scale' (multiply row1
        by scalar), or 'add' (add scalar * row2 to row1).
    row1 : int
        0-based index of the row being modified.
    row2 : int, optional
        Second row index; required by 'swap' and 'add'.
    scalar : float, optional
        Multiplier used by 'scale' and 'add' (default 1).

    Returns
    -------
    np.ndarray
        A copy of A with the operation applied.

    Raises
    ------
    ValueError
        If operation_type is unknown, or row2 is missing when required.
    """
    A_new = A.copy()
    
    if operation_type == 'swap':
        if row2 is None:
            raise ValueError("'swap' requires row2")
        A_new[[row1, row2]] = A_new[[row2, row1]]
        print(f"Swapped rows {row1+1} and {row2+1}")
    elif operation_type == 'scale':
        A_new[row1] = scalar * A_new[row1]
        print(f"Scaled row {row1+1} by {scalar}")
    elif operation_type == 'add':
        if row2 is None:
            raise ValueError("'add' requires row2")
        A_new[row1] = A_new[row1] + scalar * A_new[row2]
        print(f"Added {scalar} times row {row2+1} to row {row1+1}")
    else:
        # Robustness fix: an unknown operation previously returned an
        # unchanged copy silently; fail loudly so typos are caught.
        raise ValueError(f"Unknown operation_type: {operation_type!r}")
    
    return A_new

# Example usage of apply_row_operation (each call returns a new matrix)
A_example = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
print("\nExample matrix:")
print(A_example)

print("\nApplying row operations:")
# Double the first row
A_example = apply_row_operation(A_example, 'scale', 0, scalar=2)
print(A_example)

# Subtract twice row 1 from row 2
A_example = apply_row_operation(A_example, 'add', 1, 0, scalar=-2)
print(A_example)

4. Row Echelon Forms

5. LU Factorization

LU decomposition factors a matrix A into a lower triangular matrix L and upper triangular matrix U:

import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import lu, lu_factor, lu_solve
import time

# Example matrix for the LU factorization section
A = np.array([[4, 3, -1],
              [2, 4, 3],
              [1, -2, 4]], dtype=float)

print("LU Factorization Tutorial")
print("========================\n")
print("Original matrix A:")
print(A)
# Perform LU decomposition
P, L, U = lu(A)

print("\nLU Decomposition: PA = LU")
print("\nPermutation matrix P:")
print(P)
print("\nLower triangular matrix L:")
print(L)
print("\nUpper triangular matrix U:")
print(U)

# Verify decomposition
print("\nVerification: LU =")
print(L @ U)
print("\nPA =")
print(P @ A)
print("\nDifference (should be near zero):", np.max(np.abs(P @ A - L @ U)))

# Visualize the matrices
fig, axes = plt.subplots(2, 3, figsize=(12, 8))

# Helper function to plot matrix
def plot_matrix_lu(ax, matrix, title, mask_upper=False, mask_lower=False):
    """Draw `matrix` as an annotated heatmap on `ax` and return the image.

    mask_upper zeroes/hides entries strictly above the diagonal (to show L);
    mask_lower zeroes/hides entries strictly below it (to show U).
    """
    matrix_plot = matrix.copy()
    
    # Create mask for triangular display (zeroed entries darken the heatmap
    # background; the matching annotations are skipped below)
    if mask_upper:
        for i in range(matrix.shape[0]):
            for j in range(i+1, matrix.shape[1]):
                matrix_plot[i, j] = 0
    if mask_lower:
        for i in range(1, matrix.shape[0]):
            for j in range(i):
                matrix_plot[i, j] = 0
    
    im = ax.imshow(matrix_plot, cmap='RdBu', aspect='equal')
    ax.set_title(title)
    
    # Add text annotations (only on entries not masked away)
    for i in range(matrix.shape[0]):
        for j in range(matrix.shape[1]):
            if not (mask_upper and j > i) and not (mask_lower and j < i):
                ax.text(j, i, f'{matrix[i, j]:.2f}', 
                       ha='center', va='center', fontsize=10)
    
    ax.set_xticks([])
    ax.set_yticks([])
    return im

# Plot matrices
plot_matrix_lu(axes[0, 0], A, 'Original Matrix A')
plot_matrix_lu(axes[0, 1], L, 'Lower Triangular L', mask_upper=True)
plot_matrix_lu(axes[0, 2], U, 'Upper Triangular U', mask_lower=True)
plot_matrix_lu(axes[1, 0], P, 'Permutation P')
plot_matrix_lu(axes[1, 1], L @ U, 'Product LU')
plot_matrix_lu(axes[1, 2], P @ A, 'Product PA')

plt.tight_layout()
plt.show()

# Using LU decomposition to solve linear systems
print("\n" + "="*50)
print("Solving Linear Systems with LU Decomposition")
print("="*50)

# Define multiple right-hand sides (each column of B is one system)
B = np.array([[1, 2],
              [2, 3],
              [3, 4]])

print("\nSolving AX = B where B has multiple columns:")
print("B =")
print(B)

# Method 1: Direct solve for each column
print("\nMethod 1: Solve for each column separately")
# Bug fix: np.zeros_like(B) inherits B's integer dtype, which silently
# truncates the fractional solutions on assignment; force a float array.
X1 = np.zeros_like(B, dtype=float)
for i in range(B.shape[1]):
    X1[:, i] = np.linalg.solve(A, B[:, i])
print("Solution X:")
print(X1)

# Method 2: Using LU decomposition (more efficient for multiple RHS:
# factor once, then only cheap triangular solves per column)
print("\nMethod 2: Using LU decomposition")
lu_factored, piv = lu_factor(A)
X2 = lu_solve((lu_factored, piv), B)
print("Solution X:")
print(X2)

# Verify solutions
print("\nVerification: AX =")
print(A @ X2)
print("Should equal B =")
print(B)

# Performance comparison
print("\n" + "="*50)
print("Performance Comparison: Direct vs LU")
print("="*50)

n_sizes = [50, 100, 200, 300]
n_rhs = [1, 5, 10, 20]  # Number of right-hand sides

# results[method][n] is a list of timings, one per n_rhs entry
results = {'direct': {}, 'lu': {}}

for n in n_sizes:
    results['direct'][n] = []
    results['lu'][n] = []
    
    # Generate random matrix
    # NOTE(review): unlike the earlier benchmark, no diagonal shift is added
    # here — a random matrix is almost surely invertible but may be poorly
    # conditioned; fine for timing, not for accuracy comparisons.
    A_test = np.random.rand(n, n)
    
    for k in n_rhs:
        B_test = np.random.rand(n, k)
        
        # Time direct method: one full solve per right-hand side
        start = time.time()
        for i in range(k):
            np.linalg.solve(A_test, B_test[:, i])
        time_direct = time.time() - start
        results['direct'][n].append(time_direct)
        
        # Time LU method: factor once, then solve all k columns
        start = time.time()
        lu_fac, piv = lu_factor(A_test)
        lu_solve((lu_fac, piv), B_test)
        time_lu = time.time() - start
        results['lu'][n].append(time_lu)

# Plot results (one solid/dashed pair per matrix size, shared color)
fig, ax = plt.subplots(figsize=(10, 6))

for i, n in enumerate(n_sizes):
    ax.plot(n_rhs, results['direct'][n], 'o-', 
            label=f'Direct (n={n})', color=f'C{i}')
    ax.plot(n_rhs, results['lu'][n], 's--', 
            label=f'LU (n={n})', color=f'C{i}')

ax.set_xlabel('Number of right-hand sides')
ax.set_ylabel('Time (seconds)')
ax.set_title('Performance: Direct solve vs LU decomposition')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)
ax.set_yscale('log')

plt.tight_layout()
plt.show()

# Interactive LU decomposition step-by-step
print("\n" + "="*50)
print("Step-by-Step LU Decomposition")
print("="*50)

def lu_decomposition_steps(A):
    """Doolittle LU decomposition without pivoting, printing each step.

    Parameters
    ----------
    A : np.ndarray
        Square matrix to factor (any numeric dtype; work is done in float).

    Returns
    -------
    (L, U) : tuple of np.ndarray
        Unit lower triangular L and upper triangular U with A = L @ U.

    Raises
    ------
    ZeroDivisionError
        If a zero pivot is encountered (no row pivoting is performed).
    """
    n = A.shape[0]
    L = np.eye(n)
    # Fix: work in floating point so an integer input matrix does not
    # truncate the elimination updates (A.copy() kept the input dtype).
    U = A.astype(float)
    
    print("Initial: U = A, L = I")
    print("U =")
    print(U)
    print("\nL =")
    print(L)
    
    for k in range(n-1):
        print(f"\nStep {k+1}: Eliminate column {k+1} below diagonal")
        
        # Fix: guard the pivot — dividing by zero would silently fill
        # L and U with inf/nan; fail loudly instead.
        if U[k, k] == 0:
            raise ZeroDivisionError(f"Zero pivot in column {k+1}; pivoting required")
        
        for i in range(k+1, n):
            factor = U[i, k] / U[k, k]
            L[i, k] = factor
            U[i, k:] = U[i, k:] - factor * U[k, k:]
            
            print(f"  L[{i+1},{k+1}] = {factor:.3f}")
        
        print("\nUpdated U:")
        print(U)
        print("\nUpdated L:")
        print(L)
    
    return L, U

# Example: run the step-by-step factorization on a 3x3 matrix
A_demo = np.array([[2, 1, 1],
                   [4, 3, 3],
                   [8, 7, 9]], dtype=float)

print("\nDemonstration with matrix:")
print(A_demo)

L_demo, U_demo = lu_decomposition_steps(A_demo)

# Reconstruct A from the factors; the max absolute difference should be ~0
print("\n" + "="*30)
print("Final result:")
print("L @ U =")
print(L_demo @ U_demo)
print("\nOriginal A =")
print(A_demo)
print("\nDifference:", np.max(np.abs(A_demo - L_demo @ U_demo)))

6. Row Space and Column Space

7. Null Space or Kernel

8. Rank and Nullity

9. Square Matrices

10. Projection Operators

Projection operators are fundamental in many applications including least squares and signal processing:

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# 2D Projection Example
print("2D Vector Projection")
print("==================\n")

# Define vectors
v = np.array([3, 4])  # Vector to project
u = np.array([1, 0])  # Vector to project onto

# Projection formula: proj_u(v) = (v·u / u·u) * u
proj_scalar = np.dot(v, u) / np.dot(u, u)
proj_v_onto_u = proj_scalar * u

print(f"Vector v = {v}")
print(f"Vector u = {u}")
print(f"Projection of v onto u = {proj_v_onto_u}")
print(f"Orthogonal component = {v - proj_v_onto_u}")

# Visualize 2D projection
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Left plot: Vector projection (v decomposed into parallel + orthogonal parts)
ax1.quiver(0, 0, v[0], v[1], angles='xy', scale_units='xy', scale=1, 
           color='blue', width=0.01, label='v')
ax1.quiver(0, 0, u[0], u[1], angles='xy', scale_units='xy', scale=1, 
           color='red', width=0.01, label='u')
ax1.quiver(0, 0, proj_v_onto_u[0], proj_v_onto_u[1], angles='xy', 
           scale_units='xy', scale=1, color='green', width=0.01, 
           label='proj_u(v)')

# Draw orthogonal component (starts at the tip of the projection)
orthogonal = v - proj_v_onto_u
ax1.quiver(proj_v_onto_u[0], proj_v_onto_u[1], orthogonal[0], 
           orthogonal[1], angles='xy', scale_units='xy', scale=1, 
           color='orange', width=0.01, label='v - proj_u(v)')

# Draw dashed lines
ax1.plot([v[0], proj_v_onto_u[0]], [v[1], proj_v_onto_u[1]], 
         'k--', alpha=0.5)

ax1.set_xlim(-1, 5)
ax1.set_ylim(-1, 5)
ax1.set_aspect('equal')
ax1.grid(True, alpha=0.3)
ax1.legend()
ax1.set_title('Vector Projection in 2D')
ax1.set_xlabel('x')
ax1.set_ylabel('y')

# Right plot: Projection onto a subspace
ax2.set_title('Projection onto a Line (1D Subspace)')

# Generate points
t = np.linspace(-5, 5, 100)
line_direction = np.array([2, 1])  # Direction of the line
line_points = np.outer(t, line_direction)

# Plot the line
ax2.plot(line_points[:, 0], line_points[:, 1], 'r-', 
         linewidth=2, label='Subspace (line)')

# Project several points onto the line
points = np.array([[3, 1], [1, 3], [-1, 2], [2, -1]])
colors = ['blue', 'green', 'purple', 'orange']

for i, point in enumerate(points):
    # Project point onto line (same scalar-projection formula as above)
    proj_scalar = np.dot(point, line_direction) / np.dot(line_direction, line_direction)
    projection = proj_scalar * line_direction
    
    # Plot original point and projection
    ax2.plot(point[0], point[1], 'o', color=colors[i], markersize=8)
    ax2.plot(projection[0], projection[1], 's', color=colors[i], markersize=8)
    
    # Draw projection line
    ax2.plot([point[0], projection[0]], [point[1], projection[1]], 
             '--', color=colors[i], alpha=0.5)
    
    # Label
    ax2.text(point[0] + 0.1, point[1] + 0.1, f'p{i+1}', fontsize=10)

ax2.set_xlim(-4, 4)
ax2.set_ylim(-4, 4)
ax2.set_aspect('equal')
ax2.grid(True, alpha=0.3)
ax2.legend()
ax2.set_xlabel('x')
ax2.set_ylabel('y')

plt.tight_layout()
plt.show()

# Projection Matrix
print("\n" + "="*50)
print("Projection Matrices")
print("="*50)

# For projection onto span of columns of A (A must have full column rank
# for A^T A to be invertible)
A = np.array([[1, 0],
              [1, 1],
              [1, 2]])

print("\nMatrix A (we'll project onto its column space):")
print(A)

# Projection matrix P = A(A^T A)^(-1)A^T
ATA = A.T @ A
print("\nA^T A =")
print(ATA)

ATA_inv = np.linalg.inv(ATA)
P = A @ ATA_inv @ A.T

print("\nProjection matrix P = A(A^T A)^(-1)A^T:")
print(P)

# Properties of projection matrix
print("\nProperties of projection matrix:")
print(f"1. P is symmetric: {np.allclose(P, P.T)}")
print(f"2. P^2 = P (idempotent): {np.allclose(P @ P, P)}")
print(f"3. Eigenvalues of P: {np.linalg.eigvals(P)}")
print("   (Should be 0s and 1s)")

# Example: Project a vector onto column space of A
b = np.array([2, 1, 4])
proj_b = P @ b

print(f"\nOriginal vector b = {b}")
print(f"Projection of b onto Col(A) = {proj_b}")
print(f"Residual (orthogonal to Col(A)) = {b - proj_b}")

# Verify orthogonality
print(f"\nVerification - residual orthogonal to columns of A:")
print(f"A^T @ residual = {A.T @ (b - proj_b)}")
print("(Should be close to zero)")

# 3D Visualization
fig = plt.figure(figsize=(12, 10))

# 3D projection onto a plane
ax3d = fig.add_subplot(221, projection='3d')

# Define a plane by two (non-parallel) spanning vectors
v1 = np.array([1, 0, 0])
v2 = np.array([0, 1, 0.5])

# Create plane mesh
u_range = np.linspace(-2, 2, 10)
v_range = np.linspace(-2, 2, 10)
U, V = np.meshgrid(u_range, v_range)

# Plane points: every point is U*v1 + V*v2
X = U * v1[0] + V * v2[0]
Y = U * v1[1] + V * v2[1]
Z = U * v1[2] + V * v2[2]

ax3d.plot_surface(X, Y, Z, alpha=0.3, color='lightblue')

# Point to project
point_3d = np.array([1, 1, 2])

# Project onto plane span(v1, v2) using the same P = A(A^T A)^(-1)A^T formula
A_plane = np.column_stack([v1, v2])
P_plane = A_plane @ np.linalg.inv(A_plane.T @ A_plane) @ A_plane.T
proj_point = P_plane @ point_3d

# Plot point and projection
ax3d.scatter(*point_3d, color='red', s=100, label='Original point')
ax3d.scatter(*proj_point, color='green', s=100, label='Projection')

# Draw projection line
ax3d.plot([point_3d[0], proj_point[0]], 
          [point_3d[1], proj_point[1]], 
          [point_3d[2], proj_point[2]], 'k--')

ax3d.set_xlabel('X')
ax3d.set_ylabel('Y')
ax3d.set_zlabel('Z')
ax3d.set_title('3D Projection onto a Plane')
ax3d.legend()

# Application: Gram-Schmidt Orthogonalization
ax_gs = fig.add_subplot(222)

print("\n" + "="*50)
print("Application: Gram-Schmidt Process")
print("="*50)

# Input vectors (stored as columns: vectors[:, i] is the i-th vector)
vectors = np.array([[3, 1],
                    [2, 2]])

print("\nOriginal vectors:")
for i, v in enumerate(vectors.T):
    print(f"v{i+1} = {v}")

# Gram-Schmidt process
Q = np.zeros_like(vectors, dtype=float)

# First vector: just normalize
Q[:, 0] = vectors[:, 0] / np.linalg.norm(vectors[:, 0])

# Second vector: subtract projection onto first, then normalize
v2 = vectors[:, 1]
proj = np.dot(v2, Q[:, 0]) * Q[:, 0]
Q[:, 1] = v2 - proj
Q[:, 1] = Q[:, 1] / np.linalg.norm(Q[:, 1])

print("\nOrthonormal vectors:")
for i in range(Q.shape[1]):
    print(f"q{i+1} = {Q[:, i]}")

print(f"\nVerification - dot product: {np.dot(Q[:, 0], Q[:, 1]):.6f}")
print("(Should be 0)")

# Visualize Gram-Schmidt (original vectors drawn faint, results solid)
ax_gs.quiver(0, 0, vectors[0, 0], vectors[1, 0], angles='xy', 
             scale_units='xy', scale=1, color='blue', width=0.01, 
             label='v1', alpha=0.5)
ax_gs.quiver(0, 0, vectors[0, 1], vectors[1, 1], angles='xy', 
             scale_units='xy', scale=1, color='red', width=0.01, 
             label='v2', alpha=0.5)

# Show orthonormal vectors (unit length, scaled up for visibility)
scale = 3  # Scale for visibility
ax_gs.quiver(0, 0, scale*Q[0, 0], scale*Q[1, 0], angles='xy', 
             scale_units='xy', scale=1, color='blue', width=0.02, 
             label='q1')
ax_gs.quiver(0, 0, scale*Q[0, 1], scale*Q[1, 1], angles='xy', 
             scale_units='xy', scale=1, color='red', width=0.02, 
             label='q2')

# Show projection
ax_gs.plot([v2[0], proj[0]], [v2[1], proj[1]], 'k--', alpha=0.5)

ax_gs.set_xlim(-1, 4)
ax_gs.set_ylim(-1, 4)
ax_gs.set_aspect('equal')
ax_gs.grid(True, alpha=0.3)
ax_gs.legend()
ax_gs.set_title('Gram-Schmidt Orthogonalization')

plt.tight_layout()
plt.show()

11. Least Square Approximation

Least squares finds the best approximate solution when a system is overdetermined:

import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): LinearRegression is imported but never used below — confirm
# whether an sklearn comparison was intended; otherwise this import can go.
from sklearn.linear_model import LinearRegression

# Generate noisy data: true line y = 2x + 1 plus Gaussian noise (seeded
# so the results are reproducible)
np.random.seed(42)
n_points = 50
x = np.linspace(0, 10, n_points)
y_true = 2 * x + 1
y_noise = y_true + np.random.normal(0, 2, n_points)

print("Least Squares Approximation")
print("==========================\n")

# Method 1: Using normal equations
# We want to fit y = ax + b, so A = [x, 1]
A = np.column_stack([x, np.ones(n_points)])

print("Design matrix A shape:", A.shape)
print("First 5 rows of A:")
print(A[:5])

# Normal equations: A^T A x = A^T b
ATA = A.T @ A
ATb = A.T @ y_noise

print("\nNormal equations: A^T A x = A^T b")
print("A^T A =")
print(ATA)
print("\nA^T b =")
print(ATb)

# Solve normal equations
coeffs_normal = np.linalg.solve(ATA, ATb)
print(f"\nSolution via normal equations: a = {coeffs_normal[0]:.4f}, b = {coeffs_normal[1]:.4f}")

# Method 2: Using np.linalg.lstsq
coeffs_lstsq, residuals, rank, s = np.linalg.lstsq(A, y_noise, rcond=None)
print(f"\nSolution via lstsq: a = {coeffs_lstsq[0]:.4f}, b = {coeffs_lstsq[1]:.4f}")
# lstsq returns an empty residuals array when A is rank-deficient;
# indexing [0] is safe here because this A has full column rank.
print(f"Residual sum of squares: {residuals[0]:.4f}")
print(f"Rank of A: {rank}")

# Method 3: Using QR decomposition (A = QR => Rx = Q^T y)
Q, R = np.linalg.qr(A)
coeffs_qr = np.linalg.solve(R, Q.T @ y_noise)
print(f"\nSolution via QR: a = {coeffs_qr[0]:.4f}, b = {coeffs_qr[1]:.4f}")

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Plot 1: Data and fitted line
ax1 = axes[0, 0]
y_fit = coeffs_lstsq[0] * x + coeffs_lstsq[1]

ax1.scatter(x, y_noise, alpha=0.6, label='Noisy data')
ax1.plot(x, y_true, 'g--', linewidth=2, label='True line: y = 2x + 1')
ax1.plot(x, y_fit, 'r-', linewidth=2, 
         label=f'Fitted: y = {coeffs_lstsq[0]:.2f}x + {coeffs_lstsq[1]:.2f}')

# Plot residuals (vertical segments for every 5th point)
for i in range(0, n_points, 5):
    ax1.plot([x[i], x[i]], [y_noise[i], y_fit[i]], 'k-', alpha=0.3, linewidth=1)

ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax1.set_title('Linear Least Squares Fit')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot 2: Residuals
ax2 = axes[0, 1]
residuals_plot = y_noise - y_fit

ax2.scatter(x, residuals_plot, alpha=0.6)
ax2.axhline(y=0, color='r', linestyle='--')
ax2.set_xlabel('x')
ax2.set_ylabel('Residuals')
ax2.set_title('Residual Plot')
ax2.grid(True, alpha=0.3)

# Plot 3: Polynomial fitting
ax3 = axes[1, 0]

# Fit polynomials of different degrees
degrees = [1, 2, 3, 5]
colors = ['blue', 'green', 'orange', 'red']

for deg, color in zip(degrees, colors):
    # Create polynomial features: columns [1, x, x^2, ..., x^deg]
    A_poly = np.vander(x, deg + 1, increasing=True)
    coeffs_poly = np.linalg.lstsq(A_poly, y_noise, rcond=None)[0]
    
    # Evaluate polynomial
    x_plot = np.linspace(0, 10, 200)
    A_plot = np.vander(x_plot, deg + 1, increasing=True)
    y_poly = A_plot @ coeffs_poly
    
    ax3.plot(x_plot, y_poly, color=color, linewidth=2, 
             label=f'Degree {deg}')

ax3.scatter(x, y_noise, alpha=0.6, color='black', s=20)
ax3.set_xlabel('x')
ax3.set_ylabel('y')
ax3.set_title('Polynomial Least Squares')
ax3.legend()
ax3.grid(True, alpha=0.3)
ax3.set_ylim(-10, 30)

# Plot 4: Geometric interpretation
ax4 = axes[1, 1]

# Simple 3-point example for visualization
x_simple = np.array([1, 2, 3])
y_simple = np.array([2, 3, 5])
A_simple = np.column_stack([x_simple, np.ones(3)])

# Project b onto column space of A (the fitted values are this projection)
P = A_simple @ np.linalg.inv(A_simple.T @ A_simple) @ A_simple.T
y_proj = P @ y_simple

ax4.scatter(x_simple, y_simple, s=100, color='blue', 
            label='Data points', zorder=5)
ax4.scatter(x_simple, y_proj, s=100, color='red', 
            marker='s', label='Projections', zorder=5)

# Fitted line
coeffs_simple = np.linalg.lstsq(A_simple, y_simple, rcond=None)[0]
x_line = np.linspace(0.5, 3.5, 100)
y_line = coeffs_simple[0] * x_line + coeffs_simple[1]
ax4.plot(x_line, y_line, 'r-', linewidth=2, label='Least squares line')

# Draw projection lines
for i in range(3):
    ax4.plot([x_simple[i], x_simple[i]], 
             [y_simple[i], y_proj[i]], 
             'k--', alpha=0.5)

ax4.set_xlabel('x')
ax4.set_ylabel('y')
ax4.set_title('Geometric View: Projection onto Col(A)')
ax4.legend()
ax4.grid(True, alpha=0.3)
ax4.set_xlim(0.5, 3.5)

plt.tight_layout()
plt.show()

# Weighted least squares
print("\n" + "="*50)
print("Weighted Least Squares")
print("="*50)

# Create weights (inverse of variance)
# Points in the middle have higher confidence (lower variance)
weights = np.exp(-((x - 5)**2) / 10)
W = np.diag(weights)

print("\nWeight matrix (diagonal):")
print(np.diag(W)[:10], "...")

# Weighted normal equations: A^T W A x = A^T W b
ATWA = A.T @ W @ A
ATWb = A.T @ W @ y_noise

coeffs_weighted = np.linalg.solve(ATWA, ATWb)
print(f"\nWeighted LS solution: a = {coeffs_weighted[0]:.4f}, b = {coeffs_weighted[1]:.4f}")
print(f"Unweighted solution: a = {coeffs_lstsq[0]:.4f}, b = {coeffs_lstsq[1]:.4f}")

# Visualize weighted least squares
fig, ax = plt.subplots(figsize=(10, 6))

y_fit_weighted = coeffs_weighted[0] * x + coeffs_weighted[1]
y_fit_unweighted = coeffs_lstsq[0] * x + coeffs_lstsq[1]

# Plot data with size proportional to weight
scatter = ax.scatter(x, y_noise, s=weights*100, alpha=0.6, 
                     c=weights, cmap='viridis', 
                     label='Data (size ∝ weight)')

ax.plot(x, y_fit_weighted, 'r-', linewidth=2, 
        label=f'Weighted LS: y = {coeffs_weighted[0]:.2f}x + {coeffs_weighted[1]:.2f}')
ax.plot(x, y_fit_unweighted, 'b--', linewidth=2, 
        label=f'Unweighted LS: y = {coeffs_lstsq[0]:.2f}x + {coeffs_lstsq[1]:.2f}')

plt.colorbar(scatter, label='Weight')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Weighted vs Unweighted Least Squares')
ax.legend()
ax.grid(True, alpha=0.3)

plt.show()

# Ridge regression (regularized least squares)
print("\n" + "="*50)
print("Ridge Regression (Regularized Least Squares)")
print("="*50)

# Create more features for overfitting demonstration
A_poly = np.vander(x, 10, increasing=True)  # degree-9 polynomial (10 coefficients)

# Different regularization parameters (lambda = 0 is ordinary least squares)
lambdas = [0, 0.01, 0.1, 1, 10]

fig, ax = plt.subplots(figsize=(10, 6))

for lam in lambdas:
    # Ridge regression: minimize ||Ax - b||^2 + lambda||x||^2
    # Solution: x = (A^T A + lambda I)^(-1) A^T b
    n_features = A_poly.shape[1]
    coeffs_ridge = np.linalg.solve(
        A_poly.T @ A_poly + lam * np.eye(n_features), 
        A_poly.T @ y_noise
    )
    
    # Evaluate on a dense grid for a smooth curve
    x_plot = np.linspace(0, 10, 200)
    A_plot = np.vander(x_plot, 10, increasing=True)
    y_ridge = A_plot @ coeffs_ridge
    
    ax.plot(x_plot, y_ridge, linewidth=2, 
            label=f'λ = {lam}', alpha=0.8)

ax.scatter(x, y_noise, alpha=0.6, color='black', s=20)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Ridge Regression with Different Regularization')
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_ylim(-10, 30)

plt.show()

Quiz: What is the main advantage of using least squares for overdetermined systems?
  • It always finds an exact solution
  • It minimizes the sum of squared residuals
  • It's faster than other methods
  • It works only for square matrices

12. SVD Factorization

13. Numerical Methods

14. Inequalities

15. Applications