Understanding the relationship between matrices and vectors is fundamental to linear algebra:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# --- 2D vector operations demo ---
# Define vectors
v1 = np.array([3, 2])
v2 = np.array([1, 4])
# Vector operations: addition, subtraction, scalar multiplication, dot product
v_sum = v1 + v2
v_diff = v1 - v2
scalar_mult = 2 * v1
dot_product = np.dot(v1, v2)
print("Vector v1:", v1)
print("Vector v2:", v2)
print("v1 + v2 =", v_sum)
print("v1 - v2 =", v_diff)
print("2 * v1 =", scalar_mult)
print("v1 · v2 =", dot_product)
# Visualize vectors side by side: addition on the left, a matrix transform on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
# Left plot: Vector operations (arrows drawn from the origin)
ax1.quiver(0, 0, v1[0], v1[1], angles='xy', scale_units='xy', scale=1, color='blue', width=0.01)
ax1.quiver(0, 0, v2[0], v2[1], angles='xy', scale_units='xy', scale=1, color='red', width=0.01)
ax1.quiver(0, 0, v_sum[0], v_sum[1], angles='xy', scale_units='xy', scale=1, color='green', width=0.01)
# Add labels at each vector's midpoint
ax1.text(v1[0]/2, v1[1]/2, 'v1', fontsize=12, ha='center')
ax1.text(v2[0]/2, v2[1]/2, 'v2', fontsize=12, ha='center')
ax1.text(v_sum[0]/2, v_sum[1]/2, 'v1+v2', fontsize=12, ha='center')
# Parallelogram for vector addition (dashed sides from v1 and v2 to the sum)
ax1.plot([v1[0], v_sum[0]], [v1[1], v_sum[1]], 'k--', alpha=0.5)
ax1.plot([v2[0], v_sum[0]], [v2[1], v_sum[1]], 'k--', alpha=0.5)
ax1.set_xlim(-1, 5)
ax1.set_ylim(-1, 7)
ax1.grid(True, alpha=0.3)
ax1.set_aspect('equal')
ax1.set_title('Vector Addition')
ax1.set_xlabel('x')
ax1.set_ylabel('y')
# Right plot: Matrix transformation of vectors
ax2.set_title('Matrix Transformation')
# Define transformation matrix (maps the unit circle to an ellipse)
A = np.array([[2, 1], [-1, 2]])
# Create multiple vectors to transform: 20 points on the unit circle
theta = np.linspace(0, 2*np.pi, 20)
unit_vectors = np.array([np.cos(theta), np.sin(theta)])
# Transform vectors (each column of unit_vectors is one 2D point)
transformed = A @ unit_vectors
# Plot original and transformed
ax2.plot(unit_vectors[0], unit_vectors[1], 'b-', linewidth=2, label='Original unit circle')
ax2.plot(transformed[0], transformed[1], 'r-', linewidth=2, label='After transformation A')
# Show some individual transformations (every 5th sample point)
for i in range(0, 20, 5):
    ax2.arrow(0, 0, unit_vectors[0, i], unit_vectors[1, i],
              head_width=0.1, head_length=0.05, fc='blue', ec='blue', alpha=0.5)
    ax2.arrow(0, 0, transformed[0, i], transformed[1, i],
              head_width=0.1, head_length=0.05, fc='red', ec='red', alpha=0.5)
ax2.set_xlim(-3, 3)
ax2.set_ylim(-3, 3)
ax2.grid(True, alpha=0.3)
ax2.set_aspect('equal')
ax2.legend()
ax2.set_xlabel('x')
ax2.set_ylabel('y')
plt.tight_layout()
plt.show()
# 3D vectors example: cross product is perpendicular to both inputs
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
# Define 3D vectors
v1_3d = np.array([1, 2, 3])
v2_3d = np.array([2, -1, 1])
v3_3d = np.cross(v1_3d, v2_3d)  # Cross product
# Plot vectors as 3D arrows from the origin
ax.quiver(0, 0, 0, v1_3d[0], v1_3d[1], v1_3d[2], color='blue', arrow_length_ratio=0.1)
ax.quiver(0, 0, 0, v2_3d[0], v2_3d[1], v2_3d[2], color='red', arrow_length_ratio=0.1)
ax.quiver(0, 0, 0, v3_3d[0], v3_3d[1], v3_3d[2], color='green', arrow_length_ratio=0.1)
# Labels at each vector tip
ax.text(v1_3d[0], v1_3d[1], v1_3d[2], 'v1', fontsize=12)
ax.text(v2_3d[0], v2_3d[1], v2_3d[2], 'v2', fontsize=12)
ax.text(v3_3d[0], v3_3d[1], v3_3d[2], 'v1×v2', fontsize=12)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title('Cross Product in 3D')
# Set equal aspect ratio by forcing identical axis limits
max_range = 6
ax.set_xlim([-max_range, max_range])
ax.set_ylim([-max_range, max_range])
ax.set_zlim([-max_range, max_range])
plt.show()
print("\nCross product v1 × v2 =", v3_3d)
print("Magnitude of cross product:", np.linalg.norm(v3_3d))
Multiple methods exist for solving Ax = b. Let's explore and compare them:
import numpy as np
import matplotlib.pyplot as plt
import time
# --- Three ways to solve the linear system Ax = b ---
# Define a linear system Ax = b
A = np.array([[3, 1, -1],
              [2, -2, 4],
              [-1, 0.5, -1]])
b = np.array([1, -2, 0])
print("System of equations:")
print("3x + y - z = 1")
print("2x - 2y + 4z = -2")
print("-x + 0.5y - z = 0")
print("\nMatrix form: Ax = b")
print("A =")
print(A)
print("\nb =", b)
# Method 1: Using np.linalg.solve (most efficient)
print("\n=== Method 1: np.linalg.solve ===")
x1 = np.linalg.solve(A, b)
print("Solution x =", x1)
print("Verification: Ax =", A @ x1)
print("Should equal b =", b)
# Method 2: Using inverse matrix (educational, but less efficient)
print("\n=== Method 2: Using inverse matrix ===")
A_inv = np.linalg.inv(A)
x2 = A_inv @ b
print("A^(-1) =")
print(A_inv)
print("Solution x = A^(-1)b =", x2)
# Method 3: Using LU decomposition
print("\n=== Method 3: LU Decomposition ===")
from scipy.linalg import lu
# scipy.linalg.lu uses the convention A = P @ L @ U, so the product
# L @ U reconstructs P^T A (P is a permutation matrix, P^(-1) = P^T).
# The previous version compared L @ U against P @ A, which is only
# correct by accident when P happens to be symmetric.
P, L, U = lu(A)
print("L =")
print(L)
print("\nU =")
print(U)
print("\nVerification: LU =")
print(L @ U)
print("Should equal P^T A =")
print(P.T @ A)
# Solve using LU decomposition:
# A x = b  =>  P L U x = b  =>  L (U x) = P^T b
# First solve Ly = P^T b
y = np.linalg.solve(L, P.T @ b)
# Then solve Ux = y
x3 = np.linalg.solve(U, y)
print("\nSolution x =", x3)
# Compare performance for larger systems: solve() vs explicit inverse
print("\n=== Performance Comparison ===")
sizes = [10, 50, 100, 200, 500]
times_solve = []
times_inv = []
for n in sizes:
    # Generate random system; adding n*I makes the matrix strongly
    # diagonally dominant, so it is well conditioned and invertible.
    A_large = np.random.rand(n, n) + n * np.eye(n)  # Ensure good conditioning
    b_large = np.random.rand(n)
    # Time np.linalg.solve
    start = time.time()
    x = np.linalg.solve(A_large, b_large)
    times_solve.append(time.time() - start)
    # Time inverse method (inv + matmul; expected to be slower)
    start = time.time()
    x = np.linalg.inv(A_large) @ b_large
    times_inv.append(time.time() - start)
# Plot performance comparison on a log y-scale (timings span orders of magnitude)
plt.figure(figsize=(10, 6))
plt.semilogy(sizes, times_solve, 'bo-', label='np.linalg.solve', linewidth=2)
plt.semilogy(sizes, times_inv, 'ro-', label='Inverse method', linewidth=2)
plt.xlabel('Matrix size (n×n)')
plt.ylabel('Time (seconds)')
plt.title('Performance Comparison: solve() vs inverse method')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
# Visualizing solution for 2D system: two lines intersecting at the solution
print("\n=== Visualizing 2D Linear System ===")
# Simple 2D system for visualization
A_2d = np.array([[2, 1], [1, 3]])
b_2d = np.array([5, 7])
x_2d = np.linalg.solve(A_2d, b_2d)
print(f"2D System: 2x + y = 5, x + 3y = 7")
print(f"Solution: x = {x_2d[0]:.2f}, y = {x_2d[1]:.2f}")
# Plot the lines (each equation rewritten as y in terms of x)
fig, ax = plt.subplots(figsize=(8, 8))
x_range = np.linspace(-1, 4, 100)
# Line 1: 2x + y = 5 => y = 5 - 2x
y1 = 5 - 2*x_range
# Line 2: x + 3y = 7 => y = (7 - x)/3
y2 = (7 - x_range) / 3
ax.plot(x_range, y1, 'b-', linewidth=2, label='2x + y = 5')
ax.plot(x_range, y2, 'r-', linewidth=2, label='x + 3y = 7')
ax.plot(x_2d[0], x_2d[1], 'go', markersize=10, label=f'Solution ({x_2d[0]:.2f}, {x_2d[1]:.2f})')
ax.set_xlim(-1, 4)
ax.set_ylim(-1, 4)
ax.grid(True, alpha=0.3)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Graphical Solution of Linear System')
ax.legend()
ax.set_aspect('equal')
plt.show()
Elementary row operations are the foundation of Gaussian elimination:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML
def print_augmented_matrix(A, b, title=""):
    """Pretty-print the augmented matrix [A|b].

    Prints `title`, a 40-dash separator, one bracketed line per equation
    (coefficients, then '|' and the right-hand side), and a trailing
    blank line.  Assumes a square system: A.shape[0] is used both as the
    row count and as the number of coefficient columns shown.
    """
    size = A.shape[0]
    print(title)
    print("-" * 40)
    for r in range(size):
        coeffs = "".join(f"{A[r, c]:7.2f} " for c in range(size))
        print(f"[ {coeffs}| {b[r]:7.2f} ]")
    print()
# --- Gaussian elimination walked through by hand ---
# Initial system (float dtype so row operations don't truncate)
A = np.array([[2., 1., -1.],
              [4., 5., -3.],
              [-2., 5., -2.]], dtype=float)
b = np.array([8., 20., -2.], dtype=float)
print("Elementary Row Operations Demo")
print("==============================\n")
print("Original System:")
print("2x + y - z = 8")
print("4x + 5y - 3z = 20")
print("-2x + 5y - 2z = -2\n")
print_augmented_matrix(A, b, "Initial Augmented Matrix:")
# Create copies for manipulation (keep A, b pristine for later plots)
A_work = A.copy()
b_work = b.copy()
# Row Operation 1: R2 = R2 - 2*R1 (eliminate x from equation 2)
print("Row Operation 1: R2 = R2 - 2*R1")
A_work[1] = A_work[1] - 2 * A_work[0]
b_work[1] = b_work[1] - 2 * b_work[0]
print_augmented_matrix(A_work, b_work, "After R2 = R2 - 2*R1:")
# Row Operation 2: R3 = R3 + R1 (eliminate x from equation 3)
print("Row Operation 2: R3 = R3 + R1")
A_work[2] = A_work[2] + A_work[0]
b_work[2] = b_work[2] + b_work[0]
print_augmented_matrix(A_work, b_work, "After R3 = R3 + R1:")
# Row Operation 3: R3 = R3 - 2*R2 (eliminate y from equation 3 -> upper triangular)
print("Row Operation 3: R3 = R3 - 2*R2")
A_work[2] = A_work[2] - 2 * A_work[1]
b_work[2] = b_work[2] - 2 * b_work[1]
print_augmented_matrix(A_work, b_work, "After R3 = R3 - 2*R2:")
# Back substitution: solve bottom-up now that the system is triangular
print("Back Substitution:")
print("-" * 40)
z = b_work[2] / A_work[2, 2]
y = (b_work[1] - A_work[1, 2] * z) / A_work[1, 1]
x = (b_work[0] - A_work[0, 1] * y - A_work[0, 2] * z) / A_work[0, 0]
print(f"From row 3: z = {b_work[2]:.2f} / {A_work[2,2]:.2f} = {z:.2f}")
print(f"From row 2: y = ({b_work[1]:.2f} - {A_work[1,2]:.2f}*{z:.2f}) / {A_work[1,1]:.2f} = {y:.2f}")
print(f"From row 1: x = ({b_work[0]:.2f} - {A_work[0,1]:.2f}*{y:.2f} - {A_work[0,2]:.2f}*{z:.2f}) / {A_work[0,0]:.2f} = {x:.2f}")
print(f"\nSolution: x = {x:.2f}, y = {y:.2f}, z = {z:.2f}")
# Verify solution by substituting back into each original equation
print("\nVerification:")
print(f"2({x:.2f}) + {y:.2f} - {z:.2f} = {2*x + y - z:.2f} (should be 8)")
print(f"4({x:.2f}) + 5({y:.2f}) - 3({z:.2f}) = {4*x + 5*y - 3*z:.2f} (should be 20)")
print(f"-2({x:.2f}) + 5({y:.2f}) - 2({z:.2f}) = {-2*x + 5*y - 2*z:.2f} (should be -2)")
# Visualization of row operations effect (2x2 grid of heatmaps)
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Function to plot matrix as heatmap
def plot_matrix(ax, matrix, title):
    """Draw an augmented 3x4 system as a heatmap with numeric annotations.

    Color encodes each entry's value (clipped to [-10, 10] for a stable
    color scale across the four subplots); axis labels are hard-coded for
    a 3-equation system in x, y, z with right-hand side b.
    """
    im = ax.imshow(matrix, cmap='RdBu', aspect='equal', vmin=-10, vmax=10)
    ax.set_title(title)
    # Add text annotations: print each entry's value on its cell
    for i in range(matrix.shape[0]):
        for j in range(matrix.shape[1]):
            ax.text(j, i, f'{matrix[i, j]:.1f}', ha='center', va='center', fontsize=12)
    ax.set_xticks(range(matrix.shape[1]))
    ax.set_yticks(range(matrix.shape[0]))
    ax.set_xticklabels(['x', 'y', 'z', 'b'])
    ax.set_yticklabels(['Eq 1', 'Eq 2', 'Eq 3'])
    return im
# Plot original matrix (augmented [A|b])
augmented_original = np.column_stack([A, b])
im1 = plot_matrix(axes[0, 0], augmented_original, 'Original System')
# Plot after first operation — redo R2 = R2 - 2*R1 from the pristine A, b
A1 = A.copy()
b1 = b.copy()
A1[1] = A1[1] - 2 * A1[0]
b1[1] = b1[1] - 2 * b1[0]
augmented1 = np.column_stack([A1, b1])
im2 = plot_matrix(axes[0, 1], augmented1, 'After R2 = R2 - 2*R1')
# Plot after second operation (R3 = R3 + R1)
A2 = A1.copy()
b2 = b1.copy()
A2[2] = A2[2] + A2[0]
b2[2] = b2[2] + b2[0]
augmented2 = np.column_stack([A2, b2])
im3 = plot_matrix(axes[1, 0], augmented2, 'After R3 = R3 + R1')
# Plot final upper triangular form (A_work/b_work already fully reduced above)
augmented_final = np.column_stack([A_work, b_work])
im4 = plot_matrix(axes[1, 1], augmented_final, 'Final Upper Triangular Form')
plt.tight_layout()
plt.show()
# Interactive row operation calculator
print("\n" + "="*50)
print("Row Operation Calculator")
print("Try different row operations!")
print("="*50)
def apply_row_operation(A, operation_type, row1, row2=None, scalar=1):
    """
    Apply an elementary row operation to matrix A and return the result.

    Parameters
    ----------
    A : np.ndarray
        Matrix to operate on; it is not modified (a copy is returned).
    operation_type : str
        One of 'swap' (exchange row1 and row2), 'scale' (multiply row1
        by scalar), or 'add' (add scalar * row2 to row1).
    row1, row2 : int
        Zero-based row indices; row2 is required for 'swap' and 'add'.
    scalar : float
        Multiplier used by 'scale' and 'add' (default 1).

    Returns
    -------
    np.ndarray
        A new matrix with the operation applied.

    Raises
    ------
    ValueError
        If operation_type is unknown, or row2 is omitted for an
        operation that needs it.  (Previously an unknown operation_type
        silently returned an unchanged copy, and a missing row2 raised a
        confusing TypeError from the f-string.)
    """
    if operation_type in ('swap', 'add') and row2 is None:
        raise ValueError(f"row2 is required for the '{operation_type}' operation")
    A_new = A.copy()
    if operation_type == 'swap':
        A_new[[row1, row2]] = A_new[[row2, row1]]
        print(f"Swapped rows {row1+1} and {row2+1}")
    elif operation_type == 'scale':
        A_new[row1] = scalar * A_new[row1]
        print(f"Scaled row {row1+1} by {scalar}")
    elif operation_type == 'add':
        A_new[row1] = A_new[row1] + scalar * A_new[row2]
        print(f"Added {scalar} times row {row2+1} to row {row1+1}")
    else:
        raise ValueError(f"Unknown operation_type: {operation_type!r}")
    return A_new
# Example usage of the row-operation helper on a small 3x3 matrix
A_example = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
print("\nExample matrix:")
print(A_example)
print("\nApplying row operations:")
# Scale row 1 by 2, then subtract 2*row1 from row 2
A_example = apply_row_operation(A_example, 'scale', 0, scalar=2)
print(A_example)
A_example = apply_row_operation(A_example, 'add', 1, 0, scalar=-2)
print(A_example)
LU decomposition factors a matrix A into a lower triangular matrix L and upper triangular matrix U:
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import lu, lu_factor, lu_solve
import time
# --- LU factorization with scipy ---
# Example matrix
A = np.array([[4, 3, -1],
              [2, 4, 3],
              [1, -2, 4]], dtype=float)
print("LU Factorization Tutorial")
print("========================\n")
print("Original matrix A:")
print(A)
# Perform LU decomposition.  scipy.linalg.lu uses the convention
# A = P @ L @ U, so L @ U equals P^T A (P is a permutation matrix,
# hence P^(-1) = P^T).  The previous version stated "PA = LU" and
# compared L @ U against P @ A, which only agreed by accident when
# P happened to be symmetric (a single row swap).
P, L, U = lu(A)
print("\nLU Decomposition: A = PLU")
print("\nPermutation matrix P:")
print(P)
print("\nLower triangular matrix L:")
print(L)
print("\nUpper triangular matrix U:")
print(U)
# Verify decomposition
print("\nVerification: LU =")
print(L @ U)
print("\nP^T A =")
print(P.T @ A)
print("\nDifference (should be near zero):", np.max(np.abs(P.T @ A - L @ U)))
# Visualize the matrices of the factorization in a 2x3 grid
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
# Helper function to plot matrix
def plot_matrix_lu(ax, matrix, title, mask_upper=False, mask_lower=False):
    """Heatmap a matrix with numeric annotations.

    mask_upper zeroes the strict upper triangle (to display L),
    mask_lower zeroes the strict lower triangle (to display U); masked
    cells are also left unannotated.
    """
    matrix_plot = matrix.copy()
    # Create mask for triangular display
    if mask_upper:
        for i in range(matrix.shape[0]):
            for j in range(i+1, matrix.shape[1]):
                matrix_plot[i, j] = 0
    if mask_lower:
        for i in range(1, matrix.shape[0]):
            for j in range(i):
                matrix_plot[i, j] = 0
    im = ax.imshow(matrix_plot, cmap='RdBu', aspect='equal')
    ax.set_title(title)
    # Add text annotations only on unmasked cells
    for i in range(matrix.shape[0]):
        for j in range(matrix.shape[1]):
            if not (mask_upper and j > i) and not (mask_lower and j < i):
                ax.text(j, i, f'{matrix[i, j]:.2f}',
                        ha='center', va='center', fontsize=10)
    ax.set_xticks([])
    ax.set_yticks([])
    return im
# Plot matrices: top row A, L, U; bottom row P and the two reconstructions
plot_matrix_lu(axes[0, 0], A, 'Original Matrix A')
plot_matrix_lu(axes[0, 1], L, 'Lower Triangular L', mask_upper=True)
plot_matrix_lu(axes[0, 2], U, 'Upper Triangular U', mask_lower=True)
plot_matrix_lu(axes[1, 0], P, 'Permutation P')
plot_matrix_lu(axes[1, 1], L @ U, 'Product LU')
plot_matrix_lu(axes[1, 2], P @ A, 'Product PA')
plt.tight_layout()
plt.show()
# Using LU decomposition to solve linear systems
print("\n" + "="*50)
print("Solving Linear Systems with LU Decomposition")
print("="*50)
# Define multiple right-hand sides (note: integer dtype)
B = np.array([[1, 2],
              [2, 3],
              [3, 4]])
print("\nSolving AX = B where B has multiple columns:")
print("B =")
print(B)
# Method 1: Direct solve for each column
print("\nMethod 1: Solve for each column separately")
# BUG FIX: np.zeros_like(B) inherits B's integer dtype, so assigning the
# (generally non-integer) solution columns silently truncated them to
# ints.  Force a float result array instead.
X1 = np.zeros_like(B, dtype=float)
for i in range(B.shape[1]):
    X1[:, i] = np.linalg.solve(A, B[:, i])
print("Solution X:")
print(X1)
# Method 2: Using LU decomposition (more efficient for multiple RHS:
# factor once, then cheap triangular solves per column)
print("\nMethod 2: Using LU decomposition")
lu_factored, piv = lu_factor(A)
X2 = lu_solve((lu_factored, piv), B)
print("Solution X:")
print(X2)
# Verify solutions
print("\nVerification: AX =")
print(A @ X2)
print("Should equal B =")
print(B)
# Performance comparison: re-solving per column vs factoring once
print("\n" + "="*50)
print("Performance Comparison: Direct vs LU")
print("="*50)
n_sizes = [50, 100, 200, 300]
n_rhs = [1, 5, 10, 20]  # Number of right-hand sides
results = {'direct': {}, 'lu': {}}
for n in n_sizes:
    results['direct'][n] = []
    results['lu'][n] = []
    # Generate random matrix (unpivoted randomness; assumed nonsingular)
    A_test = np.random.rand(n, n)
    for k in n_rhs:
        B_test = np.random.rand(n, k)
        # Time direct method: one full solve per right-hand side
        start = time.time()
        for i in range(k):
            np.linalg.solve(A_test, B_test[:, i])
        time_direct = time.time() - start
        results['direct'][n].append(time_direct)
        # Time LU method: factor once, then solve all columns together
        start = time.time()
        lu_fac, piv = lu_factor(A_test)
        lu_solve((lu_fac, piv), B_test)
        time_lu = time.time() - start
        results['lu'][n].append(time_lu)
# Plot results: solid = direct, dashed = LU, one color per matrix size
fig, ax = plt.subplots(figsize=(10, 6))
for i, n in enumerate(n_sizes):
    ax.plot(n_rhs, results['direct'][n], 'o-',
            label=f'Direct (n={n})', color=f'C{i}')
    ax.plot(n_rhs, results['lu'][n], 's--',
            label=f'LU (n={n})', color=f'C{i}')
ax.set_xlabel('Number of right-hand sides')
ax.set_ylabel('Time (seconds)')
ax.set_title('Performance: Direct solve vs LU decomposition')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)
ax.set_yscale('log')
plt.tight_layout()
plt.show()
# Interactive LU decomposition step-by-step
print("\n" + "="*50)
print("Step-by-Step LU Decomposition")
print("="*50)
def lu_decomposition_steps(A):
    """Doolittle LU factorization of A, narrating every elimination step.

    Returns (L, U) with unit lower-triangular L and upper-triangular U
    such that L @ U == A.  The input matrix is not modified.  No row
    pivoting is performed, so a zero on the diagonal during elimination
    would cause a division by zero.
    """
    n = A.shape[0]
    L = np.eye(n)
    U = A.copy()
    print("Initial: U = A, L = I")
    print("U =")
    print(U)
    print("\nL =")
    print(L)
    for col in range(n - 1):
        print(f"\nStep {col+1}: Eliminate column {col+1} below diagonal")
        for row in range(col + 1, n):
            multiplier = U[row, col] / U[col, col]
            L[row, col] = multiplier
            U[row, col:] -= multiplier * U[col, col:]
            print(f" L[{row+1},{col+1}] = {multiplier:.3f}")
        print("\nUpdated U:")
        print(U)
        print("\nUpdated L:")
        print(L)
    return L, U
# Example: run the step-by-step factorization and verify L @ U == A
A_demo = np.array([[2, 1, 1],
                   [4, 3, 3],
                   [8, 7, 9]], dtype=float)
print("\nDemonstration with matrix:")
print(A_demo)
L_demo, U_demo = lu_decomposition_steps(A_demo)
print("\n" + "="*30)
print("Final result:")
print("L @ U =")
print(L_demo @ U_demo)
print("\nOriginal A =")
print(A_demo)
print("\nDifference:", np.max(np.abs(A_demo - L_demo @ U_demo)))
Projection operators are fundamental in many applications including least squares and signal processing:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# --- 2D vector projection ---
# 2D Projection Example
print("2D Vector Projection")
print("==================\n")
# Define vectors
v = np.array([3, 4])  # Vector to project
u = np.array([1, 0])  # Vector to project onto
# Projection formula: proj_u(v) = (v·u / u·u) * u
proj_scalar = np.dot(v, u) / np.dot(u, u)
proj_v_onto_u = proj_scalar * u
print(f"Vector v = {v}")
print(f"Vector u = {u}")
print(f"Projection of v onto u = {proj_v_onto_u}")
print(f"Orthogonal component = {v - proj_v_onto_u}")
# Visualize 2D projection: single-vector case left, line subspace right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
# Left plot: Vector projection
ax1.quiver(0, 0, v[0], v[1], angles='xy', scale_units='xy', scale=1,
           color='blue', width=0.01, label='v')
ax1.quiver(0, 0, u[0], u[1], angles='xy', scale_units='xy', scale=1,
           color='red', width=0.01, label='u')
ax1.quiver(0, 0, proj_v_onto_u[0], proj_v_onto_u[1], angles='xy',
           scale_units='xy', scale=1, color='green', width=0.01,
           label='proj_u(v)')
# Draw orthogonal component (starts at the tip of the projection)
orthogonal = v - proj_v_onto_u
ax1.quiver(proj_v_onto_u[0], proj_v_onto_u[1], orthogonal[0],
           orthogonal[1], angles='xy', scale_units='xy', scale=1,
           color='orange', width=0.01, label='v - proj_u(v)')
# Draw dashed line from v down to its projection
ax1.plot([v[0], proj_v_onto_u[0]], [v[1], proj_v_onto_u[1]],
         'k--', alpha=0.5)
ax1.set_xlim(-1, 5)
ax1.set_ylim(-1, 5)
ax1.set_aspect('equal')
ax1.grid(True, alpha=0.3)
ax1.legend()
ax1.set_title('Vector Projection in 2D')
ax1.set_xlabel('x')
ax1.set_ylabel('y')
# Right plot: Projection onto a subspace
ax2.set_title('Projection onto a Line (1D Subspace)')
# Generate points along the line t * line_direction
t = np.linspace(-5, 5, 100)
line_direction = np.array([2, 1])  # Direction of the line
line_points = np.outer(t, line_direction)
# Plot the line
ax2.plot(line_points[:, 0], line_points[:, 1], 'r-',
         linewidth=2, label='Subspace (line)')
# Project several points onto the line
points = np.array([[3, 1], [1, 3], [-1, 2], [2, -1]])
colors = ['blue', 'green', 'purple', 'orange']
for i, point in enumerate(points):
    # Project point onto line using the same scalar-projection formula
    proj_scalar = np.dot(point, line_direction) / np.dot(line_direction, line_direction)
    projection = proj_scalar * line_direction
    # Plot original point (circle) and projection (square)
    ax2.plot(point[0], point[1], 'o', color=colors[i], markersize=8)
    ax2.plot(projection[0], projection[1], 's', color=colors[i], markersize=8)
    # Draw projection line
    ax2.plot([point[0], projection[0]], [point[1], projection[1]],
             '--', color=colors[i], alpha=0.5)
    # Label
    ax2.text(point[0] + 0.1, point[1] + 0.1, f'p{i+1}', fontsize=10)
ax2.set_xlim(-4, 4)
ax2.set_ylim(-4, 4)
ax2.set_aspect('equal')
ax2.grid(True, alpha=0.3)
ax2.legend()
ax2.set_xlabel('x')
ax2.set_ylabel('y')
plt.tight_layout()
plt.show()
# --- Projection matrices ---
print("\n" + "="*50)
print("Projection Matrices")
print("="*50)
# For projection onto span of columns of A (a 2D plane inside R^3)
A = np.array([[1, 0],
              [1, 1],
              [1, 2]])
print("\nMatrix A (we'll project onto its column space):")
print(A)
# Projection matrix P = A(A^T A)^(-1)A^T
ATA = A.T @ A
print("\nA^T A =")
print(ATA)
ATA_inv = np.linalg.inv(ATA)
P = A @ ATA_inv @ A.T
print("\nProjection matrix P = A(A^T A)^(-1)A^T:")
print(P)
# Properties of projection matrix: symmetric, idempotent, eigenvalues 0/1
print("\nProperties of projection matrix:")
print(f"1. P is symmetric: {np.allclose(P, P.T)}")
print(f"2. P^2 = P (idempotent): {np.allclose(P @ P, P)}")
print(f"3. Eigenvalues of P: {np.linalg.eigvals(P)}")
print(" (Should be 0s and 1s)")
# Example: Project a vector onto column space of A
b = np.array([2, 1, 4])
proj_b = P @ b
print(f"\nOriginal vector b = {b}")
print(f"Projection of b onto Col(A) = {proj_b}")
print(f"Residual (orthogonal to Col(A)) = {b - proj_b}")
# Verify orthogonality: residual lies in the null space of A^T
print(f"\nVerification - residual orthogonal to columns of A:")
print(f"A^T @ residual = {A.T @ (b - proj_b)}")
print("(Should be close to zero)")
# --- 3D Visualization: projecting a point onto a plane ---
fig = plt.figure(figsize=(12, 10))
# 3D projection onto a plane
ax3d = fig.add_subplot(221, projection='3d')
# Define a plane by two spanning vectors
v1 = np.array([1, 0, 0])
v2 = np.array([0, 1, 0.5])
# Create plane mesh: all points U*v1 + V*v2 over a grid of coefficients
u_range = np.linspace(-2, 2, 10)
v_range = np.linspace(-2, 2, 10)
U, V = np.meshgrid(u_range, v_range)
# Plane points
X = U * v1[0] + V * v2[0]
Y = U * v1[1] + V * v2[1]
Z = U * v1[2] + V * v2[2]
ax3d.plot_surface(X, Y, Z, alpha=0.3, color='lightblue')
# Point to project
point_3d = np.array([1, 1, 2])
# Project onto plane span(v1, v2) via P = A(A^T A)^(-1)A^T
A_plane = np.column_stack([v1, v2])
P_plane = A_plane @ np.linalg.inv(A_plane.T @ A_plane) @ A_plane.T
proj_point = P_plane @ point_3d
# Plot point and projection
ax3d.scatter(*point_3d, color='red', s=100, label='Original point')
ax3d.scatter(*proj_point, color='green', s=100, label='Projection')
# Draw projection line (perpendicular drop onto the plane)
ax3d.plot([point_3d[0], proj_point[0]],
          [point_3d[1], proj_point[1]],
          [point_3d[2], proj_point[2]], 'k--')
ax3d.set_xlabel('X')
ax3d.set_ylabel('Y')
ax3d.set_zlabel('Z')
ax3d.set_title('3D Projection onto a Plane')
ax3d.legend()
# Application: Gram-Schmidt Orthogonalization
ax_gs = fig.add_subplot(222)
print("\n" + "="*50)
print("Application: Gram-Schmidt Process")
print("="*50)
# Input vectors (stored as columns of this 2x2 array)
vectors = np.array([[3, 1],
                    [2, 2]])
print("\nOriginal vectors:")
for i, v in enumerate(vectors.T):
    print(f"v{i+1} = {v}")
# Gram-Schmidt process: build an orthonormal basis column by column
Q = np.zeros_like(vectors, dtype=float)
# First vector: just normalize
Q[:, 0] = vectors[:, 0] / np.linalg.norm(vectors[:, 0])
# Second vector: subtract projection onto first, then normalize
v2 = vectors[:, 1]
proj = np.dot(v2, Q[:, 0]) * Q[:, 0]
Q[:, 1] = v2 - proj
Q[:, 1] = Q[:, 1] / np.linalg.norm(Q[:, 1])
print("\nOrthonormal vectors:")
for i in range(Q.shape[1]):
    print(f"q{i+1} = {Q[:, i]}")
print(f"\nVerification - dot product: {np.dot(Q[:, 0], Q[:, 1]):.6f}")
print("(Should be 0)")
# Visualize Gram-Schmidt: faint arrows = inputs, bold arrows = orthonormal output
ax_gs.quiver(0, 0, vectors[0, 0], vectors[1, 0], angles='xy',
             scale_units='xy', scale=1, color='blue', width=0.01,
             label='v1', alpha=0.5)
ax_gs.quiver(0, 0, vectors[0, 1], vectors[1, 1], angles='xy',
             scale_units='xy', scale=1, color='red', width=0.01,
             label='v2', alpha=0.5)
# Show orthonormal vectors (unit length, scaled up so they are visible)
scale = 3  # Scale for visibility
ax_gs.quiver(0, 0, scale*Q[0, 0], scale*Q[1, 0], angles='xy',
             scale_units='xy', scale=1, color='blue', width=0.02,
             label='q1')
ax_gs.quiver(0, 0, scale*Q[0, 1], scale*Q[1, 1], angles='xy',
             scale_units='xy', scale=1, color='red', width=0.02,
             label='q2')
# Show projection (dashed segment from v2 to its component along q1)
ax_gs.plot([v2[0], proj[0]], [v2[1], proj[1]], 'k--', alpha=0.5)
ax_gs.set_xlim(-1, 4)
ax_gs.set_ylim(-1, 4)
ax_gs.set_aspect('equal')
ax_gs.grid(True, alpha=0.3)
ax_gs.legend()
ax_gs.set_title('Gram-Schmidt Orthogonalization')
plt.tight_layout()
plt.show()
Least squares finds the best approximate solution when a system is overdetermined:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# --- Least squares via three equivalent methods ---
# Generate noisy data around the true line y = 2x + 1 (fixed seed for reproducibility)
np.random.seed(42)
n_points = 50
x = np.linspace(0, 10, n_points)
y_true = 2 * x + 1
y_noise = y_true + np.random.normal(0, 2, n_points)
print("Least Squares Approximation")
print("==========================\n")
# Method 1: Using normal equations
# We want to fit y = ax + b, so A = [x, 1]
A = np.column_stack([x, np.ones(n_points)])
print("Design matrix A shape:", A.shape)
print("First 5 rows of A:")
print(A[:5])
# Normal equations: A^T A x = A^T b
ATA = A.T @ A
ATb = A.T @ y_noise
print("\nNormal equations: A^T A x = A^T b")
print("A^T A =")
print(ATA)
print("\nA^T b =")
print(ATb)
# Solve normal equations
coeffs_normal = np.linalg.solve(ATA, ATb)
print(f"\nSolution via normal equations: a = {coeffs_normal[0]:.4f}, b = {coeffs_normal[1]:.4f}")
# Method 2: Using np.linalg.lstsq (SVD-based, numerically preferred)
coeffs_lstsq, residuals, rank, s = np.linalg.lstsq(A, y_noise, rcond=None)
print(f"\nSolution via lstsq: a = {coeffs_lstsq[0]:.4f}, b = {coeffs_lstsq[1]:.4f}")
print(f"Residual sum of squares: {residuals[0]:.4f}")
print(f"Rank of A: {rank}")
# Method 3: Using QR decomposition (solve R x = Q^T b)
Q, R = np.linalg.qr(A)
coeffs_qr = np.linalg.solve(R, Q.T @ y_noise)
print(f"\nSolution via QR: a = {coeffs_qr[0]:.4f}, b = {coeffs_qr[1]:.4f}")
# Visualization: fit, residuals, and polynomial fits in a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Plot 1: Data and fitted line
ax1 = axes[0, 0]
y_fit = coeffs_lstsq[0] * x + coeffs_lstsq[1]
ax1.scatter(x, y_noise, alpha=0.6, label='Noisy data')
ax1.plot(x, y_true, 'g--', linewidth=2, label='True line: y = 2x + 1')
ax1.plot(x, y_fit, 'r-', linewidth=2,
         label=f'Fitted: y = {coeffs_lstsq[0]:.2f}x + {coeffs_lstsq[1]:.2f}')
# Plot residuals as vertical segments (every 5th point to avoid clutter)
for i in range(0, n_points, 5):
    ax1.plot([x[i], x[i]], [y_noise[i], y_fit[i]], 'k-', alpha=0.3, linewidth=1)
ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax1.set_title('Linear Least Squares Fit')
ax1.legend()
ax1.grid(True, alpha=0.3)
# Plot 2: Residuals (should scatter randomly around zero for a good fit)
ax2 = axes[0, 1]
residuals_plot = y_noise - y_fit
ax2.scatter(x, residuals_plot, alpha=0.6)
ax2.axhline(y=0, color='r', linestyle='--')
ax2.set_xlabel('x')
ax2.set_ylabel('Residuals')
ax2.set_title('Residual Plot')
ax2.grid(True, alpha=0.3)
# Plot 3: Polynomial fitting
ax3 = axes[1, 0]
# Fit polynomials of different degrees via least squares on Vandermonde matrices
degrees = [1, 2, 3, 5]
colors = ['blue', 'green', 'orange', 'red']
for deg, color in zip(degrees, colors):
    # Create polynomial features (columns 1, x, x^2, ..., x^deg)
    A_poly = np.vander(x, deg + 1, increasing=True)
    coeffs_poly = np.linalg.lstsq(A_poly, y_noise, rcond=None)[0]
    # Evaluate polynomial on a dense grid for a smooth curve
    x_plot = np.linspace(0, 10, 200)
    A_plot = np.vander(x_plot, deg + 1, increasing=True)
    y_poly = A_plot @ coeffs_poly
    ax3.plot(x_plot, y_poly, color=color, linewidth=2,
             label=f'Degree {deg}')
ax3.scatter(x, y_noise, alpha=0.6, color='black', s=20)
ax3.set_xlabel('x')
ax3.set_ylabel('y')
ax3.set_title('Polynomial Least Squares')
ax3.legend()
ax3.grid(True, alpha=0.3)
ax3.set_ylim(-10, 30)
# Plot 4: Geometric interpretation — least squares as projection onto Col(A)
ax4 = axes[1, 1]
# Simple 3-point example for visualization
x_simple = np.array([1, 2, 3])
y_simple = np.array([2, 3, 5])
A_simple = np.column_stack([x_simple, np.ones(3)])
# Project b onto column space of A using P = A(A^T A)^(-1)A^T
P = A_simple @ np.linalg.inv(A_simple.T @ A_simple) @ A_simple.T
y_proj = P @ y_simple
ax4.scatter(x_simple, y_simple, s=100, color='blue',
            label='Data points', zorder=5)
ax4.scatter(x_simple, y_proj, s=100, color='red',
            marker='s', label='Projections', zorder=5)
# Fitted line (coincides with the projected points)
coeffs_simple = np.linalg.lstsq(A_simple, y_simple, rcond=None)[0]
x_line = np.linspace(0.5, 3.5, 100)
y_line = coeffs_simple[0] * x_line + coeffs_simple[1]
ax4.plot(x_line, y_line, 'r-', linewidth=2, label='Least squares line')
# Draw projection lines (vertical residual segments)
for i in range(3):
    ax4.plot([x_simple[i], x_simple[i]],
             [y_simple[i], y_proj[i]],
             'k--', alpha=0.5)
ax4.set_xlabel('x')
ax4.set_ylabel('y')
ax4.set_title('Geometric View: Projection onto Col(A)')
ax4.legend()
ax4.grid(True, alpha=0.3)
ax4.set_xlim(0.5, 3.5)
plt.tight_layout()
plt.show()
# --- Weighted least squares ---
print("\n" + "="*50)
print("Weighted Least Squares")
print("="*50)
# Create weights (inverse of variance)
# Points in the middle have higher confidence (lower variance);
# the Gaussian bump centers the weight at x = 5.
weights = np.exp(-((x - 5)**2) / 10)
W = np.diag(weights)
print("\nWeight matrix (diagonal):")
print(np.diag(W)[:10], "...")
# Weighted normal equations: A^T W A x = A^T W b
ATWA = A.T @ W @ A
ATWb = A.T @ W @ y_noise
coeffs_weighted = np.linalg.solve(ATWA, ATWb)
print(f"\nWeighted LS solution: a = {coeffs_weighted[0]:.4f}, b = {coeffs_weighted[1]:.4f}")
print(f"Unweighted solution: a = {coeffs_lstsq[0]:.4f}, b = {coeffs_lstsq[1]:.4f}")
# Visualize weighted least squares against the unweighted fit
fig, ax = plt.subplots(figsize=(10, 6))
y_fit_weighted = coeffs_weighted[0] * x + coeffs_weighted[1]
y_fit_unweighted = coeffs_lstsq[0] * x + coeffs_lstsq[1]
# Plot data with marker size and color proportional to weight
scatter = ax.scatter(x, y_noise, s=weights*100, alpha=0.6,
                     c=weights, cmap='viridis',
                     label='Data (size ∝ weight)')
ax.plot(x, y_fit_weighted, 'r-', linewidth=2,
        label=f'Weighted LS: y = {coeffs_weighted[0]:.2f}x + {coeffs_weighted[1]:.2f}')
ax.plot(x, y_fit_unweighted, 'b--', linewidth=2,
        label=f'Unweighted LS: y = {coeffs_lstsq[0]:.2f}x + {coeffs_lstsq[1]:.2f}')
plt.colorbar(scatter, label='Weight')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Weighted vs Unweighted Least Squares')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()
# --- Ridge regression (regularized least squares) ---
print("\n" + "="*50)
print("Ridge Regression (Regularized Least Squares)")
print("="*50)
# Create more features for overfitting demonstration
A_poly = np.vander(x, 10, increasing=True)  # 10th degree polynomial
# Different regularization parameters (lambda = 0 is ordinary least squares)
lambdas = [0, 0.01, 0.1, 1, 10]
fig, ax = plt.subplots(figsize=(10, 6))
for lam in lambdas:
    # Ridge regression: minimize ||Ax - b||^2 + lambda||x||^2
    # Closed form: x = (A^T A + lambda I)^(-1) A^T b
    n_features = A_poly.shape[1]
    coeffs_ridge = np.linalg.solve(
        A_poly.T @ A_poly + lam * np.eye(n_features),
        A_poly.T @ y_noise
    )
    # Evaluate the fitted polynomial on a dense grid
    x_plot = np.linspace(0, 10, 200)
    A_plot = np.vander(x_plot, 10, increasing=True)
    y_ridge = A_plot @ coeffs_ridge
    ax.plot(x_plot, y_ridge, linewidth=2,
            label=f'λ = {lam}', alpha=0.8)
ax.scatter(x, y_noise, alpha=0.6, color='black', s=20)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Ridge Regression with Different Regularization')
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_ylim(-10, 30)
plt.show()