CNN From Scratch
NumPy implementation of a 2D convolution layer
A minimal implementation of a CNN convolution layer using only NumPy. No PyTorch, no TensorFlow - just matrix operations.
Key Concepts
He initialization: weights drawn with std = sqrt(2 / fan_in) for better gradient flow in ReLU networks.
Padding: zero-padding the input to control the output dimensions.
Stride: the step size used when sliding the kernel across the input.
Output size: H_out = ((H - K + 2P) / S) + 1, where H is the input height, K the kernel size, P the padding, and S the stride.
Implementation
import numpy as np
class CNNFromScratch:
    """A 2D convolution layer implemented with plain NumPy (forward pass only).

    Weights use He initialization (std = sqrt(2 / fan_in)), suited to ReLU
    networks. Supports square kernels, symmetric zero-padding, and a uniform
    stride. No autograd/backprop.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding, stride=1, bias=False):
        """Create the layer's parameters.

        Args:
            in_channels: number of input channels C_in.
            out_channels: number of output channels C_out (number of filters).
            kernel_size: side length K of the square kernel.
            padding: symmetric zero-padding P applied to height and width.
            stride: step S used when sliding the kernel (default 1).
            bias: if True, add a per-output-channel bias term.
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        # Per-output-channel bias, or None when disabled.  Shape: (C_out,)
        self.bias = np.random.randn(self.out_channels) if bias else None
        # He initialization: fan_in = K * K * C_in.
        fan_in = self.kernel_size ** 2 * self.in_channels
        std = np.sqrt(2 / fan_in)
        self.weight = np.random.randn(
            self.out_channels, self.in_channels, self.kernel_size, self.kernel_size
        ) * std  # (C_out, C_in, K, K)

    def forward(self, x):
        """Apply the convolution to a batch of images.

        Args:
            x: input array of shape (B, C_in, H, W).

        Returns:
            Output array of shape (B, C_out, H_out, W_out), where
            H_out = (H - K + 2P) // S + 1 (and analogously for W_out).
        """
        B, C, H, W = x.shape
        # Zero-pad height and width only.
        if self.padding > 0:
            x = np.pad(
                x,
                pad_width=(
                    (0, 0),                        # batch
                    (0, 0),                        # channel
                    (self.padding, self.padding),  # height
                    (self.padding, self.padding),  # width
                ),
                mode='constant',
            )  # x: (B, C_in, H + 2P, W + 2P)

        # Output dimensions from the standard conv-arithmetic formula.
        H_out = (H - self.kernel_size + 2 * self.padding) // self.stride + 1
        W_out = (W - self.kernel_size + 2 * self.padding) // self.stride + 1
        output = np.zeros((B, self.out_channels, H_out, W_out))

        # Slide the kernel over spatial positions only. The per-sample and
        # per-filter loops of the naive version are replaced by one tensordot
        # per window, contracting over (C_in, K, K) for all B and C_out at once.
        for i in range(H_out):
            h_start = i * self.stride
            h_end = h_start + self.kernel_size
            for j in range(W_out):
                w_start = j * self.stride
                w_end = w_start + self.kernel_size
                patch = x[:, :, h_start:h_end, w_start:w_end]  # (B, C_in, K, K)
                # (B, C_in, K, K) . (C_out, C_in, K, K) -> (B, C_out)
                output[:, :, i, j] = np.tensordot(
                    patch, self.weight, axes=([1, 2, 3], [1, 2, 3])
                )

        if self.bias is not None:
            output += self.bias[None, :, None, None]  # broadcast over B, H, W
        return output  # (B, C_out, H_out, W_out)
# Quick smoke test
if __name__ == "__main__":
    # "Same" convolution: K=3 with P=1 and S=1 preserves the 8x8 spatial size.
    layer = CNNFromScratch(in_channels=3, out_channels=5, kernel_size=3, padding=1, stride=1, bias=True)
    image = np.random.randn(1, 3, 8, 8)  # (B=1, C_in=3, H=8, W=8)
    result = layer.forward(image)
    print(result.shape)  # expect (1, 5, 8, 8) -> (B, C_out, H_out, W_out)
Complexity
Time: O(B * C_out * H_out * W_out * C_in * K * K) for the convolution.
Space: O(B * C_out * H_out * W_out) for the output tensor.
Further Reading
CS231n: Convolutional Neural Networks for Visual Recognition — Stanford's comprehensive guide to CNNs, covering architecture, layers, and practical considerations.
A Guide to Convolution Arithmetic — Technical paper explaining convolution operations, padding, stride, and transposed convolutions.
Delving Deep into Rectifiers (He Initialization) — The original paper introducing He initialization for deep networks with ReLU activations.
Image Kernels Explained Visually — Interactive visualization of how convolution kernels transform images: blur, sharpen, edge detection.