# Takes a (28, 28) image flattened to (784,), reduces to 128, then ten
# dimensions, making the final classification with argmax of the
# output.
class MNIST:
    """Two-layer, bias-free network: 784 -> 128 -> 10.

    Calling the instance runs the full forward pass; indexing it with an
    integer ``k`` yields a function that evaluates only the first ``k``
    layers.
    """

    def __init__(self):
        self.layers = [
            Linear(784, 128, bias=False),
            Linear(128, 10, bias=False),
        ]

    def __call__(self, x):
        """Run ``x`` through every layer.

        Each layer except the last is followed by ``leakyrelu``; the last
        layer's output is returned without an activation.
        """
        out = x
        for hidden in self.layers[:-1]:
            out = hidden(out)
            out = out.leakyrelu()
        return self.layers[-1](out)

    def __getitem__(self, layer):
        """Return a function computing the activations after ``layer`` layers.

        The returned function applies layers ``0 .. layer-1`` in order, each
        followed by ``leakyrelu`` (so ``net[0]`` is the identity). Unlike
        ``__call__``, the activation is applied after *every* layer that is
        evaluated, including the last one when ``layer == len(self.layers)``.
        An index past the number of layers raises ``IndexError`` when the
        returned function is called.
        """
        def prefix(x):
            depth = 0
            # Walk forward until exactly `layer` layers have been applied.
            while depth != layer:
                x = self.layers[depth](x).leakyrelu()
                depth += 1
            return x

        return prefix
# from tinygrad
# <https://github.com/tinygrad/tinygrad/blob/36ab04ae35f9cec3a9635ad62047a0051ea27d51/extra/datasets/__init__.py#L8-L15>
def fetch_mnist():
    """Load the gzipped MNIST files from the ``mnist`` directory next to this module.

    Returns:
        A tuple ``(X_train, Y_train, X_test, Y_test)``. The image arrays are
        ``float32`` with shape ``(n, 784)`` (one flattened 28x28 image per
        row); the label arrays are 1-D ``uint8``.

    Raises:
        OSError: if one of the four ``.gz`` files is missing or unreadable.
    """
    from pathlib import Path
    import gzip

    def parse(file):
        # Read inside a `with` block so the gzip handle is closed right away
        # instead of being left for the garbage collector.
        with gzip.open(file) as stream:
            raw = stream.read()
        # frombuffer returns a read-only view of `raw`; copy to get an
        # independent, writable array.
        return np.frombuffer(raw, dtype=np.uint8).copy()

    def images(file):
        # Skip the 16-byte file header, then one row of 28*28 pixels per image.
        return parse(file)[0x10:].reshape((-1, 28 * 28)).astype(np.float32)

    def labels(file):
        # Skip the 8-byte file header; the rest is one byte per label.
        return parse(file)[8:]

    dirname = Path(__file__).parent.resolve()
    X_train = images(dirname / "mnist/train-images-idx3-ubyte.gz")
    Y_train = labels(dirname / "mnist/train-labels-idx1-ubyte.gz")
    X_test = images(dirname / "mnist/t10k-images-idx3-ubyte.gz")
    Y_test = labels(dirname / "mnist/t10k-labels-idx1-ubyte.gz")
    return X_train, Y_train, X_test, Y_test
# Load the dataset once, at import time, so the training and experiment
# functions below share the same module-level arrays. Note that importing
# this module therefore reads the four MNIST .gz files from disk.
X_train, Y_train, X_test, Y_test = fetch_mnist()
def trained(steps=1000):
    """Build a fresh ``MNIST`` network and train it with SGD.

    Every step draws 64 training indices uniformly at random (with
    replacement), computes the sparse categorical cross-entropy of the
    network output against the labels, and applies one optimizer update to
    the layers' weights (learning rate 3e-4). On the last step the loss and
    accuracy of that final batch, as evaluated before the step's update,
    are printed.

    Args:
        steps: Number of optimization steps to run.

    Returns:
        The trained ``MNIST`` instance.
    """
    net = MNIST()
    opt = SGD([layer.weight for layer in net.layers], lr=3e-4)
    Tensor.training = True
    try:
        for step in range(steps):
            sample = np.random.randint(0, X_train.shape[0], size=(64))
            batch = Tensor(X_train[sample], requires_grad=False)
            labels = Tensor(Y_train[sample])
            guess = net(batch)
            loss = guess.sparse_categorical_crossentropy(labels)
            opt.zero_grad()
            loss.backward()
            opt.step()
            # Report only once, at the end of training.
            if step == steps - 1:
                pred = guess.argmax(axis=-1)
                accuracy = (pred == labels).mean()
                print(f"Step: {step+1}\tLoss: {loss.numpy()}\tAccuracy: {accuracy.numpy()}")
    finally:
        # Tensor.training is global state: switch it off even when a step
        # raises, so a failed run does not leave training mode enabled.
        Tensor.training = False
    return net
# Writes out the 2-D PCA projection of the first layer's (activated)
# output for 200 random points from the test dataset.
def experiment():
    """Train a network and dump a 2-D projection of its first-layer output.

    Draws 200 random test indices, evaluates ``net[1]`` (the first layer
    followed by its activation) on those points, and projects every
    resulting row onto the basis returned by ``PCA2d``. One line per point
    is written to ``mnist-experiment.data``: the two projected coordinates
    and the point's label, separated by tabs.
    """
    net = trained()
    chosen = np.random.randint(0, X_test.shape[0], size=(200))
    points = Tensor(X_test[chosen])
    labels = Tensor(Y_test[chosen])

    # Activations after the first layer, one row per sampled point.
    activations = net[1](points).numpy()
    basis = PCA2d(activations)

    with open("mnist-experiment.data", "w") as out:
        for row_number, activation in enumerate(activations):
            classification = labels[row_number].numpy()
            projected = activation @ basis
            line = "{}\t{}\t{}\n".format(projected[0], projected[1], classification)
            out.write(line)
# Same as experiment above, but only evaluates the network on test points
# from two classes: the first (up to) 100 each of the digits 4 and 7.
def experiment2():
    """Train a network and dump a 2-D projection for two digit classes only.

    Uses the first (up to) 100 test points labelled 4 followed by the first
    (up to) 100 labelled 7, evaluates ``net[1]`` (the first layer followed
    by its activation) on them, and projects every resulting row onto the
    basis returned by ``PCA2d``. One line per point is written to
    ``mnist-experiment-2.data``: the two projected coordinates and the
    point's label, separated by tabs.
    """
    net = trained()

    # Test-set positions of each class, in dataset order.
    fours = [position for position, digit in enumerate(Y_test) if digit == 4]
    sevens = [position for position, digit in enumerate(Y_test) if digit == 7]
    chosen = fours[:100] + sevens[:100]

    points = Tensor(X_test[chosen])
    labels = Tensor(Y_test[chosen])

    # Activations after the first layer, one row per selected point.
    activations = net[1](points).numpy()
    basis = PCA2d(activations)

    with open("mnist-experiment-2.data", "w") as out:
        for row_number, activation in enumerate(activations):
            classification = labels[row_number].numpy()
            projected = activation @ basis
            line = "{}\t{}\t{}\n".format(projected[0], projected[1], classification)
            out.write(line)