# mnist-experiment.py -rw-r--r-- 3.4 KiB View raw
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Takes a (28, 28) image flattened to (784,), reduces to 128, then ten
# dimensions, making the final classification with argmax of the
# output.
class MNIST:
    """Two-layer, bias-free perceptron: 784 -> 128 -> 10.

    Calling the instance runs every layer, applying ``leakyrelu`` after
    each layer except the last, and returns the raw output of the last
    layer. Indexing the instance (``net[k]``) returns a function that
    runs only the first ``k`` layers.
    """

    def __init__(self):
        self.layers = [Linear(784,128,bias=False), Linear(128,10,bias=False)]

    def __call__(self, x):
        """Run the whole network on ``x`` and return the last layer's output."""
        for layer in self.layers[:-1]:
            x = layer(x).leakyrelu()
        return self.layers[-1](x)

    def __getitem__(self, layer):
        """Return a function computing the output of the first ``layer`` layers.

        ``net[0]`` is the identity, ``net[1]`` is the activated output of
        the first layer, and ``net[len(net.layers)]`` is the same
        computation as ``net(x)`` (no activation after the last layer).

        Raises:
            IndexError: if ``layer`` is outside ``0..len(self.layers)``.
                Raising here, rather than when the returned function is
                called, also lets ``for f in net`` terminate.
        """
        if not 0 <= layer <= len(self.layers):
            raise IndexError(
                f"layer index {layer} out of range 0..{len(self.layers)}"
            )

        def prefix(x):
            last = len(self.layers) - 1
            for index, stage in enumerate(self.layers[:layer]):
                x = stage(x)
                # Mirror __call__: the final layer's output stays raw.
                if index != last:
                    x = x.leakyrelu()
            return x

        return prefix

# from tinygrad
# <https://github.com/tinygrad/tinygrad/blob/36ab04ae35f9cec3a9635ad62047a0051ea27d51/extra/datasets/__init__.py#L8-L15>
def fetch_mnist():
    """Load the gzipped MNIST IDX files stored next to this script.

    The four files are read from the ``mnist`` directory beside this
    file. Images are returned as float32 arrays with one flattened
    28*28 image per row; labels are returned as uint8 arrays.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``.
    """
    from pathlib import Path
    import gzip

    def parse(file):
        # Close the gzip handle as soon as the bytes are read instead of
        # leaving it to the garbage collector. The copy makes the array
        # writable (frombuffer alone yields a read-only view).
        with gzip.open(file) as stream:
            return np.frombuffer(stream.read(), dtype=np.uint8).copy()

    dirname = Path(__file__).parent.resolve()
    # Image files: skip the 16-byte IDX3 header. Label files: skip the
    # 8-byte IDX1 header.
    X_train = parse(dirname / "mnist/train-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
    Y_train = parse(dirname / "mnist/train-labels-idx1-ubyte.gz")[8:]
    X_test = parse(dirname / "mnist/t10k-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
    Y_test = parse(dirname / "mnist/t10k-labels-idx1-ubyte.gz")[8:]
    return X_train, Y_train, X_test, Y_test

# Load the dataset once at import time; trained(), experiment() and
# experiment2() read these module-level arrays.
X_train, Y_train, X_test, Y_test = fetch_mnist()

def trained(steps=1000, batch_size=64, lr=3e-4):
    """Train a fresh MNIST network with SGD and return it.

    Each step draws ``batch_size`` training indices uniformly at random
    (with replacement) from the module-level ``X_train``/``Y_train`` and
    takes one optimizer step on the sparse categorical cross-entropy
    loss. Loss and batch accuracy are printed for the final step only.

    Args:
        steps: number of optimizer steps to run.
        batch_size: number of training examples drawn per step.
        lr: learning rate passed to SGD.

    Returns:
        The trained ``MNIST`` instance.
    """
    net = MNIST()
    opt = SGD([layer.weight for layer in net.layers], lr=lr)
    Tensor.training = True
    try:
        for step in range(steps):
            sample = np.random.randint(0, X_train.shape[0], size=batch_size)
            batch = Tensor(X_train[sample], requires_grad=False)
            labels = Tensor(Y_train[sample])
            guess = net(batch)
            loss = guess.sparse_categorical_crossentropy(labels)
            opt.zero_grad()
            loss.backward()
            opt.step()
            if step == steps-1:
                pred = guess.argmax(axis=-1)
                accuracy = (pred == labels).mean()
                print(f"Step: {step+1}\tLoss: {loss.numpy()}\tAccuracy: {accuracy.numpy()}")
    finally:
        # Always clear the global flag, even if a training step raised,
        # so later code does not run in training mode.
        Tensor.training = False
    return net

# Writes out the PCA projection of the first layer's output (after
# leakyrelu) for 200 random points from the test dataset.
def experiment():
    """Train a network and dump a 2-D projection of hidden activations.

    Draws 200 random test indices, evaluates ``net[1]`` on those points,
    and writes one tab-separated line per point to
    ``mnist-experiment.data``: the first two components of the row
    multiplied by the ``PCA2d`` result, then the point's label.
    """
    net = trained()
    chosen = np.random.randint(0, X_test.shape[0], size=(200))
    points = Tensor(X_test[chosen])
    labels = Tensor(Y_test[chosen])

    # net[1] evaluates only the first layer (with its activation).
    activations = net[1](points).numpy()
    # presumably a matrix whose first two columns are the principal
    # directions -- confirm against PCA2d
    basis = PCA2d(activations)
    row_format = "{}\t{}\t{}\n"
    with open("mnist-experiment.data", "w") as out:
        for row, activation in enumerate(activations):
            projected = activation @ basis
            digit = labels[row].numpy()
            out.write(row_format.format(projected[0], projected[1], digit))

# Same as experiment above, but only evaluates the network on two
# classifications.
def experiment2():
    """Train a network and dump projected activations for digits 4 and 7.

    Takes the first 100 test examples labelled 4 followed by the first
    100 labelled 7, evaluates ``net[1]`` on them, and writes one
    tab-separated line per point to ``mnist-experiment-2.data``: the
    first two components of the row multiplied by the ``PCA2d`` result,
    then the point's label.
    """
    net = trained()
    fours = [idx for idx, digit in enumerate(Y_test) if digit == 4]
    sevens = [idx for idx, digit in enumerate(Y_test) if digit == 7]
    chosen = fours[:100] + sevens[:100]
    points = Tensor(X_test[chosen])
    labels = Tensor(Y_test[chosen])

    # net[1] evaluates only the first layer (with its activation).
    activations = net[1](points).numpy()
    # presumably a matrix whose first two columns are the principal
    # directions -- confirm against PCA2d
    basis = PCA2d(activations)
    row_format = "{}\t{}\t{}\n"
    with open("mnist-experiment-2.data", "w") as out:
        for row, activation in enumerate(activations):
            projected = activation @ basis
            digit = labels[row].numpy()
            out.write(row_format.format(projected[0], projected[1], digit))