# Michels1978.py -rw-r--r-- 4.0 KiB View raw
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
'''
The following is based on "A Concise Extensible Metalanguage for
Translator Implementation", by Douglas L. Michels, 3rd USA-JAPAN Computer
Conference, 1978.

=> https://apps.dtic.mil/sti/citations/ADA036466
=> https://apps.dtic.mil/sti/tr/pdf/ADA036466.pdf

This is a direct transliteration of Michels' interpreter code from the
paper to Python.  It happens that the syntax and semantics he used are
very close to Python already so the code is almost identical.  The major
difference is an explicit check for null input in the Test() function.

The program is very slow (it takes over two seconds to recognize
itself on my laptop!) but that can be dealt with by putting a cache
before the Test() function.  (You can cache other functions too but just
caching the Test() function makes the program fast enough on modern
hardware.)
'''
# (Normally I would assert copyright on this, but because it is so close in
# form and spirit to the code in the paper it doesn't seem appropriate.)
from functools import cache


grammar = ''.join('''\

    G |&:L
        &:R
         :G
       &:L
        :R

    R |&":
        :L
       |&"&
         &:R
          :R
        |&"|
          &:R
           :R
         |&">
           :L
          &""
           :L

    L |"A|"B|"C|"D|"E|"F|"G|"H|"I|"J|"K|"L|"M
      |"N|"O|"P|"Q|"R|"S|"T|"U|"V|"W|"X|"Y|"Z
      |"&|"||":|""|">|"(|")|"'|"_|"[|"]|"=|";"*

'''.split()).replace('_', ' ')

# Here it is for reference:
# G|&:L&:R:G&:L:RR|&"::L|&"&&:R:R|&"|&:R:R|&">:L&"":LL|"A|"B|"C|"D|"E|"F|"G|"H|"I|"J|"K|"L|"M|"N|"O|"P|"Q|"R|"S|"T|"U|"V|"W|"X|"Y|"Z|"&|"||":|""|">|"(|")|"'|" |"[|"]|"=|";"*


# Some string handling utilities.


def first(string):
    return string[0]


def rest(string):
    return string[1:]


def Concat(left, right):
    return f'{left}{right}'


# The Interpreter


def Machine(G, I):
    if Test(G, rest(G), I) and not Remaining(G, rest(G), I):
        return True, Emit(G, rest(G), I)
    return False, ''


@cache
def Test(G, R, I):
    ch = first(R)
    if ':' == ch: return Test(G, Find(G, rest(R)), I)
    if '&' == ch:
        if Test(G, rest(R), I):
            return Test(G, Skip(rest(R)), Remaining(G, rest(R), I))
        return False
    if '|' == ch:
        if Test(G, rest(R), I):
            return True
        return Test(G, Skip(rest(R)), I)
    if '>' == ch: return True
    if '"' == ch: return bool(I) and first(rest(R)) == first(I)
    raise ValueError  # should never get here


def Remaining(G, R, I):
    ch = first(R)
    if ':' == ch: return Remaining(G, Find(G, rest(R)), I)
    if '&' == ch: return Remaining(G, Skip(rest(R)), Remaining(G, rest(R), I))
    if '|' == ch:
        if Test(G, rest(R), I):
            return Remaining(G, rest(R), I)
        return Remaining(G, Skip(rest(R)), I)
    if '>' == ch: return I
    if '"' == ch: return rest(I)
    raise ValueError  # should never get here


def Emit(G, R, I):
    ch = first(R)
    if ':' == ch: return Emit(G, Find(G, rest(R)), I)
    if '&' == ch: return Concat(
        Emit(G, rest(R), I),
        Emit(G, Skip(rest(R)), Remaining(G, rest(R), I))
        )
    if '|' == ch:
        if Test(G, rest(R), I):
            return Emit(G, rest(R), I)
        return Emit(G, Skip(rest(R)), I)
    if '>' == ch: return first(rest(R))
    if '"' == ch: return ''
    raise ValueError  # should never get here


def Skip(R):
    ch = first(R)
    if ':' == ch: return rest(rest(R))
    if '&' == ch: return Skip(Skip(rest(R)))
    if '|' == ch: return Skip(Skip(rest(R)))
    if '>' == ch: return rest(rest(R))
    if '"' == ch: return rest(rest(R))
    raise ValueError  # should never get here


def Find(G, R):
    if first(G) == first(R):
        return rest(G)
    return Find(Skip(rest(G)), R)


if __name__ == '__main__':
    p = Machine(grammar, grammar)
    assert p[0]


# The character recognizer pattern was generated with the help of this
# bit of doggerl.  I use '_' for ' ' (space) and then replace it after
# stripping blanks.
##k = '''ABCDEFGHIJKLMNOPQRSTUVWXYZ&|:">()'_[]=;*'''
##k = list(k)
##k.insert(0, '')
##k = '|"'.join(k)
##print(k)