'''
The following is based on "A Concise Extensible Metalanguage for
Translator Implementation", by Douglas L. Michels, 3rd USA-JAPAN Computer
Conference, 1978.
=> https://apps.dtic.mil/sti/citations/ADA036466
=> https://apps.dtic.mil/sti/tr/pdf/ADA036466.pdf
This is a direct transliteration of Michels' interpreter code from the
paper to Python. It happens that the syntax and semantics he used are
very close to Python already so the code is almost identical. The major
difference is an explicit check for null input in the Test() function.
The program is very slow (it takes over two seconds to recognize
itself on my laptop!) but that can be dealt with by putting a cache
before the Test() function. (You can cache other functions too but just
caching the Test() function makes the program fast enough on modern
hardware.)
'''
# (Normally I would assert copyright on this, but because it is so close in
# form and spirit to the code in the paper it doesn't seem appropriate.)
from functools import cache
grammar = ''.join('''\
G |&:L
&:R
:G
&:L
:R
R |&":
:L
|&"&
&:R
:R
|&"|
&:R
:R
|&">
:L
&""
:L
L |"A|"B|"C|"D|"E|"F|"G|"H|"I|"J|"K|"L|"M
|"N|"O|"P|"Q|"R|"S|"T|"U|"V|"W|"X|"Y|"Z
|"&|"||":|""|">|"(|")|"'|"_|"[|"]|"=|";"*
'''.split()).replace('_', ' ')
# Here it is for reference:
# G|&:L&:R:G&:L:RR|&"::L|&"&&:R:R|&"|&:R:R|&">:L&"":LL|"A|"B|"C|"D|"E|"F|"G|"H|"I|"J|"K|"L|"M|"N|"O|"P|"Q|"R|"S|"T|"U|"V|"W|"X|"Y|"Z|"&|"||":|""|">|"(|")|"'|" |"[|"]|"=|";"*
# Some string handling utilities.
def first(string):
return string[0]
def rest(string):
return string[1:]
def Concat(left, right):
return f'{left}{right}'
# The Interpreter
def Machine(G, I):
if Test(G, rest(G), I) and not Remaining(G, rest(G), I):
return True, Emit(G, rest(G), I)
return False, ''
@cache
def Test(G, R, I):
ch = first(R)
if ':' == ch: return Test(G, Find(G, rest(R)), I)
if '&' == ch:
if Test(G, rest(R), I):
return Test(G, Skip(rest(R)), Remaining(G, rest(R), I))
return False
if '|' == ch:
if Test(G, rest(R), I):
return True
return Test(G, Skip(rest(R)), I)
if '>' == ch: return True
if '"' == ch: return bool(I) and first(rest(R)) == first(I)
raise ValueError # should never get here
def Remaining(G, R, I):
ch = first(R)
if ':' == ch: return Remaining(G, Find(G, rest(R)), I)
if '&' == ch: return Remaining(G, Skip(rest(R)), Remaining(G, rest(R), I))
if '|' == ch:
if Test(G, rest(R), I):
return Remaining(G, rest(R), I)
return Remaining(G, Skip(rest(R)), I)
if '>' == ch: return I
if '"' == ch: return rest(I)
raise ValueError # should never get here
def Emit(G, R, I):
ch = first(R)
if ':' == ch: return Emit(G, Find(G, rest(R)), I)
if '&' == ch: return Concat(
Emit(G, rest(R), I),
Emit(G, Skip(rest(R)), Remaining(G, rest(R), I))
)
if '|' == ch:
if Test(G, rest(R), I):
return Emit(G, rest(R), I)
return Emit(G, Skip(rest(R)), I)
if '>' == ch: return first(rest(R))
if '"' == ch: return ''
raise ValueError # should never get here
def Skip(R):
ch = first(R)
if ':' == ch: return rest(rest(R))
if '&' == ch: return Skip(Skip(rest(R)))
if '|' == ch: return Skip(Skip(rest(R)))
if '>' == ch: return rest(rest(R))
if '"' == ch: return rest(rest(R))
raise ValueError # should never get here
def Find(G, R):
if first(G) == first(R):
return rest(G)
return Find(Skip(rest(G)), R)
if __name__ == '__main__':
p = Machine(grammar, grammar)
assert p[0]
# The character recognizer pattern was generated with the help of this
# bit of doggerl. I use '_' for ' ' (space) and then replace it after
# stripping blanks.
##k = '''ABCDEFGHIJKLMNOPQRSTUVWXYZ&|:">()'_[]=;*'''
##k = list(k)
##k.insert(0, '')
##k = '|"'.join(k)
##print(k)