1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python
# mdpynb MD_INPUT_FILE [PDF_OUTPUT_FILE]
# Run python code cells from markdown, and format results as pdf
# James Brind, October 2022, CC-BY-4.0
import sys, subprocess, re, datetime
from io import StringIO
# Define some delimiter strings
# Marker printed at the start of every code cell so that the captured STDOUT
# can later be split back into per-cell chunks
STDOUT_DELIMITER = "!!! ENTERING BLOCK"
STDOUT_DELIMITER_PRINT = 'print("' + STDOUT_DELIMITER + '",end="")'
# Fences that open and close a Python code cell in the markdown source
BLOCK_START = "```python"
BLOCK_END = "```"
# Captures the file name passed to plt.savefig. A raw string avoids invalid
# escape sequences, and the lazy group stops at the first closing quote so
# later quoted arguments (e.g. bbox_inches="tight") are not swallowed.
SAVEFIG_RE = re.compile(r"""plt\.savefig\(["'](.*?)["']""")
def exec_get_output(s):
    """Execute python code in string `s` and return output.

    The code runs in a fresh namespace that acts as both globals and locals,
    just like the top level of a script, so functions defined in `s` can see
    variables assigned in `s`. Everything written to `sys.stdout` while the
    code runs is captured and returned as one string.

    Any exception raised by the executed code propagates to the caller;
    `sys.stdout` is restored first.

    NOTE: this runs arbitrary code through `exec`. Only call it on input you
    trust.
    """
    # One dict for globals and locals: with separate dicts, names assigned by
    # the code would be invisible inside functions the code defines.
    namespace = {"__name__": "__main__"}
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    try:
        exec(s, namespace)
    finally:
        # Always hand back the real stdout, even if the executed code raised
        sys.stdout = old_stdout
    return mystdout.getvalue()
def call_output(cmd_str):
    """Run `cmd_str` through the shell and return its STDOUT as text.

    The output is decoded as UTF-8 and stripped of surrounding whitespace.
    Raises `subprocess.CalledProcessError` if the command exits non-zero.
    """
    completed = subprocess.run(
        cmd_str,
        shell=True,
        stdout=subprocess.PIPE,
        check=True,
    )
    text = completed.stdout.decode('utf-8')
    return text.strip()
def get_metadata():
    """Build the one-line provenance note for the notebook.

    The note names the current user and host (from the `whoami` and
    `hostname` commands) and today's date. The short hash of the current git
    commit is appended when `git rev-parse` succeeds and prints one.
    """
    username = call_output('whoami')
    machine = call_output('hostname')
    today = datetime.date.today()
    try:
        commit = call_output('git rev-parse --short HEAD 2> /dev/null')
    except subprocess.CalledProcessError:
        # git exited with an error (e.g. not a repository): omit the hash
        commit = None
    pieces = [f'Generated by `{username}@{machine}` on {today}']
    if commit:
        pieces.append(f', last commit `{commit}`')
    pieces.append('.')
    return ''.join(pieces)
def _collect_code(md_lines):
    """Gather the Python source and saved-figure names from markdown lines.

    Returns `(py_lines, fig_names)`. `py_lines` is the code of all Python
    cells in order, with a delimiter print inserted at the start of each cell.
    `fig_names` holds one list per cell with the file names that cell passes
    to `plt.savefig`.
    """
    py_lines = []
    fig_names = []
    in_block = False
    for line in md_lines:
        if not in_block:
            # When we enter a Python code block, insert a print statement
            # with our delimiter, so we can assign STDOUT to each cell
            if BLOCK_START in line:
                in_block = True
                py_lines.append(STDOUT_DELIMITER_PRINT)
                fig_names.append([])
        elif BLOCK_END in line:
            in_block = False
        elif "plt.show()" not in line:
            # plt.show lines are skipped (presumably so that no interactive
            # window interrupts the run); everything else is executed
            py_lines.append(line)
            # Record any saved figures for later inclusion
            fig_names[-1].extend(SAVEFIG_RE.findall(line))
    return py_lines, fig_names


def _weave_output(md_lines, cell_outputs, fig_names):
    """Copy the markdown lines, adding each cell's STDOUT and figures after it."""
    md_lines_out = []
    in_block = False
    i_cell = 0
    for line in md_lines:
        # Include all markdown lines in the notebook
        md_lines_out.append(line)
        if not in_block:
            in_block = BLOCK_START in line
        elif BLOCK_END in line:
            # At the end of a code block
            in_block = False
            # Append STDOUT for this cell, if it printed anything
            current_output = cell_outputs[i_cell].strip("\n")
            if current_output:
                md_lines_out += ["```output", current_output, "```"]
            # Append any saved figures for this cell
            md_lines_out.extend(f"![]({fig})" for fig in fig_names[i_cell])
            i_cell += 1
    return md_lines_out


def make_notebook(md_file, pdf_file):
    """Convert the markdown code to a pdf notebook.

    Reads `md_file` (as UTF-8), executes all of its Python code cells in one
    go, inserts the STDOUT and saved figures of each cell after that cell,
    appends a metadata footer, and pipes the resulting markdown to pandoc,
    which writes `pdf_file`.

    Exceptions raised by the notebook code propagate to the caller. pandoc is
    invoked without a shell, so file names containing spaces or shell
    metacharacters are passed through unchanged; `FileNotFoundError` is
    raised if the pandoc executable cannot be found. pandoc's exit status is
    not checked.
    """
    # Read the markdown file; UTF-8 matches the encoding sent to pandoc below
    with open(md_file, "r", encoding="utf-8") as f:
        md_lines = f.read().splitlines()

    # Extract the code of every cell, then run it and record the output
    py_lines, fig_names = _collect_code(md_lines)
    out_str = exec_get_output("\n".join(py_lines))

    # Split on our added delimiter per cell; the text before the first
    # delimiter belongs to no cell and is discarded
    cell_str = out_str.split(STDOUT_DELIMITER)[1:]

    # Interleave the original markdown with the outputs, then add the footer
    md_lines_out = _weave_output(md_lines, cell_str, fig_names)
    md_lines_out += ["", "___", "", get_metadata()]

    # Put the markdown lines back to a big string
    md_str_out = "\n".join(md_lines_out)

    # Call out to pandoc to generate pdf, passing arguments as a list so the
    # output path is never interpreted by a shell
    subprocess.run(
        [
            "pandoc",
            "-V",
            "papersize:a4",
            "--highlight-style",
            "tango",
            "-o",
            pdf_file,
        ],
        input=md_str_out.encode("utf-8"),
    )
def default_pdf_path(md_file):
    """Return the default output path: `md_file` with its extension replaced by `.pdf`.

    Only the final extension is replaced, so a `.md` elsewhere in the path is
    left alone, and an input without a `.md` extension never maps onto itself.
    """
    import os  # local import keeps this block independent of the file header

    return os.path.splitext(md_file)[0] + ".pdf"


if __name__ == "__main__":
    # Entry point for script usage.
    # Extract command-line arguments
    if len(sys.argv) < 2:
        sys.exit("usage: mdpynb MD_INPUT_FILE [PDF_OUTPUT_FILE]")
    md_file = sys.argv[1]
    # The output name is optional and defaults to the input name with .pdf
    pdf_file = sys.argv[2] if len(sys.argv) > 2 else default_pdf_path(md_file)
    # Run on the given input file
    make_notebook(md_file, pdf_file)