# mdpynb -rw-r--r-- 4.1 KiB View raw
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python
# mdpynb MD_INPUT_FILE [PDF_OUTPUT_FILE]
# Run python code cells from markdown, and format results as pdf
# James Brind, October 2022, CC-BY-4.0

import sys, subprocess, re, datetime
from io import StringIO


# Define some delimiter strings
# Marker printed at the start of every code cell, so the captured STDOUT can
# later be split back into one chunk per cell
STDOUT_DELIMITER = "!!! ENTERING BLOCK"
STDOUT_DELIMITER_PRINT = 'print("' + STDOUT_DELIMITER + '",end="")'
# Fences that open and close a Python code cell in the markdown source
BLOCK_START = "```python"
BLOCK_END = "```"
# Captures the quoted file name given as first argument to plt.savefig.
# Raw string so that `\.` is not an invalid string escape; the lazy `.*?`
# stops at the first closing quote, so later quoted keyword arguments (or a
# second savefig call on the same line) are not swallowed into the name.
SAVEFIG_RE = re.compile(r'plt\.savefig\(["\'](.*?)["\']')


def exec_get_output(s):
    """Execute python code in string `s` and return output.

    The code runs in a fresh namespace whose `__name__` is "__main__", like
    a standalone script. Everything it writes to `sys.stdout` is captured
    and returned as one string. Any exception raised by the code propagates
    to the caller, but `sys.stdout` is always restored first.
    """
    # A single dict used as globals (and, implicitly, locals) makes
    # top-level names of the executed code visible inside functions it
    # defines; exec() with the enclosing function's locals does not.
    namespace = {"__name__": "__main__"}
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    try:
        exec(s, namespace)
    finally:
        # Restore even on error, otherwise later output (including the
        # traceback context printed by the caller) would be lost
        sys.stdout = old_stdout
    return mystdout.getvalue()

def call_output(cmd_str):
    """Run `cmd_str` through the shell and return its STDOUT as text.

    The output is decoded as UTF-8 and stripped of surrounding whitespace.
    A non-zero exit status raises `subprocess.CalledProcessError`.
    """
    raw_bytes = subprocess.check_output(cmd_str, shell=True)
    text = raw_bytes.decode('utf-8')
    return text.strip()

def get_metadata():
    """Build the one-line provenance footer for the notebook.

    The line names the current user and host (from `whoami` and
    `hostname`) and today's date. If `git rev-parse` succeeds and yields a
    non-empty hash, the short hash of the last commit is appended too.
    """
    who = call_output('whoami')
    machine = call_output('hostname')
    today = datetime.date.today()
    pieces = [f'Generated by `{who}@{machine}` on {today}']

    # A failing git command (e.g. not inside a repository) just means the
    # commit is left out of the footer
    try:
        commit = call_output('git rev-parse --short HEAD 2> /dev/null')
    except subprocess.CalledProcessError:
        commit = None
    if commit:
        pieces.append(f', last commit `{commit}`')

    pieces.append('.')
    return ''.join(pieces)


def make_notebook(md_file, pdf_file):
    """Convert the markdown code to a pdf notebook.

    All ```python cells in `md_file` are concatenated and executed as one
    program. The STDOUT of each cell, and any figure it saves with
    `plt.savefig("name")`, is inserted directly after that cell. A metadata
    footer is appended and the result is piped to pandoc, which writes
    `pdf_file`.

    Parameters
    ----------
    md_file : path of the markdown input, read as UTF-8.
    pdf_file : path of the pdf output, handed to pandoc's `-o` option.

    Raises
    ------
    subprocess.CalledProcessError if pandoc exits with a non-zero status.
    Any exception raised by the executed cell code also propagates.
    """

    # Read the markdown file; the text is sent to pandoc as UTF-8 below, so
    # decode it as UTF-8 too instead of relying on the locale default
    with open(md_file, "r", encoding="utf-8") as f:
        md_lines = f.read().splitlines()

    # First pass: collect the code of all cells into one program
    in_block = False
    py_lines = []
    fig_names = []  # one list of saved figure names per cell
    for line in md_lines:
        if not in_block:
            # When we enter a Python code block, insert a print statement
            # with our delimiter, so we can assign STDOUT to each cell
            if BLOCK_START in line:
                in_block = True
                py_lines.append(STDOUT_DELIMITER_PRINT)
                fig_names.append([])
            continue

        # Check for end of code block
        if BLOCK_END in line:
            in_block = False
        # Ignore plt.show
        elif "plt.show()" in line:
            pass
        else:
            py_lines.append(line)

        # Record any saved figures for later inclusion
        fig_names[-1].extend(SAVEFIG_RE.findall(line))

    # Run the Python code and record the output
    out_str = exec_get_output("\n".join(py_lines))

    # Split on our added delimiter per cell; whatever precedes the first
    # delimiter belongs to no cell and is dropped
    cell_outputs = iter(out_str.split(STDOUT_DELIMITER)[1:])
    cell_figs = iter(fig_names)

    # Second pass: copy the markdown and add the results after each cell
    in_block = False
    md_lines_out = []
    for line in md_lines:
        # Include all markdown lines in the notebook
        md_lines_out.append(line)

        if not in_block:
            if BLOCK_START in line:
                in_block = True
            continue

        if BLOCK_END not in line:
            continue

        # At the end of a code block
        in_block = False

        # Append STDOUT for this cell; fall back to no output if fewer
        # chunks were captured than there are cells
        current_output = next(cell_outputs, "").strip("\n")
        if current_output:
            md_lines_out.append("```output")
            md_lines_out.append(current_output)
            md_lines_out.append("```")

        # Append any saved figures for this cell
        for fig in next(cell_figs, []):
            md_lines_out.append("![](%s)" % fig)

    md_lines_out.append("")
    md_lines_out.append("___")
    md_lines_out.append("")
    md_lines_out.append(get_metadata())

    # Put the markdown lines back to a big string
    md_str_out = "\n".join(md_lines_out)

    # Call out to pandoc to generate pdf. The argument list bypasses the
    # shell, so output paths with spaces or shell metacharacters are passed
    # through verbatim; check=True turns a pandoc failure into an exception
    # instead of silently producing no pdf.
    subprocess.run(
        [
            "pandoc",
            "-V",
            "papersize:a4",
            "--highlight-style",
            "tango",
            "-o",
            pdf_file,
        ],
        input=md_str_out.encode("utf-8"),
        check=True,
    )


if __name__ == "__main__":
    # Entry point for script usage.

    # Extract command-line arguments
    if len(sys.argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1
        sys.exit("usage: mdpynb MD_INPUT_FILE [PDF_OUTPUT_FILE]")
    md_file = sys.argv[1]

    if len(sys.argv) > 2:
        pdf_file = sys.argv[2]
    elif md_file.endswith(".md"):
        # Swap only the trailing extension; a ".md" elsewhere in the path
        # (e.g. a directory name) must be left alone
        pdf_file = md_file[: -len(".md")] + ".pdf"
    else:
        # No ".md" suffix: append instead, so the default output can never
        # be the input file itself
        pdf_file = md_file + ".pdf"

    # Run on the given input file
    make_notebook(md_file, pdf_file)