# srht_to_gitlab.py -rw-r--r-- 5.4 KiB View raw
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0

import os, sys, json, csv

"""
Convert a todo.sr.ht JSON dump to a CSV file that GitLab can import.

JSON dump structure is as of 2025-02-23. GitLab's import format has two columns
(issue title, issue description). Issue descriptions and titles use a
Markdown variant. (GitLab's export includes fields like 'Created At' and
'Author Username', but these are not recognized by the importer.)

This is unavoidably a lossy process; you should _not_ blindly trust the output
and should compare with the original. Issues will need some post-processing.

Ignored JSON metadata fields include:

updated, ref, assignees, upstream, X-Payload-Signature, X-Payload-Nonce, ticket_id, id

Ignored events: CREATED, TICKET_MENTIONED, LABEL_ADDED, USER_MENTIONED

"""

# Input: JSON dump exported from todo.sr.ht
src = "input.json"
# Output: CSV file with a "Title" and a "Description" column
dst = "output.csv"
# Folder to dump .md files for review before uploading
output_folder = "output"
# Single file that receives every converted ticket, one after another
concatenated_posts = "concatenated.md"
os.makedirs(output_folder, exist_ok=True)

# Read the dump inside a context manager so the input handle is closed as soon
# as parsing is done. JSON text is UTF-8, so do not depend on the locale default.
with open(src, "r", encoding="utf-8") as dump_file:
    dump = json.load(dump_file)

# newline="" leaves line-ending handling to the csv writer
csv_out = open(dst, "w", newline="")
# NOTE: this rebinds the name `csv` from the module to the writer object; the
# rest of the script calls csv.writerow() on it, so the name is kept as is.
csv = csv.writer(csv_out)
csv.writerow(["Title", "Description"])

concat_output = open(concatenated_posts, "w")

labels = dump["labels"]
tickets = dump["tickets"]


def participant_to_name(user):
    """Return a printable handle for a participant record.

    No attempt is made at crosslinking accounts, so the result might be
    wrong; handles are good enough for the exported text.

    `user` is a mapping with a "type" key. "user" and "email" participants
    are rendered from their "name" with a "srht:" or "email:" prefix;
    "external" participants are rendered as their "external_id" unchanged.

    Raises NotImplementedError for any other participant type.
    """
    kind = user["type"]
    if kind in ("user", "email"):
        prefix = "srht:" if kind == "user" else "email:"
        return prefix + user["name"]
    if kind == "external":
        # These have a format like github.com:mstoeckl
        return user["external_id"]
    raise NotImplementedError(user)


def default_status(x):
    """Return `x` unchanged, or "REPORTED" when `x` is None."""
    return "REPORTED" if x is None else x


def default_resolution(x):
    """Return `x` unchanged, or "UNRESOLVED" when `x` is None."""
    return "UNRESOLVED" if x is None else x


# Substrings hinting that a message contains its own horizontal rule, which
# could be mistaken for the separator placed between posts below.
separator_markers = ("===", "***", "---")
# Event types that are deliberately not carried over into the output
skipped_event_types = ("CREATED", "TICKET_MENTIONED", "LABEL_ADDED", "USER_MENTIONED")
# Separator written between the individual posts of one ticket
splitter = "\n\n-------\n"

# IDs of tickets that may need manual review because of separator look-alikes
posts_with_separators = []
for ticket in sorted(tickets, key=lambda t: t["id"]):
    ticket_id = ticket["id"]
    submitter_name = participant_to_name(ticket["submitter"])
    title = ticket["subject"]
    # Treat a null body like an empty one instead of crashing on .strip()
    body = ticket["body"] or ""

    print(ticket["id"], submitter_name, title)

    # Summary block placed at the top of every converted ticket
    metadata = [
        ("Imported post ID", str(ticket_id)),
        ("Title", str(title)),
        ("Status", ticket["status"]),
        ("Resolution", ticket["resolution"]),
        ("Labels", ", ".join(ticket["labels"])),
        ("User", submitter_name),
        ("Submission date", ticket["created"]),
    ]
    header = "".join("{}: {}\n\n".format(k, v) for k, v in metadata)

    post_sequence = [header]

    if body.strip():
        # Only add initial post if nonempty; can be empty for 'title-only' issues
        init_post = "From: {}\n\nDate: {}\n\n".format(submitter_name, ticket["created"])
        init_post += body
        post_sequence.append(init_post)

    may_have_separator = any(marker in body for marker in separator_markers)

    for event in ticket["events"]:
        # One event can carry several types; each one is handled separately
        for typ in event["event_type"]:
            if typ in skipped_event_types:
                # Skip, these are not critical events to record
                continue
            participant = participant_to_name(event["participant"])
            if typ == "STATUS_CHANGE":
                # Date the change with the event's own timestamp; fall back to
                # the ticket's creation date if the event does not carry one.
                changed_at = event.get("created") or ticket["created"]
                post_sequence.append(
                    "Status change from {} {} to {} {}\n\nUser: {}\n\nDate: {}\n\n".format(
                        default_status(event["old_status"]),
                        default_resolution(event["old_resolution"]),
                        event["new_status"],
                        event["new_resolution"],
                        participant,
                        changed_at,
                    )
                )
                continue
            if typ != "COMMENT":
                raise NotImplementedError(typ)

            text = event["comment"]["text"]
            if not text.strip():
                # Drop empty comments, may happen when the status is changed but there is no comment
                continue

            if any(marker in text for marker in separator_markers):
                may_have_separator = True

            post_sequence.append(
                "From: {}\n\nDate: {}\n\n{}".format(
                    participant_to_name(event["comment"]["author"]),
                    event["comment"]["created"],
                    text,
                )
            )

    output = splitter.join(post_sequence)

    # One Markdown file per ticket, for review before uploading
    with open(os.path.join(output_folder, str(ticket_id) + ".md"), "w") as f:
        f.write(output)
    # Append the same text, under a heading, to the combined file
    concat_output.write(splitter)
    concat_output.write("# " + title + "\n\n")
    concat_output.write(output)

    # `csv` is the writer object here, not the module
    csv.writerow([title, output])
    if may_have_separator:
        posts_with_separators.append(ticket_id)

# Every ticket has been written; release the CSV and the combined output file.
for handle in (csv_out, concat_output):
    handle.close()
print("Exported, {} tickets".format(len(tickets)))
# This script joins the individual messages of an issue with horizontal rules.
# A message that itself contains separator-like text might therefore be
# confusing and need editing later. False positives are possible.
print("Some posts _might_ contain separators: {}".format(posts_with_separators))