#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0
import os, sys, json, csv
"""
Convert a todo.sr.ht JSON dump to a CSV file that GitLab can import.

The JSON dump structure is as of 2025-02-23. GitLab's import format has two
columns (issue title, issue description). Issue descriptions and titles use a
Markdown variant. (GitLab's export includes fields like 'Created At' and
'Author Username', but these are not recognized by the importer.)

This is unavoidably a lossy process; you should _not_ blindly trust the output
and should compare it with the original. Issues will need some post-processing.

Ignored JSON metadata fields include:
updated, ref, assignees, upstream, X-Payload-Signature, X-Payload-Nonce, ticket_id, id

Ignored events: CREATED, TICKET_MENTIONED, LABEL_ADDED, USER_MENTIONED
"""
# Input dump and output CSV, both relative to the current working directory.
src = "input.json"
dst = "output.csv"
# Folder to dump .md files for review before uploading
output_folder = "output"
# Single Markdown file that receives every converted ticket, one after another.
concatenated_posts = "concatenated.md"
os.makedirs(output_folder, exist_ok=True)

# Read the dump inside a context manager so the input handle is closed as soon
# as parsing is done, and decode explicitly as UTF-8 instead of relying on the
# platform's default encoding.
with open(src, "r", encoding="utf-8") as dump_file:
    dump = json.load(dump_file)

# The two output handles below stay open while the tickets are processed and
# are closed at the end of the script. newline="" is what the csv module
# requires so that it controls line endings itself.
csv_out = open(dst, "w", newline="", encoding="utf-8")
# NOTE: this rebinds the name `csv` from the module to the writer object; the
# rest of the script refers to the writer through this name, so the module is
# no longer reachable as `csv` after this line.
csv = csv.writer(csv_out)
csv.writerow(["Title", "Description"])
concat_output = open(concatenated_posts, "w", encoding="utf-8")

# Top-level sections of the dump. `labels` is read but not used further in
# this script.
labels = dump["labels"]
tickets = dump["tickets"]
def participant_to_name(user):
    """Return a short printable handle for a participant record.

    No cross-linking between identities is attempted, so the result might be
    wrong; handles are good enough for the exported text.

    - "user" participants become "srht:" followed by their name
    - "email" participants become "email:" followed by their name
    - "external" participants are returned as their external_id, which has a
      format like github.com:mstoeckl

    Raises NotImplementedError (carrying the record) for any other type.
    """
    kind = user["type"]
    if kind == "external":
        return user["external_id"]
    if kind in ("user", "email"):
        prefix = "srht:" if kind == "user" else "email:"
        return prefix + user["name"]
    raise NotImplementedError(user)
def default_status(x):
    """Return *x* unchanged, or "REPORTED" when *x* is None."""
    return "REPORTED" if x is None else x
def default_resolution(x):
    """Return *x* unchanged, or "UNRESOLVED" when *x* is None."""
    return "UNRESOLVED" if x is None else x
# Character runs that may be rendered as a horizontal rule and could therefore
# be confused with the separator written between posts.
_SEPARATOR_MARKERS = ("===", "***", "---")
# Event types that are deliberately not recorded in the exported issue.
_IGNORED_EVENT_TYPES = ("CREATED", "TICKET_MENTIONED", "LABEL_ADDED", "USER_MENTIONED")
# Horizontal rule placed between the individual posts of one issue.
splitter = "\n\n-------\n"


def _contains_separator(text):
    """Return True if *text* contains any of the separator-like character runs."""
    return any(marker in text for marker in _SEPARATOR_MARKERS)


def _ticket_header(ticket, submitter_name):
    """Return the metadata header, one "Key: value" paragraph per field."""
    metadata = [
        ("Imported post ID", str(ticket["id"])),
        ("Title", str(ticket["subject"])),
        ("Status", ticket["status"]),
        ("Resolution", ticket["resolution"]),
        ("Labels", ", ".join(ticket["labels"])),
        ("User", submitter_name),
        ("Submission date", ticket["created"]),
    ]
    return "".join("{}: {}\n\n".format(key, value) for key, value in metadata)


def _event_posts(ticket, event):
    """Return (posts, has_separator) for a single ticket event.

    An event may carry several types; each non-ignored type yields at most one
    post. Raises NotImplementedError for a type that is neither ignored,
    STATUS_CHANGE nor COMMENT.
    """
    posts = []
    has_separator = False
    for typ in event["event_type"]:
        if typ in _IGNORED_EVENT_TYPES:
            # Skip, these are not critical events to record
            continue
        participant = participant_to_name(event["participant"])
        if typ == "STATUS_CHANGE":
            # Date the change with the event's own timestamp rather than the
            # ticket's submission date. Assumes events in the dump carry a
            # "created" field -- TODO confirm against the export format; if it
            # is absent, fall back to the ticket creation date.
            changed_at = event.get("created", ticket["created"])
            posts.append(
                "Status change from {} {} to {} {}\n\nUser: {}\n\nDate: {}\n\n".format(
                    default_status(event["old_status"]),
                    default_resolution(event["old_resolution"]),
                    event["new_status"],
                    event["new_resolution"],
                    participant,
                    changed_at,
                )
            )
            continue
        if typ != "COMMENT":
            raise NotImplementedError(typ)
        text = event["comment"]["text"]
        if not text.strip():
            # Drop empty comments; may happen when the status is changed but
            # there is no comment
            continue
        if _contains_separator(text):
            has_separator = True
        posts.append(
            "From: {}\n\nDate: {}\n\n{}".format(
                participant_to_name(event["comment"]["author"]),
                event["comment"]["created"],
                text,
            )
        )
    return posts, has_separator


# IDs of tickets whose text contains something that looks like a separator.
posts_with_separators = []
try:
    for ticket in sorted(tickets, key=lambda t: t["id"]):
        ticket_id = ticket["id"]
        submitter_name = participant_to_name(ticket["submitter"])
        title = ticket["subject"]
        print(ticket_id, submitter_name, title)

        # Treat a missing (null) body like an empty one instead of crashing.
        body = ticket["body"] or ""
        post_sequence = [_ticket_header(ticket, submitter_name)]
        if body.strip():
            # Only add initial post if nonempty; can be empty for 'title-only' issues
            post_sequence.append(
                "From: {}\n\nDate: {}\n\n".format(submitter_name, ticket["created"]) + body
            )
        may_have_separator = _contains_separator(body)

        for event in ticket["events"]:
            event_posts, flagged = _event_posts(ticket, event)
            post_sequence.extend(event_posts)
            may_have_separator = may_have_separator or flagged

        output = splitter.join(post_sequence)
        # One Markdown file per ticket for manual review; written as UTF-8 so
        # the result does not depend on the platform's default encoding.
        ticket_path = os.path.join(output_folder, str(ticket_id) + ".md")
        with open(ticket_path, "w", encoding="utf-8") as f:
            f.write(output)
        concat_output.write(splitter)
        concat_output.write("# " + title + "\n\n")
        concat_output.write(output)
        csv.writerow([title, output])
        if may_have_separator:
            posts_with_separators.append(ticket_id)
finally:
    # Close the outputs even if a ticket could not be converted, so that
    # whatever was written so far is flushed to disk.
    csv_out.close()
    concat_output.close()

print("Exported, {} tickets".format(len(tickets)))
# Horizontal rules are used to split the individual messages within an issue.
# If a message itself contains separators, it might be confusing and need
# editing later. False positives are possible.
print("Some posts _might_ contain separators: {}".format(posts_with_separators))