feat: Add Discord server export collection

This commit introduces a new command `borg collect discord import` to
import and archive Discord server exports.

The command processes a JSON export from tools like DiscordChatExporter,
converts the messages into Markdown files organized by channel,
downloads all attachments to create a self-contained archive, and
generates a searchable `INDEX.json` file.

Key features:
- Parses DiscordChatExporter JSON format.
- Converts chat logs to Markdown, preserving metadata.
- Downloads and localizes all message attachments.
- Creates a searchable `INDEX.json`.
- Sanitizes server, channel, and attachment file names to prevent path traversal.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
This commit is contained in:
google-labs-jules[bot] 2026-02-02 00:49:43 +00:00
parent cf2af53ed3
commit 642ceb458b
5 changed files with 430 additions and 0 deletions

1
.gitignore vendored
View file

@ -10,3 +10,4 @@ demo-track.smsg
# Dev artifacts
.playwright-mcp/
discord/

236
cmd/collect_discord.go Normal file
View file

@ -0,0 +1,236 @@
package cmd
import (
"encoding/json"
"fmt"
"github.com/spf13/cobra"
"io"
"net/http"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// collectDiscordCmd is the "discord" command. It sets no Run/RunE handler of
// its own; it only carries the help text and acts as the parent that init
// attaches the Discord subcommands to.
var collectDiscordCmd = &cobra.Command{
Use: "discord",
Short: "Collect a Discord server export.",
Long: `Collect a Discord server export from DiscordChatExporter and store it in a searchable archive.`,
}
// DiscordExport represents the top-level structure of a DiscordChatExporter JSON export:
// one guild, a flat list of channels, and a flat list of messages that refer
// back to their channel by ID.
// This struct is based on a common format, but may need adjustments for different export versions.
type DiscordExport struct {
// Guild is decoded from the "guild" key; its name becomes the archive directory name.
Guild Guild `json:"guild"`
// Channels is decoded from the "channels" key; one Markdown file is written per entry.
Channels []Channel `json:"channels"`
// Messages is decoded from the "messages" key and holds the messages of every channel.
Messages []Message `json:"messages"`
}
// Guild represents the server information.
type Guild struct {
// ID is decoded from the "id" key and kept as a string.
ID string `json:"id"`
// Name is decoded from the "name" key; the import command sanitizes it
// before using it as a directory name.
Name string `json:"name"`
}
// Channel represents a channel in the server.
type Channel struct {
// ID is the key that Message.ChannelID is matched against.
ID string `json:"id"`
// Name is used as the Markdown heading and, once sanitized, as the file name.
Name string `json:"name"`
}
// Message represents a single message in a channel.
type Message struct {
// ID is decoded from the "id" key and copied into the search index.
ID string `json:"id"`
// ChannelID names the Channel.ID this message belongs to.
ChannelID string `json:"channelId"`
// Author is the nested "author" object.
Author Author `json:"author"`
// Timestamp is decoded by encoding/json from an RFC 3339 string and is
// the sort key for the per-channel Markdown output.
Timestamp time.Time `json:"timestamp"`
// Content is the message text, written to the Markdown file verbatim.
Content string `json:"content"`
// Attachments lists the files to download for this message; it may be empty.
Attachments []Attachment `json:"attachments"`
}
// Author represents the message author.
type Author struct {
// ID is decoded from the "id" key.
ID string `json:"id"`
// Name is the name shown next to each message in the Markdown output and
// stored in the search index.
Name string `json:"name"`
// AvatarURL is decoded from "avatarUrl"; the import command in this file
// does not read it.
AvatarURL string `json:"avatarUrl"`
}
// Attachment represents a file attached to a message.
type Attachment struct {
// URL is the address the import command fetches the file from.
URL string `json:"url"`
// FileName is the name given in the export; it is sanitized before being
// used as the local file name.
FileName string `json:"fileName"`
}
// sanitizeFilename turns an arbitrary name taken from an export into a single
// path component that is safe to pass to filepath.Join.
//
// Path separators, the characters Windows forbids in file names, and ASCII
// control characters (including NUL) are each replaced with a dash. A name
// that would still be empty, "." or ".." after that is replaced with "_":
// those values contain no forbidden character, yet joining them onto a
// directory resolves to the directory itself or its parent, which would let a
// crafted export write outside the archive.
//
// All other names are returned unchanged apart from the per-character
// replacement, so existing archives keep their file names.
func sanitizeFilename(name string) string {
	cleaned := strings.Map(func(r rune) rune {
		switch r {
		case '/', '\\', ':', '*', '?', '"', '<', '>', '|':
			return '-'
		}
		// Control characters are rejected by some file systems and are
		// invisible in listings; treat them like the other bad characters.
		if r < 0x20 || r == 0x7f {
			return '-'
		}
		return r
	}, name)

	switch cleaned {
	case "", ".", "..":
		// Not usable as a file name: filepath.Join would collapse these
		// into the parent path instead of creating a child entry.
		return "_"
	}
	return cleaned
}
var collectDiscordImportCmd = &cobra.Command{
Use: "import [path]",
Short: "Import a DiscordChatExporter JSON export.",
Long: `Import a DiscordChatExporter JSON export and convert it to a searchable archive.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
filePath := args[0]
fmt.Println("Importing Discord export from:", filePath)
// Read the JSON file
jsonFile, err := os.Open(filePath)
if err != nil {
return fmt.Errorf("could not open file: %w", err)
}
defer jsonFile.Close()
byteValue, err := io.ReadAll(jsonFile)
if err != nil {
return fmt.Errorf("could not read file: %w", err)
}
// Unmarshal the JSON data
var export DiscordExport
if err := json.Unmarshal(byteValue, &export); err != nil {
return fmt.Errorf("could not unmarshal json: %w", err)
}
// Group messages by channel
messagesByChannel := make(map[string][]Message)
for _, msg := range export.Messages {
messagesByChannel[msg.ChannelID] = append(messagesByChannel[msg.ChannelID], msg)
}
// Sanitize server name for the directory path
sanitizedServerName := sanitizeFilename(export.Guild.Name)
// Create a searchable index
type SearchEntry struct {
ID string `json:"id"`
Channel string `json:"channel"`
Author string `json:"author"`
Timestamp time.Time `json:"timestamp"`
Content string `json:"content"`
}
channelNames := make(map[string]string)
for _, ch := range export.Channels {
channelNames[ch.ID] = ch.Name
}
var searchIndex []SearchEntry
for _, msg := range export.Messages {
searchIndex = append(searchIndex, SearchEntry{
ID: msg.ID,
Channel: channelNames[msg.ChannelID],
Author: msg.Author.Name,
Timestamp: msg.Timestamp,
Content: msg.Content,
})
}
// Create the main output directory
outputDir := filepath.Join("discord", sanitizedServerName)
if err := os.MkdirAll(outputDir, 0755); err != nil {
return fmt.Errorf("could not create output directory: %w", err)
}
// Save the index to a file
indexData, err := json.MarshalIndent(searchIndex, "", " ")
if err != nil {
return fmt.Errorf("could not marshal search index: %w", err)
}
indexPath := filepath.Join(outputDir, "INDEX.json")
if err := os.WriteFile(indexPath, indexData, 0644); err != nil {
return fmt.Errorf("could not write search index: %w", err)
}
// Process each channel and convert messages to Markdown
for _, channel := range export.Channels {
// Sort messages by timestamp
sort.Slice(messagesByChannel[channel.ID], func(i, j int) bool {
return messagesByChannel[channel.ID][i].Timestamp.Before(messagesByChannel[channel.ID][j].Timestamp)
})
var markdownContent strings.Builder
markdownContent.WriteString(fmt.Sprintf("# %s\n\n", channel.Name))
for _, msg := range messagesByChannel[channel.ID] {
markdownContent.WriteString("---\n")
markdownContent.WriteString(fmt.Sprintf("**%s** `%s`\n\n", msg.Author.Name, msg.Timestamp.Format("2006-01-02 15:04:05")))
markdownContent.WriteString(msg.Content)
markdownContent.WriteString("\n")
for _, att := range msg.Attachments {
// Download attachment
resp, err := http.Get(att.URL)
if err != nil {
// Log the error but don't block the entire process
fmt.Printf("Warning: could not download attachment %s: %v\n", att.URL, err)
markdownContent.WriteString(fmt.Sprintf("\n[Failed to download %s](%s)", att.FileName, att.URL))
continue
}
// Create attachments directory
attachmentsDir := filepath.Join(outputDir, "attachments")
if err := os.MkdirAll(attachmentsDir, 0755); err != nil {
return fmt.Errorf("could not create attachments directory: %w", err)
}
// Save attachment
sanitizedAttachmentName := sanitizeFilename(att.FileName)
attachmentPath := filepath.Join(attachmentsDir, sanitizedAttachmentName)
outFile, err := os.Create(attachmentPath)
if err != nil {
resp.Body.Close()
return fmt.Errorf("could not create attachment file: %w", err)
}
if _, err := io.Copy(outFile, resp.Body); err != nil {
outFile.Close()
resp.Body.Close()
return fmt.Errorf("could not save attachment: %w", err)
}
outFile.Close()
resp.Body.Close()
// Update markdown to link to local file
localPath := filepath.Join("..", "attachments", sanitizedAttachmentName)
markdownContent.WriteString(fmt.Sprintf("\n[%s](%s)", att.FileName, localPath))
}
markdownContent.WriteString("\n\n")
}
// Create the output directory for markdown files
channelsDir := filepath.Join(outputDir, "channels")
if err := os.MkdirAll(channelsDir, 0755); err != nil {
return fmt.Errorf("could not create output directory: %w", err)
}
// Sanitize channel name for the filename
sanitizedChannelName := sanitizeFilename(channel.Name)
// Write the markdown to a file
filePath := filepath.Join(channelsDir, fmt.Sprintf("%s.md", sanitizedChannelName))
if err := os.WriteFile(filePath, []byte(markdownContent.String()), 0644); err != nil {
return fmt.Errorf("could not write markdown file for channel %s: %w", channel.Name, err)
}
}
fmt.Printf("Successfully created archive in discord/%s\n", sanitizedServerName)
return nil
},
}
func init() {
collectCmd.AddCommand(collectDiscordCmd)
collectDiscordCmd.AddCommand(collectDiscordImportCmd)
}

132
cmd/collect_discord_test.go Normal file
View file

@ -0,0 +1,132 @@
package cmd
import (
"encoding/json"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/Snider/Borg/pkg/mocks"
)
// TestCollectDiscordImportCmd_Good imports testdata/sample_export.json from a
// temporary working directory, with attachment downloads served by a mock
// HTTP client, and checks INDEX.json, the downloaded attachment and the
// Markdown file generated for the "random" channel.
func TestCollectDiscordImportCmd_Good(t *testing.T) {
	// Mock HTTP client. http.DefaultClient is process-wide state, so the
	// original is restored afterwards to keep the mock out of other tests.
	mockClient := mocks.NewMockClient(map[string]*http.Response{
		"https://example.com/file.txt": {
			StatusCode: http.StatusOK,
			Body:       io.NopCloser(strings.NewReader("attachment content")),
		},
	})
	originalClient := http.DefaultClient
	http.DefaultClient = mockClient
	t.Cleanup(func() { http.DefaultClient = originalClient })

	// Create a temporary directory
	tempDir := t.TempDir()

	// Read the sample export from testdata
	sampleData, err := os.ReadFile("testdata/sample_export.json")
	if err != nil {
		t.Fatalf("failed to read sample export file: %v", err)
	}
	jsonPath := filepath.Join(tempDir, "export.json")
	if err := os.WriteFile(jsonPath, sampleData, 0644); err != nil {
		t.Fatalf("failed to write sample json: %v", err)
	}

	// Change working directory to tempDir to check relative output path
	oldWd, err := os.Getwd()
	if err != nil {
		t.Fatalf("failed to get current working directory: %v", err)
	}
	if err := os.Chdir(tempDir); err != nil {
		t.Fatalf("failed to change working directory: %v", err)
	}
	// Registered after t.TempDir, so this runs before the temporary
	// directory is removed and the process is not left inside it.
	t.Cleanup(func() {
		if err := os.Chdir(oldWd); err != nil {
			t.Errorf("failed to restore working directory: %v", err)
		}
	})

	rootCmd := NewRootCmd()
	rootCmd.AddCommand(GetCollectCmd())

	// Execute command
	_, err = executeCommand(rootCmd, "collect", "discord", "import", "export.json")
	if err != nil {
		t.Fatalf("collect discord import command failed: %v", err)
	}

	// Verify output: the guild name "Test/Server" must have been sanitized.
	sanitizedServerName := "Test-Server"
	expectedBaseDir := filepath.Join("discord", sanitizedServerName)

	// Verify INDEX.json
	indexPath := filepath.Join(expectedBaseDir, "INDEX.json")
	indexContent, err := os.ReadFile(indexPath)
	if err != nil {
		t.Fatalf("failed to read INDEX.json: %v", err)
	}
	type SearchEntry struct {
		ID        string    `json:"id"`
		Channel   string    `json:"channel"`
		Author    string    `json:"author"`
		Timestamp time.Time `json:"timestamp"`
		Content   string    `json:"content"`
	}
	var index []SearchEntry
	if err := json.Unmarshal(indexContent, &index); err != nil {
		t.Fatalf("failed to unmarshal INDEX.json: %v", err)
	}
	if len(index) != 3 {
		t.Fatalf("expected 3 messages in index, got %d", len(index))
	}
	// The index follows export order, so entry 1 is the second message.
	if index[1].Content != "This is a test message." {
		t.Errorf("unexpected content in index entry: %s", index[1].Content)
	}

	// Verify attachment
	attachmentPath := filepath.Join(expectedBaseDir, "attachments", "file with spaces.txt")
	attachmentContent, err := os.ReadFile(attachmentPath)
	if err != nil {
		t.Fatalf("failed to read attachment: %v", err)
	}
	if string(attachmentContent) != "attachment content" {
		t.Errorf("unexpected content in attachment. Got: %s", string(attachmentContent))
	}

	// Verify random.md
	randomMdPath := filepath.Join(expectedBaseDir, "channels", "random.md")
	randomMdContent, err := os.ReadFile(randomMdPath)
	if err != nil {
		t.Fatalf("failed to read random.md: %v", err)
	}
	expectedRandomContent := "# random\n\n---\n**User2** `2024-01-01 12:01:00`\n\nThis is a test message.\n\n[file with spaces.txt](../attachments/file with spaces.txt)\n\n"
	if string(randomMdContent) != expectedRandomContent {
		t.Errorf("unexpected content in random.md.\nGot:\n%s\nExpected:\n%s", string(randomMdContent), expectedRandomContent)
	}
}
// TestCollectDiscordImportCmd_Bad checks that pointing the import at a file
// that does not exist fails with the "could not open file" error.
func TestCollectDiscordImportCmd_Bad(t *testing.T) {
	root := NewRootCmd()
	root.AddCommand(GetCollectCmd())

	// The path below is never created, so opening it must fail.
	_, runErr := executeCommand(root, "collect", "discord", "import", "non-existent.json")

	switch {
	case runErr == nil:
		t.Fatal("expected an error, but got none")
	case !strings.Contains(runErr.Error(), "could not open file"):
		t.Errorf("unexpected error message: %v", runErr)
	}
}
// TestCollectDiscordImportCmd_Ugly checks that running the import without a
// path argument is rejected by the command's argument validation.
func TestCollectDiscordImportCmd_Ugly(t *testing.T) {
	root := NewRootCmd()
	root.AddCommand(GetCollectCmd())

	// No export path is supplied after "import".
	_, runErr := executeCommand(root, "collect", "discord", "import")

	switch {
	case runErr == nil:
		t.Fatal("expected an error for no arguments, but got none")
	case !strings.Contains(runErr.Error(), "accepts 1 arg(s), received 0"):
		t.Errorf("unexpected error message: %v", runErr)
	}
}

59
cmd/testdata/sample_export.json vendored Normal file
View file

@ -0,0 +1,59 @@
{
"guild": {
"id": "12345",
"name": "Test/Server"
},
"channels": [
{
"id": "channel1",
"name": "general"
},
{
"id": "channel2",
"name": "random"
}
],
"messages": [
{
"id": "msg1",
"channelId": "channel1",
"author": {
"id": "user1",
"name": "Jules",
"avatarUrl": ""
},
"timestamp": "2024-01-01T12:00:00Z",
"content": "Hello, world!",
"attachments": []
},
{
"id": "msg2",
"channelId": "channel2",
"author": {
"id": "user2",
"name": "User2",
"avatarUrl": ""
},
"timestamp": "2024-01-01T12:01:00Z",
"content": "This is a test message.",
"attachments": [
{
"url": "https://example.com/file.txt",
"fileName": "file with spaces.txt"
}
]
},
{
"id": "msg3",
"channelId": "channel1",
"author": {
"id": "user1",
"name": "Jules",
"avatarUrl": ""
},
"timestamp": "2024-01-01T12:02:00Z",
"content": "Another message in general.",
"attachments": []
}
]
}

View file

@ -21,8 +21,10 @@ Subcommands:
- `borg collect github repos <org-or-user> [--output <file>] [--format ...] [--compression ...]`
- `borg collect website <url> [--depth N] [--output <file>] [--format ...] [--compression ...]`
- `borg collect pwa --uri <url> [--output <file>] [--format ...] [--compression ...]`
- `borg collect discord import <path/to/export.json>`
Examples:
- `borg collect discord import ./discord-export/export.json`
- `borg collect github repo https://github.com/Snider/Borg --output borg.dat`
- `borg collect website https://example.com --depth 1 --output site.dat`
- `borg collect pwa --uri https://squoosh.app --output squoosh.dat`