Merge 642ceb458b into a77024aad4
This commit is contained in:
commit
9f6e1b35a9
5 changed files with 430 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -10,3 +10,4 @@ demo-track.smsg
|
|||
|
||||
# Dev artifacts
|
||||
.playwright-mcp/
|
||||
discord/
|
||||
|
|
|
|||
236
cmd/collect_discord.go
Normal file
236
cmd/collect_discord.go
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/spf13/cobra"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectDiscordCmd represents the collect discord command
|
||||
var collectDiscordCmd = &cobra.Command{
|
||||
Use: "discord",
|
||||
Short: "Collect a Discord server export.",
|
||||
Long: `Collect a Discord server export from DiscordChatExporter and store it in a searchable archive.`,
|
||||
}
|
||||
|
||||
// DiscordExport represents the top-level structure of a DiscordChatExporter JSON export.
|
||||
// This struct is based on a common format, but may need adjustments for different export versions.
|
||||
type DiscordExport struct {
|
||||
Guild Guild `json:"guild"`
|
||||
Channels []Channel `json:"channels"`
|
||||
Messages []Message `json:"messages"`
|
||||
}
|
||||
|
||||
// Guild identifies the exported server.
type Guild struct {
	// ID is the server identifier, decoded from the "id" key.
	ID string `json:"id"`
	// Name is the server name, decoded from the "name" key.
	Name string `json:"name"`
}
|
||||
|
||||
// Channel identifies one channel of the exported server.
type Channel struct {
	// ID is the channel identifier, decoded from the "id" key.
	ID string `json:"id"`
	// Name is the channel name, decoded from the "name" key.
	Name string `json:"name"`
}
|
||||
|
||||
// Message represents a single message in a channel.
|
||||
type Message struct {
|
||||
ID string `json:"id"`
|
||||
ChannelID string `json:"channelId"`
|
||||
Author Author `json:"author"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Content string `json:"content"`
|
||||
Attachments []Attachment `json:"attachments"`
|
||||
}
|
||||
|
||||
// Author describes the user who wrote a message.
type Author struct {
	// ID is the user identifier, decoded from the "id" key.
	ID string `json:"id"`
	// Name is the user's name, decoded from the "name" key.
	Name string `json:"name"`
	// AvatarURL is decoded from the "avatarUrl" key.
	AvatarURL string `json:"avatarUrl"`
}
|
||||
|
||||
// Attachment describes a file attached to a message.
type Attachment struct {
	// URL is the location the attachment is downloaded from.
	URL string `json:"url"`
	// FileName is the attachment's file name, decoded from the "fileName" key.
	FileName string `json:"fileName"`
}
|
||||
|
||||
// sanitizeFilename turns name into a string that is safe to use as a single
// path component.
//
// Path separators, the characters : * ? " < > | and ASCII control characters
// are each replaced with a dash; every other rune is kept as is. If the
// result would be empty, "." or "..", "_" is returned instead, so that a
// hostile or blank name can never resolve to the parent or current directory
// when joined onto an output path.
func sanitizeFilename(name string) string {
	cleaned := strings.Map(func(r rune) rune {
		switch r {
		case '/', '\\', ':', '*', '?', '"', '<', '>', '|':
			return '-'
		}
		// Control characters (including NUL) are rejected or mangled by
		// most filesystems.
		if r < 0x20 || r == 0x7f {
			return '-'
		}
		return r
	}, name)

	switch cleaned {
	case "", ".", "..":
		return "_"
	}
	return cleaned
}
|
||||
|
||||
var collectDiscordImportCmd = &cobra.Command{
|
||||
Use: "import [path]",
|
||||
Short: "Import a DiscordChatExporter JSON export.",
|
||||
Long: `Import a DiscordChatExporter JSON export and convert it to a searchable archive.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
filePath := args[0]
|
||||
fmt.Println("Importing Discord export from:", filePath)
|
||||
|
||||
// Read the JSON file
|
||||
jsonFile, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open file: %w", err)
|
||||
}
|
||||
defer jsonFile.Close()
|
||||
|
||||
byteValue, err := io.ReadAll(jsonFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not read file: %w", err)
|
||||
}
|
||||
|
||||
// Unmarshal the JSON data
|
||||
var export DiscordExport
|
||||
if err := json.Unmarshal(byteValue, &export); err != nil {
|
||||
return fmt.Errorf("could not unmarshal json: %w", err)
|
||||
}
|
||||
|
||||
// Group messages by channel
|
||||
messagesByChannel := make(map[string][]Message)
|
||||
for _, msg := range export.Messages {
|
||||
messagesByChannel[msg.ChannelID] = append(messagesByChannel[msg.ChannelID], msg)
|
||||
}
|
||||
|
||||
// Sanitize server name for the directory path
|
||||
sanitizedServerName := sanitizeFilename(export.Guild.Name)
|
||||
|
||||
// Create a searchable index
|
||||
type SearchEntry struct {
|
||||
ID string `json:"id"`
|
||||
Channel string `json:"channel"`
|
||||
Author string `json:"author"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
channelNames := make(map[string]string)
|
||||
for _, ch := range export.Channels {
|
||||
channelNames[ch.ID] = ch.Name
|
||||
}
|
||||
|
||||
var searchIndex []SearchEntry
|
||||
for _, msg := range export.Messages {
|
||||
searchIndex = append(searchIndex, SearchEntry{
|
||||
ID: msg.ID,
|
||||
Channel: channelNames[msg.ChannelID],
|
||||
Author: msg.Author.Name,
|
||||
Timestamp: msg.Timestamp,
|
||||
Content: msg.Content,
|
||||
})
|
||||
}
|
||||
|
||||
// Create the main output directory
|
||||
outputDir := filepath.Join("discord", sanitizedServerName)
|
||||
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
||||
return fmt.Errorf("could not create output directory: %w", err)
|
||||
}
|
||||
|
||||
// Save the index to a file
|
||||
indexData, err := json.MarshalIndent(searchIndex, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not marshal search index: %w", err)
|
||||
}
|
||||
|
||||
indexPath := filepath.Join(outputDir, "INDEX.json")
|
||||
if err := os.WriteFile(indexPath, indexData, 0644); err != nil {
|
||||
return fmt.Errorf("could not write search index: %w", err)
|
||||
}
|
||||
|
||||
// Process each channel and convert messages to Markdown
|
||||
for _, channel := range export.Channels {
|
||||
// Sort messages by timestamp
|
||||
sort.Slice(messagesByChannel[channel.ID], func(i, j int) bool {
|
||||
return messagesByChannel[channel.ID][i].Timestamp.Before(messagesByChannel[channel.ID][j].Timestamp)
|
||||
})
|
||||
|
||||
var markdownContent strings.Builder
|
||||
markdownContent.WriteString(fmt.Sprintf("# %s\n\n", channel.Name))
|
||||
|
||||
for _, msg := range messagesByChannel[channel.ID] {
|
||||
markdownContent.WriteString("---\n")
|
||||
markdownContent.WriteString(fmt.Sprintf("**%s** `%s`\n\n", msg.Author.Name, msg.Timestamp.Format("2006-01-02 15:04:05")))
|
||||
markdownContent.WriteString(msg.Content)
|
||||
markdownContent.WriteString("\n")
|
||||
|
||||
for _, att := range msg.Attachments {
|
||||
// Download attachment
|
||||
resp, err := http.Get(att.URL)
|
||||
if err != nil {
|
||||
// Log the error but don't block the entire process
|
||||
fmt.Printf("Warning: could not download attachment %s: %v\n", att.URL, err)
|
||||
markdownContent.WriteString(fmt.Sprintf("\n[Failed to download %s](%s)", att.FileName, att.URL))
|
||||
continue
|
||||
}
|
||||
|
||||
// Create attachments directory
|
||||
attachmentsDir := filepath.Join(outputDir, "attachments")
|
||||
if err := os.MkdirAll(attachmentsDir, 0755); err != nil {
|
||||
return fmt.Errorf("could not create attachments directory: %w", err)
|
||||
}
|
||||
|
||||
// Save attachment
|
||||
sanitizedAttachmentName := sanitizeFilename(att.FileName)
|
||||
attachmentPath := filepath.Join(attachmentsDir, sanitizedAttachmentName)
|
||||
outFile, err := os.Create(attachmentPath)
|
||||
if err != nil {
|
||||
resp.Body.Close()
|
||||
return fmt.Errorf("could not create attachment file: %w", err)
|
||||
}
|
||||
|
||||
if _, err := io.Copy(outFile, resp.Body); err != nil {
|
||||
outFile.Close()
|
||||
resp.Body.Close()
|
||||
return fmt.Errorf("could not save attachment: %w", err)
|
||||
}
|
||||
outFile.Close()
|
||||
resp.Body.Close()
|
||||
|
||||
// Update markdown to link to local file
|
||||
localPath := filepath.Join("..", "attachments", sanitizedAttachmentName)
|
||||
markdownContent.WriteString(fmt.Sprintf("\n[%s](%s)", att.FileName, localPath))
|
||||
}
|
||||
markdownContent.WriteString("\n\n")
|
||||
}
|
||||
|
||||
// Create the output directory for markdown files
|
||||
channelsDir := filepath.Join(outputDir, "channels")
|
||||
if err := os.MkdirAll(channelsDir, 0755); err != nil {
|
||||
return fmt.Errorf("could not create output directory: %w", err)
|
||||
}
|
||||
|
||||
// Sanitize channel name for the filename
|
||||
sanitizedChannelName := sanitizeFilename(channel.Name)
|
||||
|
||||
// Write the markdown to a file
|
||||
filePath := filepath.Join(channelsDir, fmt.Sprintf("%s.md", sanitizedChannelName))
|
||||
if err := os.WriteFile(filePath, []byte(markdownContent.String()), 0644); err != nil {
|
||||
return fmt.Errorf("could not write markdown file for channel %s: %w", channel.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("Successfully created archive in discord/%s\n", sanitizedServerName)
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
collectCmd.AddCommand(collectDiscordCmd)
|
||||
collectDiscordCmd.AddCommand(collectDiscordImportCmd)
|
||||
}
|
||||
132
cmd/collect_discord_test.go
Normal file
132
cmd/collect_discord_test.go
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/Snider/Borg/pkg/mocks"
|
||||
)
|
||||
|
||||
func TestCollectDiscordImportCmd_Good(t *testing.T) {
|
||||
// Mock HTTP client
|
||||
mockClient := mocks.NewMockClient(map[string]*http.Response{
|
||||
"https://example.com/file.txt": {
|
||||
StatusCode: http.StatusOK,
|
||||
Body: io.NopCloser(strings.NewReader("attachment content")),
|
||||
},
|
||||
})
|
||||
http.DefaultClient = mockClient
|
||||
|
||||
// Create a temporary directory
|
||||
tempDir := t.TempDir()
|
||||
|
||||
// Read the sample export from testdata
|
||||
sampleData, err := os.ReadFile("testdata/sample_export.json")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to read sample export file: %v", err)
|
||||
}
|
||||
jsonPath := filepath.Join(tempDir, "export.json")
|
||||
if err := os.WriteFile(jsonPath, sampleData, 0644); err != nil {
|
||||
t.Fatalf("failed to write sample json: %v", err)
|
||||
}
|
||||
|
||||
// Change working directory to tempDir to check relative output path
|
||||
oldWd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get current working directory: %v", err)
|
||||
}
|
||||
if err := os.Chdir(tempDir); err != nil {
|
||||
t.Fatalf("failed to change working directory: %v", err)
|
||||
}
|
||||
defer os.Chdir(oldWd)
|
||||
|
||||
rootCmd := NewRootCmd()
|
||||
rootCmd.AddCommand(GetCollectCmd())
|
||||
|
||||
// Execute command
|
||||
_, err = executeCommand(rootCmd, "collect", "discord", "import", "export.json")
|
||||
if err != nil {
|
||||
t.Fatalf("collect discord import command failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify output
|
||||
sanitizedServerName := "Test-Server"
|
||||
expectedBaseDir := filepath.Join("discord", sanitizedServerName)
|
||||
|
||||
// Verify INDEX.json
|
||||
indexPath := filepath.Join(expectedBaseDir, "INDEX.json")
|
||||
indexContent, err := os.ReadFile(indexPath)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to read INDEX.json: %v", err)
|
||||
}
|
||||
type SearchEntry struct {
|
||||
ID string `json:"id"`
|
||||
Channel string `json:"channel"`
|
||||
Author string `json:"author"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
var index []SearchEntry
|
||||
if err := json.Unmarshal(indexContent, &index); err != nil {
|
||||
t.Fatalf("failed to unmarshal INDEX.json: %v", err)
|
||||
}
|
||||
if len(index) != 3 {
|
||||
t.Fatalf("expected 3 messages in index, got %d", len(index))
|
||||
}
|
||||
if index[1].Content != "This is a test message." {
|
||||
t.Errorf("unexpected content in index entry: %s", index[1].Content)
|
||||
}
|
||||
|
||||
// Verify attachment
|
||||
attachmentPath := filepath.Join(expectedBaseDir, "attachments", "file with spaces.txt")
|
||||
attachmentContent, err := os.ReadFile(attachmentPath)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to read attachment: %v", err)
|
||||
}
|
||||
if string(attachmentContent) != "attachment content" {
|
||||
t.Errorf("unexpected content in attachment. Got: %s", string(attachmentContent))
|
||||
}
|
||||
|
||||
// Verify random.md
|
||||
randomMdPath := filepath.Join(expectedBaseDir, "channels", "random.md")
|
||||
randomMdContent, err := os.ReadFile(randomMdPath)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to read random.md: %v", err)
|
||||
}
|
||||
expectedRandomContent := "# random\n\n---\n**User2** `2024-01-01 12:01:00`\n\nThis is a test message.\n\n[file with spaces.txt](../attachments/file with spaces.txt)\n\n"
|
||||
if string(randomMdContent) != expectedRandomContent {
|
||||
t.Errorf("unexpected content in random.md.\nGot:\n%s\nExpected:\n%s", string(randomMdContent), expectedRandomContent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCollectDiscordImportCmd_Bad(t *testing.T) {
|
||||
rootCmd := NewRootCmd()
|
||||
rootCmd.AddCommand(GetCollectCmd())
|
||||
|
||||
// Execute command with non-existent file
|
||||
_, err := executeCommand(rootCmd, "collect", "discord", "import", "non-existent.json")
|
||||
if err == nil {
|
||||
t.Fatal("expected an error, but got none")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "could not open file") {
|
||||
t.Errorf("unexpected error message: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCollectDiscordImportCmd_Ugly(t *testing.T) {
|
||||
rootCmd := NewRootCmd()
|
||||
rootCmd.AddCommand(GetCollectCmd())
|
||||
_, err := executeCommand(rootCmd, "collect", "discord", "import")
|
||||
if err == nil {
|
||||
t.Fatal("expected an error for no arguments, but got none")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "accepts 1 arg(s), received 0") {
|
||||
t.Errorf("unexpected error message: %v", err)
|
||||
}
|
||||
}
|
||||
59
cmd/testdata/sample_export.json
vendored
Normal file
59
cmd/testdata/sample_export.json
vendored
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
{
|
||||
"guild": {
|
||||
"id": "12345",
|
||||
"name": "Test/Server"
|
||||
},
|
||||
"channels": [
|
||||
{
|
||||
"id": "channel1",
|
||||
"name": "general"
|
||||
},
|
||||
{
|
||||
"id": "channel2",
|
||||
"name": "random"
|
||||
}
|
||||
],
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg1",
|
||||
"channelId": "channel1",
|
||||
"author": {
|
||||
"id": "user1",
|
||||
"name": "Jules",
|
||||
"avatarUrl": ""
|
||||
},
|
||||
"timestamp": "2024-01-01T12:00:00Z",
|
||||
"content": "Hello, world!",
|
||||
"attachments": []
|
||||
},
|
||||
{
|
||||
"id": "msg2",
|
||||
"channelId": "channel2",
|
||||
"author": {
|
||||
"id": "user2",
|
||||
"name": "User2",
|
||||
"avatarUrl": ""
|
||||
},
|
||||
"timestamp": "2024-01-01T12:01:00Z",
|
||||
"content": "This is a test message.",
|
||||
"attachments": [
|
||||
{
|
||||
"url": "https://example.com/file.txt",
|
||||
"fileName": "file with spaces.txt"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "msg3",
|
||||
"channelId": "channel1",
|
||||
"author": {
|
||||
"id": "user1",
|
||||
"name": "Jules",
|
||||
"avatarUrl": ""
|
||||
},
|
||||
"timestamp": "2024-01-01T12:02:00Z",
|
||||
"content": "Another message in general.",
|
||||
"attachments": []
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -21,8 +21,10 @@ Subcommands:
|
|||
- `borg collect github repos <org-or-user> [--output <file>] [--format ...] [--compression ...]`
|
||||
- `borg collect website <url> [--depth N] [--output <file>] [--format ...] [--compression ...]`
|
||||
- `borg collect pwa --uri <url> [--output <file>] [--format ...] [--compression ...]`
|
||||
- `borg collect discord import <path/to/export.json>`
|
||||
|
||||
Examples:
|
||||
- `borg collect discord import ./discord-export/export.json`
|
||||
- `borg collect github repo https://github.com/Snider/Borg --output borg.dat`
|
||||
- `borg collect website https://example.com --depth 1 --output site.dat`
|
||||
- `borg collect pwa --uri https://squoosh.app --output squoosh.dat`
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue