From d8bbda85b56cadc8e80ab555eba903781daa53f8 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 2 Feb 2026 00:41:14 +0000 Subject: [PATCH] feat: Telegram channel/group export This commit introduces a new command, `borg collect telegram import`, which allows users to import and archive Telegram channel and group exports. The new functionality includes: - A new `telegram` subcommand under the `collect` command. - An `import` command that parses Telegram Desktop JSON exports. - Conversion of messages to Markdown, with support for rich text formatting, replies, and forwards. - Preservation of media files, which are stored in a `media` subdirectory. - Organization of messages into monthly Markdown files for easy navigation. - Creation of an `INDEX.json` file to store the original export metadata. The feature is accompanied by unit tests to ensure its correctness and reliability. Co-authored-by: Snider <631881+Snider@users.noreply.github.com> --- cmd/collect_telegram.go | 114 ++++++++++++ pkg/telegram/telegram.go | 170 ++++++++++++++++++ pkg/telegram/telegram_test.go | 99 ++++++++++ .../photos/photo_1@10-02-2024_18-30-00.jpg | 1 + pkg/telegram/testdata/result.json | 42 +++++ 5 files changed, 426 insertions(+) create mode 100644 cmd/collect_telegram.go create mode 100644 pkg/telegram/telegram.go create mode 100644 pkg/telegram/telegram_test.go create mode 100644 pkg/telegram/testdata/photos/photo_1@10-02-2024_18-30-00.jpg create mode 100644 pkg/telegram/testdata/result.json diff --git a/cmd/collect_telegram.go b/cmd/collect_telegram.go new file mode 100644 index 0000000..889dcc3 --- /dev/null +++ b/cmd/collect_telegram.go @@ -0,0 +1,114 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/Snider/Borg/pkg/compress" + "github.com/Snider/Borg/pkg/telegram" + "github.com/Snider/Borg/pkg/tim" + "github.com/Snider/Borg/pkg/trix" + "github.com/Snider/Borg/pkg/ui" + "github.com/spf13/cobra" +) + 
+var collectTelegramCmd = NewCollectTelegramCmd() + +func init() { + GetCollectCmd().AddCommand(GetCollectTelegramCmd()) +} + +func GetCollectTelegramCmd() *cobra.Command { + return collectTelegramCmd +} + +func NewCollectTelegramCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "telegram", + Short: "Collect a Telegram export", + Long: `Collect a Telegram export and store it in a DataNode.`, + } + cmd.AddCommand(NewCollectTelegramImportCmd()) + return cmd +} + +func NewCollectTelegramImportCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "import [path]", + Short: "Import a Telegram export", + Long: `Import a Telegram export and store it in a DataNode.`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + exportPath := args[0] + outputFile, _ := cmd.Flags().GetString("output") + format, _ := cmd.Flags().GetString("format") + compression, _ := cmd.Flags().GetString("compression") + password, _ := cmd.Flags().GetString("password") + + if format != "datanode" && format != "tim" && format != "trix" { + return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format) + } + + prompter := ui.NewNonInteractivePrompter(ui.GetWebsiteQuote) + prompter.Start() + defer prompter.Stop() + + dn, err := telegram.Parse(exportPath) + if err != nil { + return fmt.Errorf("error parsing telegram export: %w", err) + } + + if dn == nil { + return fmt.Errorf("parsing telegram export resulted in an empty datanode") + } + + var data []byte + switch format { + case "tim": + t, err := tim.FromDataNode(dn) + if err != nil { + return fmt.Errorf("error creating tim: %w", err) + } + data, err = t.ToTar() + if err != nil { + return fmt.Errorf("error serializing tim: %w", err) + } + case "trix": + data, err = trix.ToTrix(dn, password) + if err != nil { + return fmt.Errorf("error serializing trix: %w", err) + } + default: // datanode + data, err = dn.ToTar() + if err != nil { + return fmt.Errorf("error serializing DataNode: %w", err) + 
} + } + + compressedData, err := compress.Compress(data, compression) + if err != nil { + return fmt.Errorf("error compressing data: %w", err) + } + + if outputFile == "" { + outputFile = "telegram." + format + if compression != "none" { + outputFile += "." + compression + } + } + + err = os.WriteFile(outputFile, compressedData, 0644) + if err != nil { + return fmt.Errorf("error writing telegram export to file: %w", err) + } + + fmt.Fprintln(cmd.OutOrStdout(), "Telegram export saved to", outputFile) + return nil + }, + } + cmd.Flags().String("output", "", "Output file for the DataNode") + cmd.Flags().String("format", "datanode", "Output format (datanode, tim, or trix)") + cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)") + cmd.Flags().String("password", "", "Password for encryption") + return cmd +} diff --git a/pkg/telegram/telegram.go b/pkg/telegram/telegram.go new file mode 100644 index 0000000..d592352 --- /dev/null +++ b/pkg/telegram/telegram.go @@ -0,0 +1,170 @@ +package telegram + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/Snider/Borg/pkg/datanode" +) + +// TelegramExport represents the overall structure of the Telegram JSON export. +type TelegramExport struct { + Name string `json:"name"` + Type string `json:"type"` + ID int `json:"id"` + Messages []Message `json:"messages"` +} + +// Message represents a single message in the Telegram export. +type Message struct { + ID int `json:"id"` + Type string `json:"type"` + Date string `json:"date"` + From string `json:"from"` + Text interface{} `json:"text"` + File string `json:"file"` + ReplyToID int `json:"reply_to_message_id"` + Photo string `json:"photo"` + Width int `json:"width"` + Height int `json:"height"` + ForwardedFrom string `json:"forwarded_from"` +} + +// TextEntity represents a formatted part of a message text. 
type TextEntity struct {
	Type string `json:"type"`           // entity kind; parseText special-cases "bold", "italic", "link", "text_link", "pre" and "code"
	Text string `json:"text"`           // visible text of the entity
	Href string `json:"href,omitempty"` // link target; only consulted for "link" and "text_link"
}

// parseText converts the 'text' field of a message into a Markdown string.
//
// The field is either a plain string, which is returned unchanged, or a slice
// whose items are plain strings and entity objects (decoded by encoding/json
// as map[string]interface{}). Items of any other type are ignored. Any other
// value for text, including nil, yields the empty string.
func parseText(text interface{}) string {
	switch v := text.(type) {
	case string:
		return v
	case []interface{}:
		var builder strings.Builder
		for _, item := range v {
			switch e := item.(type) {
			case string:
				builder.WriteString(e)
			case map[string]interface{}:
				builder.WriteString(renderEntity(entityFromMap(e)))
			}
		}
		return builder.String()
	}
	return ""
}

// entityFromMap copies the "type", "text" and "href" keys of a decoded JSON
// object into a TextEntity. Missing or non-string values leave the
// corresponding field empty.
func entityFromMap(m map[string]interface{}) TextEntity {
	var entity TextEntity
	entity.Type, _ = m["type"].(string)
	entity.Text, _ = m["text"].(string)
	entity.Href, _ = m["href"].(string)
	return entity
}

// renderEntity returns the Markdown representation of a single entity.
// Entity types without a dedicated rendering are emitted as their plain text.
func renderEntity(entity TextEntity) string {
	switch entity.Type {
	case "bold":
		return "**" + entity.Text + "**"
	case "italic":
		return "*" + entity.Text + "*"
	case "link", "text_link":
		target := entity.Href
		if target == "" {
			// Without this fallback the output is "[text]()", a link that
			// points nowhere. NOTE(review): presumably bare-URL "link"
			// entities carry the URL in their text and no href — confirm
			// against a real export.
			target = entity.Text
		}
		return "[" + entity.Text + "](" + target + ")"
	case "pre", "code":
		return "`" + entity.Text + "`"
	default:
		return entity.Text
	}
}

// Parse parses a Telegram export directory and returns a DataNode.
+func Parse(path string) (*datanode.DataNode, error) { + jsonPath := filepath.Join(path, "result.json") + jsonBytes, err := os.ReadFile(jsonPath) + if err != nil { + return nil, fmt.Errorf("failed to read result.json: %w", err) + } + + var export TelegramExport + if err := json.Unmarshal(jsonBytes, &export); err != nil { + return nil, fmt.Errorf("failed to unmarshal json: %w", err) + } + + dn := datanode.New() + channelName := export.Name + + // Create INDEX.json + indexData, err := json.MarshalIndent(export, "", " ") + if err != nil { + return nil, fmt.Errorf("failed to marshal index data: %w", err) + } + indexPath := filepath.Join("telegram", channelName, "INDEX.json") + dn.AddData(indexPath, indexData) + + messagesByMonth := make(map[string][]Message) + for _, msg := range export.Messages { + if msg.Type != "message" { + continue + } + t, err := time.Parse("2006-01-02T15:04:05", msg.Date) + if err != nil { + continue // Skip messages with invalid date format + } + month := t.Format("2006-01") + messagesByMonth[month] = append(messagesByMonth[month], msg) + } + + for month, messages := range messagesByMonth { + sort.Slice(messages, func(i, j int) bool { + return messages[i].Date < messages[j].Date + }) + + var mdBuilder strings.Builder + for _, msg := range messages { + mdBuilder.WriteString(fmt.Sprintf("### %s from %s (ID: %d)\n\n", msg.Date, msg.From, msg.ID)) + if msg.ReplyToID != 0 { + mdBuilder.WriteString(fmt.Sprintf("> Reply to message %d\n\n", msg.ReplyToID)) + } + if msg.ForwardedFrom != "" { + mdBuilder.WriteString(fmt.Sprintf("> Forwarded from %s\n\n", msg.ForwardedFrom)) + } + + text := parseText(msg.Text) + mdBuilder.WriteString(text) + mdBuilder.WriteString("\n\n") + + mediaPath := "" + if msg.File != "" { + mediaPath = msg.File + } else if msg.Photo != "" { + mediaPath = msg.Photo + } + + if mediaPath != "" { + mdBuilder.WriteString(fmt.Sprintf("![Media](media/%s)\n\n", filepath.Base(mediaPath))) + + srcMediaPath := filepath.Join(path, mediaPath) + 
mediaBytes, err := os.ReadFile(srcMediaPath) + if err == nil { + destMediaPath := filepath.Join("telegram", channelName, "media", filepath.Base(mediaPath)) + dn.AddData(destMediaPath, mediaBytes) + } + } + mdBuilder.WriteString("---\n\n") + } + + mdPath := filepath.Join("telegram", channelName, "messages", month+".md") + dn.AddData(mdPath, []byte(mdBuilder.String())) + } + + return dn, nil +} diff --git a/pkg/telegram/telegram_test.go b/pkg/telegram/telegram_test.go new file mode 100644 index 0000000..607d29a --- /dev/null +++ b/pkg/telegram/telegram_test.go @@ -0,0 +1,99 @@ +package telegram + +import ( + "io" + "path/filepath" + "strings" + "testing" +) + +func TestParse_Good(t *testing.T) { + testDataPath := "testdata" + + dn, err := Parse(testDataPath) + if err != nil { + t.Fatalf("Parse() error = %v, wantErr nil", err) + } + + if dn == nil { + t.Fatal("Parse() returned a nil DataNode") + } + + // Check for INDEX.json + indexPath := "telegram/Test Channel/INDEX.json" + exists, err := dn.Exists(indexPath) + if err != nil { + t.Fatalf("dn.Exists(%q) error: %v", indexPath, err) + } + if !exists { + t.Errorf("Expected file to exist: %s", indexPath) + } + + // Check for January messages markdown file + janMessagesPath := "telegram/Test Channel/messages/2024-01.md" + exists, err = dn.Exists(janMessagesPath) + if err != nil { + t.Fatalf("dn.Exists(%q) error: %v", janMessagesPath, err) + } + if !exists { + t.Errorf("Expected file to exist: %s", janMessagesPath) + } else { + // Verify content of the January markdown file + f, err := dn.Open(janMessagesPath) + if err != nil { + t.Fatalf("Failed to open %s: %v", janMessagesPath, err) + } + defer f.Close() + + contentBytes, err := io.ReadAll(f) + if err != nil { + t.Fatalf("Failed to read from %s: %v", janMessagesPath, err) + } + + content := string(contentBytes) + if !strings.Contains(content, "Hello, world!") { + t.Errorf("Expected to find 'Hello, world!' 
in %s", janMessagesPath) + } + if !strings.Contains(content, "**This** is a *test* message with formatting.") { + t.Errorf("Expected to find formatted message in %s", janMessagesPath) + } + } + + // Check for February messages markdown file + febMessagesPath := "telegram/Test Channel/messages/2024-02.md" + exists, err = dn.Exists(febMessagesPath) + if err != nil { + t.Fatalf("dn.Exists(%q) error: %v", febMessagesPath, err) + } + if !exists { + t.Errorf("Expected file to exist: %s", febMessagesPath) + } else { + f, err := dn.Open(febMessagesPath) + if err != nil { + t.Fatalf("Failed to open %s: %v", febMessagesPath, err) + } + defer f.Close() + + contentBytes, err := io.ReadAll(f) + if err != nil { + t.Fatalf("Failed to read from %s: %v", febMessagesPath, err) + } + + content := string(contentBytes) + if !strings.Contains(content, "Here is a photo.") { + t.Errorf("Expected to find 'Here is a photo.' in %s", febMessagesPath) + } + } + + // Check for media file + mediaFileName := "photo_1@10-02-2024_18-30-00.jpg" + mediaPath := filepath.Join("telegram", "Test Channel", "media", mediaFileName) + mediaPath = filepath.ToSlash(mediaPath) // Ensure cross-platform path separators + exists, err = dn.Exists(mediaPath) + if err != nil { + t.Fatalf("dn.Exists(%q) error: %v", mediaPath, err) + } + if !exists { + t.Errorf("Expected media file to exist: %s", mediaPath) + } +} diff --git a/pkg/telegram/testdata/photos/photo_1@10-02-2024_18-30-00.jpg b/pkg/telegram/testdata/photos/photo_1@10-02-2024_18-30-00.jpg new file mode 100644 index 0000000..ed3ddd1 --- /dev/null +++ b/pkg/telegram/testdata/photos/photo_1@10-02-2024_18-30-00.jpg @@ -0,0 +1 @@ +dummy image data \ No newline at end of file diff --git a/pkg/telegram/testdata/result.json b/pkg/telegram/testdata/result.json new file mode 100644 index 0000000..b7ed6ab --- /dev/null +++ b/pkg/telegram/testdata/result.json @@ -0,0 +1,42 @@ +{ + "name": "Test Channel", + "type": "public_channel", + "id": 123456789, + "messages": [ + { 
+ "id": 1, + "type": "message", + "date": "2024-01-15T12:00:00", + "from": "User1", + "text": "Hello, world!" + }, + { + "id": 2, + "type": "message", + "date": "2024-01-15T12:01:00", + "from": "User2", + "text": [ + { + "type": "bold", + "text": "This" + }, + " is a ", + { + "type": "italic", + "text": "test" + }, + " message with formatting." + ] + }, + { + "id": 3, + "type": "message", + "date": "2024-02-10T18:30:00", + "from": "User1", + "photo": "photos/photo_1@10-02-2024_18-30-00.jpg", + "width": 800, + "height": 600, + "text": "Here is a photo." + } + ] +}