Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
google-labs-jules[bot]
d8bbda85b5 feat: Telegram channel/group export
This commit introduces a new command, `borg collect telegram import`, which allows users to import and archive Telegram channel and group exports.

The new functionality includes:
- A new `telegram` subcommand under the `collect` command.
- An `import` command that parses Telegram Desktop JSON exports.
- Conversion of messages to Markdown, with support for rich text formatting, replies, and forwards.
- Preservation of media files, which are stored in a `media` subdirectory.
- Organization of messages into monthly Markdown files for easy navigation.
- Creation of an `INDEX.json` file to store the original export metadata.

The feature is accompanied by unit tests to ensure its correctness and reliability.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:41:14 +00:00
5 changed files with 426 additions and 0 deletions

114
cmd/collect_telegram.go Normal file
View file

@ -0,0 +1,114 @@
package cmd
import (
"fmt"
"os"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/telegram"
"github.com/Snider/Borg/pkg/tim"
"github.com/Snider/Borg/pkg/trix"
"github.com/Snider/Borg/pkg/ui"
"github.com/spf13/cobra"
)
// collectTelegramCmd holds the single package-level "telegram" command. It is
// built by NewCollectTelegramCmd during package variable initialisation and is
// the instance handed out by GetCollectTelegramCmd.
var collectTelegramCmd = NewCollectTelegramCmd()
// init attaches the package-level telegram command to the collect command so
// that it is reachable as a subcommand of collect.
func init() {
	parent := GetCollectCmd()
	parent.AddCommand(GetCollectTelegramCmd())
}
// GetCollectTelegramCmd returns the shared package-level "telegram" command.
// Every call returns the same *cobra.Command instance; no new command is built.
func GetCollectTelegramCmd() *cobra.Command {
return collectTelegramCmd
}
// NewCollectTelegramCmd builds a fresh "telegram" command. The command has no
// run function of its own; it only groups subcommands, and the "import"
// subcommand is attached before it is returned.
func NewCollectTelegramCmd() *cobra.Command {
	telegramCmd := new(cobra.Command)
	telegramCmd.Use = "telegram"
	telegramCmd.Short = "Collect a Telegram export"
	telegramCmd.Long = "Collect a Telegram export and store it in a DataNode."

	telegramCmd.AddCommand(NewCollectTelegramImportCmd())
	return telegramCmd
}
// NewCollectTelegramImportCmd builds the "import" subcommand.
//
// The command takes exactly one argument, the path of a Telegram export
// directory. It parses the export into a DataNode, serialises it in the
// requested --format (datanode, tim or trix), runs the result through
// compress.Compress with the --compression value and writes it to --output.
// When --output is empty the file is named "telegram.<format>", with
// ".<compression>" appended unless compression is "none".
func NewCollectTelegramImportCmd() *cobra.Command {
	importCmd := &cobra.Command{
		Use:   "import [path]",
		Short: "Import a Telegram export",
		Long:  "Import a Telegram export and store it in a DataNode.",
		Args:  cobra.ExactArgs(1),
	}

	flagSet := importCmd.Flags()
	flagSet.String("output", "", "Output file for the DataNode")
	flagSet.String("format", "datanode", "Output format (datanode, tim, or trix)")
	flagSet.String("compression", "none", "Compression format (none, gz, or xz)")
	flagSet.String("password", "", "Password for encryption")

	importCmd.RunE = func(cmd *cobra.Command, args []string) error {
		// Lookup errors are discarded: every flag read here is registered
		// above as a string flag on this same command.
		opts := cmd.Flags()
		outputFile, _ := opts.GetString("output")
		format, _ := opts.GetString("format")
		compression, _ := opts.GetString("compression")
		password, _ := opts.GetString("password")

		// Reject unknown formats before doing any work.
		switch format {
		case "datanode", "tim", "trix":
		default:
			return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format)
		}

		prompter := ui.NewNonInteractivePrompter(ui.GetWebsiteQuote)
		prompter.Start()
		defer prompter.Stop()

		node, err := telegram.Parse(args[0])
		if err != nil {
			return fmt.Errorf("error parsing telegram export: %w", err)
		}
		if node == nil {
			return fmt.Errorf("parsing telegram export resulted in an empty datanode")
		}

		// Serialise the DataNode into the container the user asked for.
		var payload []byte
		switch format {
		case "tim":
			container, timErr := tim.FromDataNode(node)
			if timErr != nil {
				return fmt.Errorf("error creating tim: %w", timErr)
			}
			if payload, err = container.ToTar(); err != nil {
				return fmt.Errorf("error serializing tim: %w", err)
			}
		case "trix":
			// The password is only consumed by the trix format.
			if payload, err = trix.ToTrix(node, password); err != nil {
				return fmt.Errorf("error serializing trix: %w", err)
			}
		default: // datanode
			if payload, err = node.ToTar(); err != nil {
				return fmt.Errorf("error serializing DataNode: %w", err)
			}
		}

		packed, err := compress.Compress(payload, compression)
		if err != nil {
			return fmt.Errorf("error compressing data: %w", err)
		}

		// Derive a default file name from the format and compression.
		if outputFile == "" {
			suffix := ""
			if compression != "none" {
				suffix = "." + compression
			}
			outputFile = "telegram." + format + suffix
		}

		if err := os.WriteFile(outputFile, packed, 0644); err != nil {
			return fmt.Errorf("error writing telegram export to file: %w", err)
		}

		fmt.Fprintln(cmd.OutOrStdout(), "Telegram export saved to", outputFile)
		return nil
	}

	return importCmd
}

170
pkg/telegram/telegram.go Normal file
View file

@ -0,0 +1,170 @@
package telegram
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/Snider/Borg/pkg/datanode"
)
// TelegramExport represents the overall structure of the Telegram JSON export.
//
// Parse decodes result.json into this type and marshals the decoded value back
// out as INDEX.json, so the field order and JSON tags below also define the
// layout of that index file.
type TelegramExport struct {
// Name is the exported chat's name; Parse uses it as the directory name
// beneath "telegram/".
Name string `json:"name"`
// Type is the chat type string from the export (for example
// "public_channel"). Parse does not interpret it.
Type string `json:"type"`
// ID is the numeric identifier from the export. Parse does not interpret it.
ID int `json:"id"`
// Messages lists every entry of the export; Parse renders only those whose
// Type is "message".
Messages []Message `json:"messages"`
}
// Message represents a single message in the Telegram export.
//
// All fields are decoded straight from the JSON object; none are required.
type Message struct {
// ID is the message identifier, printed in the Markdown heading.
ID int `json:"id"`
// Type is the entry kind. Parse skips every entry whose Type is not "message".
Type string `json:"type"`
// Date is the timestamp string. Parse expects the layout
// "2006-01-02T15:04:05" and skips messages that do not match it.
Date string `json:"date"`
// From is the sender name, printed in the Markdown heading.
From string `json:"from"`
// Text is either a plain string or a list mixing strings and entity
// objects; parseText converts it to Markdown.
Text interface{} `json:"text"`
// File is the path of an attached file, joined onto the export directory
// by Parse. It takes precedence over Photo when both are set.
File string `json:"file"`
// ReplyToID, when non-zero, is the ID of the message being replied to.
ReplyToID int `json:"reply_to_message_id"`
// Photo is the path of an attached photo, joined onto the export directory
// by Parse. It is only used when File is empty.
Photo string `json:"photo"`
// Width and Height are decoded but not used by Parse; presumably the pixel
// dimensions of the attached media — TODO confirm.
Width int `json:"width"`
Height int `json:"height"`
// ForwardedFrom, when non-empty, names the origin of a forwarded message.
ForwardedFrom string `json:"forwarded_from"`
}
// TextEntity represents a formatted part of a message text.
//
// Its fields mirror the keys ("type", "text", "href") that parseText reads from
// each entity object inside a message's text list.
type TextEntity struct {
// Type selects the Markdown rendering. parseText gives special treatment to
// "bold", "italic", "link", "text_link", "pre" and "code"; any other value
// is rendered as plain text.
Type string `json:"type"`
// Text is the visible content of the entity.
Text string `json:"text"`
// Href is the link target for link entities; it is omitted from JSON when empty.
Href string `json:"href,omitempty"`
}
// parseText converts the 'text' field of an exported message into a Markdown
// formatted string.
//
// text is the decoded JSON value. A plain string is returned unchanged. A list
// is rendered item by item: string items are copied verbatim and entity objects
// (maps with "type", "text" and optionally "href" keys) are rendered according
// to their type:
//
//	bold             -> **text**
//	italic           -> *text*
//	link, text_link  -> [text](href)
//	pre, code        -> `text`
//	anything else    -> text
//
// List items of any other kind are ignored. Any other value for text,
// including nil, yields the empty string.
func parseText(text interface{}) string {
	switch v := text.(type) {
	case string:
		return v
	case []interface{}:
		var builder strings.Builder
		for _, item := range v {
			switch e := item.(type) {
			case string:
				builder.WriteString(e)
			case map[string]interface{}:
				// Missing or non-string keys simply leave the value empty.
				kind, _ := e["type"].(string)
				body, _ := e["text"].(string)
				href, _ := e["href"].(string)

				switch kind {
				case "bold":
					fmt.Fprintf(&builder, "**%s**", body)
				case "italic":
					fmt.Fprintf(&builder, "*%s*", body)
				case "link", "text_link":
					// An entity without an href (as bare "link" entities
					// appear to be in Telegram Desktop exports) carries the
					// URL in its text; use it so the target is never empty.
					if href == "" {
						href = body
					}
					fmt.Fprintf(&builder, "[%s](%s)", body, href)
				case "pre", "code":
					fmt.Fprintf(&builder, "`%s`", body)
				default:
					builder.WriteString(body)
				}
			}
		}
		return builder.String()
	}
	return ""
}
// Parse reads the Telegram export stored in the directory path and returns a
// DataNode populated with:
//
//	telegram/<name>/INDEX.json            the decoded export, re-marshalled
//	telegram/<name>/messages/YYYY-MM.md   one Markdown file per month
//	telegram/<name>/media/<file>          attachments referenced by messages
//
// <name> is the export's name with path separators replaced by "_" so that a
// crafted name cannot place entries outside "telegram/". Entry paths always use
// forward slashes, regardless of the host operating system.
//
// Only entries whose type is "message" are rendered. Messages whose date does
// not match the layout "2006-01-02T15:04:05" are skipped. Months are emitted in
// ascending order and messages within a month are ordered by date, keeping the
// export's order for equal timestamps.
//
// Media handling is best effort: an attachment that lies outside the export
// directory or cannot be read is mentioned in the Markdown as unavailable
// instead of being linked, and does not cause Parse to fail.
//
// An error is returned when result.json cannot be read or decoded, or when the
// index cannot be marshalled.
func Parse(path string) (*datanode.DataNode, error) {
	jsonBytes, err := os.ReadFile(filepath.Join(path, "result.json"))
	if err != nil {
		return nil, fmt.Errorf("failed to read result.json: %w", err)
	}

	var export TelegramExport
	if err := json.Unmarshal(jsonBytes, &export); err != nil {
		return nil, fmt.Errorf("failed to unmarshal json: %w", err)
	}

	// The name comes from the export file, so neutralise anything that could
	// act as a path element of its own ("/", "\", "." or "..").
	channelName := strings.NewReplacer("/", "_", `\`, "_").Replace(export.Name)
	if channelName == "." || channelName == ".." {
		channelName = "_"
	}

	// entry builds a slash-separated path below telegram/<name>.
	entry := func(elem ...string) string {
		parts := append([]string{"telegram", channelName}, elem...)
		return filepath.ToSlash(filepath.Join(parts...))
	}

	dn := datanode.New()

	// Create INDEX.json
	indexData, err := json.MarshalIndent(export, "", " ")
	if err != nil {
		return nil, fmt.Errorf("failed to marshal index data: %w", err)
	}
	dn.AddData(entry("INDEX.json"), indexData)

	messagesByMonth := make(map[string][]Message)
	for _, msg := range export.Messages {
		if msg.Type != "message" {
			continue
		}
		t, err := time.Parse("2006-01-02T15:04:05", msg.Date)
		if err != nil {
			continue // Skip messages with invalid date format
		}
		month := t.Format("2006-01")
		messagesByMonth[month] = append(messagesByMonth[month], msg)
	}

	// Map iteration order is random; walk the months in sorted order so the
	// DataNode is populated deterministically.
	months := make([]string, 0, len(messagesByMonth))
	for month := range messagesByMonth {
		months = append(months, month)
	}
	sort.Strings(months)

	for _, month := range months {
		messages := messagesByMonth[month]
		// The fixed-width date layout sorts chronologically as a string. The
		// stable sort keeps the export order for identical timestamps.
		sort.SliceStable(messages, func(i, j int) bool {
			return messages[i].Date < messages[j].Date
		})

		var mdBuilder strings.Builder
		for _, msg := range messages {
			fmt.Fprintf(&mdBuilder, "### %s from %s (ID: %d)\n\n", msg.Date, msg.From, msg.ID)

			if msg.ReplyToID != 0 {
				fmt.Fprintf(&mdBuilder, "> Reply to message %d\n\n", msg.ReplyToID)
			}
			if msg.ForwardedFrom != "" {
				fmt.Fprintf(&mdBuilder, "> Forwarded from %s\n\n", msg.ForwardedFrom)
			}

			mdBuilder.WriteString(parseText(msg.Text))
			mdBuilder.WriteString("\n\n")

			mediaPath := msg.File
			if mediaPath == "" {
				mediaPath = msg.Photo
			}
			if mediaPath != "" {
				srcMediaPath := filepath.Join(path, mediaPath)

				// Refuse media paths that climb out of the export directory.
				rel, relErr := filepath.Rel(path, srcMediaPath)
				inside := relErr == nil && rel != ".." &&
					!strings.HasPrefix(rel, ".."+string(filepath.Separator))

				var mediaBytes []byte
				available := false
				if inside {
					if b, readErr := os.ReadFile(srcMediaPath); readErr == nil {
						mediaBytes, available = b, true
					}
				}

				if available {
					mediaName := filepath.Base(mediaPath)
					fmt.Fprintf(&mdBuilder, "![Media](media/%s)\n\n", mediaName)
					dn.AddData(entry("media", mediaName), mediaBytes)
				} else {
					// Keep a trace of the attachment without emitting a link
					// to a file that is not in the archive.
					fmt.Fprintf(&mdBuilder, "[Media unavailable: %s]\n\n", mediaPath)
				}
			}

			mdBuilder.WriteString("---\n\n")
		}

		dn.AddData(entry("messages", month+".md"), []byte(mdBuilder.String()))
	}

	return dn, nil
}

View file

@ -0,0 +1,99 @@
package telegram
import (
"io"
"path/filepath"
"strings"
"testing"
)
// TestParse_Good parses the fixture export in testdata and checks that the
// resulting DataNode contains the index, one Markdown file per month with the
// expected message text, and the referenced media file.
func TestParse_Good(t *testing.T) {
	dn, err := Parse("testdata")
	if err != nil {
		t.Fatalf("Parse() error = %v, wantErr nil", err)
	}
	if dn == nil {
		t.Fatal("Parse() returned a nil DataNode")
	}

	// exists reports whether name is present, aborting the test on lookup errors.
	exists := func(name string) bool {
		found, err := dn.Exists(name)
		if err != nil {
			t.Fatalf("dn.Exists(%q) error: %v", name, err)
		}
		return found
	}

	// read returns the full content of name, aborting the test on any failure.
	read := func(name string) string {
		f, err := dn.Open(name)
		if err != nil {
			t.Fatalf("Failed to open %s: %v", name, err)
		}
		defer f.Close()

		raw, err := io.ReadAll(f)
		if err != nil {
			t.Fatalf("Failed to read from %s: %v", name, err)
		}
		return string(raw)
	}

	// INDEX.json must be present.
	const indexPath = "telegram/Test Channel/INDEX.json"
	if !exists(indexPath) {
		t.Errorf("Expected file to exist: %s", indexPath)
	}

	// January: plain and formatted messages.
	const janMessagesPath = "telegram/Test Channel/messages/2024-01.md"
	if exists(janMessagesPath) {
		january := read(janMessagesPath)
		if !strings.Contains(january, "Hello, world!") {
			t.Errorf("Expected to find 'Hello, world!' in %s", janMessagesPath)
		}
		if !strings.Contains(january, "**This** is a *test* message with formatting.") {
			t.Errorf("Expected to find formatted message in %s", janMessagesPath)
		}
	} else {
		t.Errorf("Expected file to exist: %s", janMessagesPath)
	}

	// February: the photo message.
	const febMessagesPath = "telegram/Test Channel/messages/2024-02.md"
	if exists(febMessagesPath) {
		february := read(febMessagesPath)
		if !strings.Contains(february, "Here is a photo.") {
			t.Errorf("Expected to find 'Here is a photo.' in %s", febMessagesPath)
		}
	} else {
		t.Errorf("Expected file to exist: %s", febMessagesPath)
	}

	// The photo itself must have been copied into the media directory.
	// ToSlash keeps the lookup key slash-separated on every platform.
	mediaPath := filepath.ToSlash(filepath.Join(
		"telegram", "Test Channel", "media", "photo_1@10-02-2024_18-30-00.jpg",
	))
	if !exists(mediaPath) {
		t.Errorf("Expected media file to exist: %s", mediaPath)
	}
}

View file

@ -0,0 +1 @@
dummy image data

42
pkg/telegram/testdata/result.json vendored Normal file
View file

@ -0,0 +1,42 @@
{
"name": "Test Channel",
"type": "public_channel",
"id": 123456789,
"messages": [
{
"id": 1,
"type": "message",
"date": "2024-01-15T12:00:00",
"from": "User1",
"text": "Hello, world!"
},
{
"id": 2,
"type": "message",
"date": "2024-01-15T12:01:00",
"from": "User2",
"text": [
{
"type": "bold",
"text": "This"
},
" is a ",
{
"type": "italic",
"text": "test"
},
" message with formatting."
]
},
{
"id": 3,
"type": "message",
"date": "2024-02-10T18:30:00",
"from": "User1",
"photo": "photos/photo_1@10-02-2024_18-30-00.jpg",
"width": 800,
"height": 600,
"text": "Here is a photo."
}
]
}