feat: Telegram channel/group export
This commit introduces a new command, `borg collect telegram import`, which allows users to import and archive Telegram channel and group exports. The new functionality includes: - A new `telegram` subcommand under the `collect` command. - An `import` command that parses Telegram Desktop JSON exports. - Conversion of messages to Markdown, with support for rich text formatting, replies, and forwards. - Preservation of media files, which are stored in a `media` subdirectory. - Organization of messages into monthly Markdown files for easy navigation. - Creation of an `INDEX.json` file to store the original export metadata. The feature is accompanied by unit tests to ensure its correctness and reliability. Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
This commit is contained in:
parent
cf2af53ed3
commit
d8bbda85b5
5 changed files with 426 additions and 0 deletions
114
cmd/collect_telegram.go
Normal file
114
cmd/collect_telegram.go
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/Snider/Borg/pkg/compress"
|
||||||
|
"github.com/Snider/Borg/pkg/telegram"
|
||||||
|
"github.com/Snider/Borg/pkg/tim"
|
||||||
|
"github.com/Snider/Borg/pkg/trix"
|
||||||
|
"github.com/Snider/Borg/pkg/ui"
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
)
|
||||||
|
|
||||||
|
var collectTelegramCmd = NewCollectTelegramCmd()
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
GetCollectCmd().AddCommand(GetCollectTelegramCmd())
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetCollectTelegramCmd() *cobra.Command {
|
||||||
|
return collectTelegramCmd
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewCollectTelegramCmd() *cobra.Command {
|
||||||
|
cmd := &cobra.Command{
|
||||||
|
Use: "telegram",
|
||||||
|
Short: "Collect a Telegram export",
|
||||||
|
Long: `Collect a Telegram export and store it in a DataNode.`,
|
||||||
|
}
|
||||||
|
cmd.AddCommand(NewCollectTelegramImportCmd())
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewCollectTelegramImportCmd() *cobra.Command {
|
||||||
|
cmd := &cobra.Command{
|
||||||
|
Use: "import [path]",
|
||||||
|
Short: "Import a Telegram export",
|
||||||
|
Long: `Import a Telegram export and store it in a DataNode.`,
|
||||||
|
Args: cobra.ExactArgs(1),
|
||||||
|
RunE: func(cmd *cobra.Command, args []string) error {
|
||||||
|
exportPath := args[0]
|
||||||
|
outputFile, _ := cmd.Flags().GetString("output")
|
||||||
|
format, _ := cmd.Flags().GetString("format")
|
||||||
|
compression, _ := cmd.Flags().GetString("compression")
|
||||||
|
password, _ := cmd.Flags().GetString("password")
|
||||||
|
|
||||||
|
if format != "datanode" && format != "tim" && format != "trix" {
|
||||||
|
return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format)
|
||||||
|
}
|
||||||
|
|
||||||
|
prompter := ui.NewNonInteractivePrompter(ui.GetWebsiteQuote)
|
||||||
|
prompter.Start()
|
||||||
|
defer prompter.Stop()
|
||||||
|
|
||||||
|
dn, err := telegram.Parse(exportPath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error parsing telegram export: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if dn == nil {
|
||||||
|
return fmt.Errorf("parsing telegram export resulted in an empty datanode")
|
||||||
|
}
|
||||||
|
|
||||||
|
var data []byte
|
||||||
|
switch format {
|
||||||
|
case "tim":
|
||||||
|
t, err := tim.FromDataNode(dn)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error creating tim: %w", err)
|
||||||
|
}
|
||||||
|
data, err = t.ToTar()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error serializing tim: %w", err)
|
||||||
|
}
|
||||||
|
case "trix":
|
||||||
|
data, err = trix.ToTrix(dn, password)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error serializing trix: %w", err)
|
||||||
|
}
|
||||||
|
default: // datanode
|
||||||
|
data, err = dn.ToTar()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error serializing DataNode: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
compressedData, err := compress.Compress(data, compression)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error compressing data: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if outputFile == "" {
|
||||||
|
outputFile = "telegram." + format
|
||||||
|
if compression != "none" {
|
||||||
|
outputFile += "." + compression
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err = os.WriteFile(outputFile, compressedData, 0644)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error writing telegram export to file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintln(cmd.OutOrStdout(), "Telegram export saved to", outputFile)
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cmd.Flags().String("output", "", "Output file for the DataNode")
|
||||||
|
cmd.Flags().String("format", "datanode", "Output format (datanode, tim, or trix)")
|
||||||
|
cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
|
||||||
|
cmd.Flags().String("password", "", "Password for encryption")
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
170
pkg/telegram/telegram.go
Normal file
170
pkg/telegram/telegram.go
Normal file
|
|
@ -0,0 +1,170 @@
|
||||||
|
package telegram
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/Snider/Borg/pkg/datanode"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TelegramExport represents the overall structure of the Telegram JSON export.
|
||||||
|
type TelegramExport struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Type string `json:"type"`
|
||||||
|
ID int `json:"id"`
|
||||||
|
Messages []Message `json:"messages"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Message models one entry of the "messages" array in a Telegram export.
type Message struct {
	// ID is the message identifier within the chat.
	ID int `json:"id"`
	// Type is the entry kind; Parse only renders entries whose type is "message".
	Type string `json:"type"`
	// Date is the timestamp string, parsed by Parse with the layout
	// "2006-01-02T15:04:05".
	Date string `json:"date"`
	// From is the display name of the sender.
	From string `json:"from"`
	// Text is either a plain string or a list mixing strings and entity
	// objects, which is why it is decoded into an untyped value.
	Text interface{} `json:"text"`
	// File is the export-relative path of an attached file, if any.
	File string `json:"file"`
	// ReplyToID is the ID of the message being replied to; zero means no reply.
	ReplyToID int `json:"reply_to_message_id"`
	// Photo is the export-relative path of an attached photo, if any.
	Photo string `json:"photo"`
	// Width and Height are the media dimensions reported by the export.
	Width  int `json:"width"`
	Height int `json:"height"`
	// ForwardedFrom names the original author of a forwarded message.
	ForwardedFrom string `json:"forwarded_from"`
}
|
||||||
|
|
||||||
|
// TextEntity models one formatted fragment of a message's text.
type TextEntity struct {
	// Type is the formatting kind, e.g. "bold", "italic", "text_link", "code".
	Type string `json:"type"`
	// Text is the fragment's visible text.
	Text string `json:"text"`
	// Href is the link target; it is omitted from JSON when empty.
	Href string `json:"href,omitempty"`
}
|
||||||
|
|
||||||
|
// parseText converts a message's 'text' field into a Markdown string.
//
// The field is decoded from JSON into an untyped value and may be:
//   - a plain string, returned unchanged;
//   - a list whose items are plain strings (copied verbatim) or entity
//     objects (rendered by entityMarkdown); items of any other type are
//     skipped.
//
// Any other input, including nil, yields the empty string.
func parseText(text interface{}) string {
	switch v := text.(type) {
	case string:
		return v
	case []interface{}:
		var builder strings.Builder
		for _, item := range v {
			switch e := item.(type) {
			case string:
				builder.WriteString(e)
			case map[string]interface{}:
				builder.WriteString(entityMarkdown(e))
			}
		}
		return builder.String()
	}
	return ""
}

// entityMarkdown renders one decoded entity object as Markdown.
//
// It reads the "type", "text" and "href" keys; a key that is missing or not a
// string is treated as empty. Unknown entity types are rendered as their
// plain text.
func entityMarkdown(entity map[string]interface{}) string {
	kind, _ := entity["type"].(string)
	body, _ := entity["text"].(string)
	href, _ := entity["href"].(string)

	switch kind {
	case "bold":
		return "**" + body + "**"
	case "italic":
		return "*" + body + "*"
	case "link", "text_link":
		// An entity without an href (bare "link" entities appear to carry only
		// the URL as their text — confirm against real exports) would render
		// as "[text]()", a link to nowhere. Emit the text on its own instead.
		if href == "" {
			return body
		}
		return "[" + body + "](" + href + ")"
	case "pre", "code":
		return "`" + body + "`"
	default:
		return body
	}
}
|
||||||
|
|
||||||
|
// Parse parses a Telegram export directory and returns a DataNode.
|
||||||
|
func Parse(path string) (*datanode.DataNode, error) {
|
||||||
|
jsonPath := filepath.Join(path, "result.json")
|
||||||
|
jsonBytes, err := os.ReadFile(jsonPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to read result.json: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var export TelegramExport
|
||||||
|
if err := json.Unmarshal(jsonBytes, &export); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal json: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
dn := datanode.New()
|
||||||
|
channelName := export.Name
|
||||||
|
|
||||||
|
// Create INDEX.json
|
||||||
|
indexData, err := json.MarshalIndent(export, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to marshal index data: %w", err)
|
||||||
|
}
|
||||||
|
indexPath := filepath.Join("telegram", channelName, "INDEX.json")
|
||||||
|
dn.AddData(indexPath, indexData)
|
||||||
|
|
||||||
|
messagesByMonth := make(map[string][]Message)
|
||||||
|
for _, msg := range export.Messages {
|
||||||
|
if msg.Type != "message" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
t, err := time.Parse("2006-01-02T15:04:05", msg.Date)
|
||||||
|
if err != nil {
|
||||||
|
continue // Skip messages with invalid date format
|
||||||
|
}
|
||||||
|
month := t.Format("2006-01")
|
||||||
|
messagesByMonth[month] = append(messagesByMonth[month], msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
for month, messages := range messagesByMonth {
|
||||||
|
sort.Slice(messages, func(i, j int) bool {
|
||||||
|
return messages[i].Date < messages[j].Date
|
||||||
|
})
|
||||||
|
|
||||||
|
var mdBuilder strings.Builder
|
||||||
|
for _, msg := range messages {
|
||||||
|
mdBuilder.WriteString(fmt.Sprintf("### %s from %s (ID: %d)\n\n", msg.Date, msg.From, msg.ID))
|
||||||
|
if msg.ReplyToID != 0 {
|
||||||
|
mdBuilder.WriteString(fmt.Sprintf("> Reply to message %d\n\n", msg.ReplyToID))
|
||||||
|
}
|
||||||
|
if msg.ForwardedFrom != "" {
|
||||||
|
mdBuilder.WriteString(fmt.Sprintf("> Forwarded from %s\n\n", msg.ForwardedFrom))
|
||||||
|
}
|
||||||
|
|
||||||
|
text := parseText(msg.Text)
|
||||||
|
mdBuilder.WriteString(text)
|
||||||
|
mdBuilder.WriteString("\n\n")
|
||||||
|
|
||||||
|
mediaPath := ""
|
||||||
|
if msg.File != "" {
|
||||||
|
mediaPath = msg.File
|
||||||
|
} else if msg.Photo != "" {
|
||||||
|
mediaPath = msg.Photo
|
||||||
|
}
|
||||||
|
|
||||||
|
if mediaPath != "" {
|
||||||
|
mdBuilder.WriteString(fmt.Sprintf("\n\n", filepath.Base(mediaPath)))
|
||||||
|
|
||||||
|
srcMediaPath := filepath.Join(path, mediaPath)
|
||||||
|
mediaBytes, err := os.ReadFile(srcMediaPath)
|
||||||
|
if err == nil {
|
||||||
|
destMediaPath := filepath.Join("telegram", channelName, "media", filepath.Base(mediaPath))
|
||||||
|
dn.AddData(destMediaPath, mediaBytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mdBuilder.WriteString("---\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
mdPath := filepath.Join("telegram", channelName, "messages", month+".md")
|
||||||
|
dn.AddData(mdPath, []byte(mdBuilder.String()))
|
||||||
|
}
|
||||||
|
|
||||||
|
return dn, nil
|
||||||
|
}
|
||||||
99
pkg/telegram/telegram_test.go
Normal file
99
pkg/telegram/telegram_test.go
Normal file
|
|
@ -0,0 +1,99 @@
|
||||||
|
package telegram
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParse_Good(t *testing.T) {
|
||||||
|
testDataPath := "testdata"
|
||||||
|
|
||||||
|
dn, err := Parse(testDataPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Parse() error = %v, wantErr nil", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if dn == nil {
|
||||||
|
t.Fatal("Parse() returned a nil DataNode")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for INDEX.json
|
||||||
|
indexPath := "telegram/Test Channel/INDEX.json"
|
||||||
|
exists, err := dn.Exists(indexPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("dn.Exists(%q) error: %v", indexPath, err)
|
||||||
|
}
|
||||||
|
if !exists {
|
||||||
|
t.Errorf("Expected file to exist: %s", indexPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for January messages markdown file
|
||||||
|
janMessagesPath := "telegram/Test Channel/messages/2024-01.md"
|
||||||
|
exists, err = dn.Exists(janMessagesPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("dn.Exists(%q) error: %v", janMessagesPath, err)
|
||||||
|
}
|
||||||
|
if !exists {
|
||||||
|
t.Errorf("Expected file to exist: %s", janMessagesPath)
|
||||||
|
} else {
|
||||||
|
// Verify content of the January markdown file
|
||||||
|
f, err := dn.Open(janMessagesPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to open %s: %v", janMessagesPath, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
contentBytes, err := io.ReadAll(f)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to read from %s: %v", janMessagesPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
content := string(contentBytes)
|
||||||
|
if !strings.Contains(content, "Hello, world!") {
|
||||||
|
t.Errorf("Expected to find 'Hello, world!' in %s", janMessagesPath)
|
||||||
|
}
|
||||||
|
if !strings.Contains(content, "**This** is a *test* message with formatting.") {
|
||||||
|
t.Errorf("Expected to find formatted message in %s", janMessagesPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for February messages markdown file
|
||||||
|
febMessagesPath := "telegram/Test Channel/messages/2024-02.md"
|
||||||
|
exists, err = dn.Exists(febMessagesPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("dn.Exists(%q) error: %v", febMessagesPath, err)
|
||||||
|
}
|
||||||
|
if !exists {
|
||||||
|
t.Errorf("Expected file to exist: %s", febMessagesPath)
|
||||||
|
} else {
|
||||||
|
f, err := dn.Open(febMessagesPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to open %s: %v", febMessagesPath, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
contentBytes, err := io.ReadAll(f)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to read from %s: %v", febMessagesPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
content := string(contentBytes)
|
||||||
|
if !strings.Contains(content, "Here is a photo.") {
|
||||||
|
t.Errorf("Expected to find 'Here is a photo.' in %s", febMessagesPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for media file
|
||||||
|
mediaFileName := "photo_1@10-02-2024_18-30-00.jpg"
|
||||||
|
mediaPath := filepath.Join("telegram", "Test Channel", "media", mediaFileName)
|
||||||
|
mediaPath = filepath.ToSlash(mediaPath) // Ensure cross-platform path separators
|
||||||
|
exists, err = dn.Exists(mediaPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("dn.Exists(%q) error: %v", mediaPath, err)
|
||||||
|
}
|
||||||
|
if !exists {
|
||||||
|
t.Errorf("Expected media file to exist: %s", mediaPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
1
pkg/telegram/testdata/photos/photo_1@10-02-2024_18-30-00.jpg
vendored
Normal file
1
pkg/telegram/testdata/photos/photo_1@10-02-2024_18-30-00.jpg
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
dummy image data
|
||||||
42
pkg/telegram/testdata/result.json
vendored
Normal file
42
pkg/telegram/testdata/result.json
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
{
|
||||||
|
"name": "Test Channel",
|
||||||
|
"type": "public_channel",
|
||||||
|
"id": 123456789,
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"type": "message",
|
||||||
|
"date": "2024-01-15T12:00:00",
|
||||||
|
"from": "User1",
|
||||||
|
"text": "Hello, world!"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"type": "message",
|
||||||
|
"date": "2024-01-15T12:01:00",
|
||||||
|
"from": "User2",
|
||||||
|
"text": [
|
||||||
|
{
|
||||||
|
"type": "bold",
|
||||||
|
"text": "This"
|
||||||
|
},
|
||||||
|
" is a ",
|
||||||
|
{
|
||||||
|
"type": "italic",
|
||||||
|
"text": "test"
|
||||||
|
},
|
||||||
|
" message with formatting."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"type": "message",
|
||||||
|
"date": "2024-02-10T18:30:00",
|
||||||
|
"from": "User1",
|
||||||
|
"photo": "photos/photo_1@10-02-2024_18-30-00.jpg",
|
||||||
|
"width": 800,
|
||||||
|
"height": 600,
|
||||||
|
"text": "Here is a photo."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue