Compare commits
1 commit
main
...
feat-teleg
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d8bbda85b5 |
5 changed files with 426 additions and 0 deletions
114
cmd/collect_telegram.go
Normal file
114
cmd/collect_telegram.go
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/Snider/Borg/pkg/compress"
|
||||
"github.com/Snider/Borg/pkg/telegram"
|
||||
"github.com/Snider/Borg/pkg/tim"
|
||||
"github.com/Snider/Borg/pkg/trix"
|
||||
"github.com/Snider/Borg/pkg/ui"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// collectTelegramCmd is the package-wide "telegram" command instance. It is
// built once by NewCollectTelegramCmd during package variable initialization.
var collectTelegramCmd = NewCollectTelegramCmd()
|
||||
|
||||
func init() {
|
||||
GetCollectCmd().AddCommand(GetCollectTelegramCmd())
|
||||
}
|
||||
|
||||
func GetCollectTelegramCmd() *cobra.Command {
|
||||
return collectTelegramCmd
|
||||
}
|
||||
|
||||
func NewCollectTelegramCmd() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "telegram",
|
||||
Short: "Collect a Telegram export",
|
||||
Long: `Collect a Telegram export and store it in a DataNode.`,
|
||||
}
|
||||
cmd.AddCommand(NewCollectTelegramImportCmd())
|
||||
return cmd
|
||||
}
|
||||
|
||||
func NewCollectTelegramImportCmd() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "import [path]",
|
||||
Short: "Import a Telegram export",
|
||||
Long: `Import a Telegram export and store it in a DataNode.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
exportPath := args[0]
|
||||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
format, _ := cmd.Flags().GetString("format")
|
||||
compression, _ := cmd.Flags().GetString("compression")
|
||||
password, _ := cmd.Flags().GetString("password")
|
||||
|
||||
if format != "datanode" && format != "tim" && format != "trix" {
|
||||
return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format)
|
||||
}
|
||||
|
||||
prompter := ui.NewNonInteractivePrompter(ui.GetWebsiteQuote)
|
||||
prompter.Start()
|
||||
defer prompter.Stop()
|
||||
|
||||
dn, err := telegram.Parse(exportPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing telegram export: %w", err)
|
||||
}
|
||||
|
||||
if dn == nil {
|
||||
return fmt.Errorf("parsing telegram export resulted in an empty datanode")
|
||||
}
|
||||
|
||||
var data []byte
|
||||
switch format {
|
||||
case "tim":
|
||||
t, err := tim.FromDataNode(dn)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating tim: %w", err)
|
||||
}
|
||||
data, err = t.ToTar()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error serializing tim: %w", err)
|
||||
}
|
||||
case "trix":
|
||||
data, err = trix.ToTrix(dn, password)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error serializing trix: %w", err)
|
||||
}
|
||||
default: // datanode
|
||||
data, err = dn.ToTar()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error serializing DataNode: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
compressedData, err := compress.Compress(data, compression)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error compressing data: %w", err)
|
||||
}
|
||||
|
||||
if outputFile == "" {
|
||||
outputFile = "telegram." + format
|
||||
if compression != "none" {
|
||||
outputFile += "." + compression
|
||||
}
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, compressedData, 0644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error writing telegram export to file: %w", err)
|
||||
}
|
||||
|
||||
fmt.Fprintln(cmd.OutOrStdout(), "Telegram export saved to", outputFile)
|
||||
return nil
|
||||
},
|
||||
}
|
||||
cmd.Flags().String("output", "", "Output file for the DataNode")
|
||||
cmd.Flags().String("format", "datanode", "Output format (datanode, tim, or trix)")
|
||||
cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
|
||||
cmd.Flags().String("password", "", "Password for encryption")
|
||||
return cmd
|
||||
}
|
||||
170
pkg/telegram/telegram.go
Normal file
170
pkg/telegram/telegram.go
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
package telegram
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
)
|
||||
|
||||
// TelegramExport represents the overall structure of the Telegram JSON export.
|
||||
type TelegramExport struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
ID int `json:"id"`
|
||||
Messages []Message `json:"messages"`
|
||||
}
|
||||
|
||||
// Message is one entry of the "messages" array in a Telegram export.
type Message struct {
	// ID is the message identifier within the chat.
	ID int `json:"id"`
	// Type is the entry kind; only entries with Type "message" are rendered.
	Type string `json:"type"`
	// Date is the timestamp string, expected in the layout 2006-01-02T15:04:05.
	Date string `json:"date"`
	// From is the display name of the sender.
	From string `json:"from"`
	// Text is either a plain string or an array mixing strings and entity
	// objects; it is left undecoded here and interpreted by parseText.
	Text interface{} `json:"text"`
	// File is the path of an attached file, relative to the export directory.
	File string `json:"file"`
	// ReplyToID is the ID of the message being replied to, or 0 when absent.
	ReplyToID int `json:"reply_to_message_id"`
	// Photo is the path of an attached photo, relative to the export directory.
	Photo string `json:"photo"`
	// Width and Height accompany Photo in the export (presumably pixel
	// dimensions — confirm against the exporter's documentation).
	Width  int `json:"width"`
	Height int `json:"height"`
	// ForwardedFrom names the original author of a forwarded message.
	ForwardedFrom string `json:"forwarded_from"`
}
|
||||
|
||||
// TextEntity is one formatted run inside a message's "text" array.
type TextEntity struct {
	// Type is the entity kind, e.g. "bold", "italic", "link", "text_link",
	// "pre" or "code".
	Type string `json:"type"`
	// Text is the visible text of the run.
	Text string `json:"text"`
	// Href is the link target, when the entity carries one.
	Href string `json:"href,omitempty"`
}
|
||||
|
||||
// parseText renders the 'text' field of an exported message as Markdown.
//
// After json.Unmarshal into interface{} the field is either a string, which is
// returned unchanged, or a []interface{} whose items are plain strings and
// entity objects (map[string]interface{} with "type", "text" and optionally
// "href"). Any other shape, including nil, yields "". Array items of any other
// type are skipped, and missing or non-string entity fields are treated as "".
//
// Entities are rendered as:
//
//	bold             **text**
//	italic           *text*
//	pre, code        `text`
//	link, text_link  [text](href)
//	anything else    text
//
// A "link" entity without an href uses its own text as the target, because
// for that entity kind the text is the URL itself. A "text_link" without an
// href is emitted as plain text rather than as a link with an empty target.
func parseText(text interface{}) string {
	switch v := text.(type) {
	case string:
		return v
	case []interface{}:
		var builder strings.Builder
		for _, item := range v {
			switch e := item.(type) {
			case string:
				builder.WriteString(e)
			case map[string]interface{}:
				kind, _ := e["type"].(string)
				body, _ := e["text"].(string)
				href, _ := e["href"].(string)

				switch kind {
				case "bold":
					fmt.Fprintf(&builder, "**%s**", body)
				case "italic":
					fmt.Fprintf(&builder, "*%s*", body)
				case "link", "text_link":
					target := href
					if target == "" && kind == "link" {
						target = body
					}
					if target == "" {
						builder.WriteString(body)
					} else {
						fmt.Fprintf(&builder, "[%s](%s)", body, target)
					}
				case "pre", "code":
					fmt.Fprintf(&builder, "`%s`", body)
				default:
					builder.WriteString(body)
				}
			}
		}
		return builder.String()
	}
	return ""
}
|
||||
|
||||
// Parse parses a Telegram export directory and returns a DataNode.
|
||||
func Parse(path string) (*datanode.DataNode, error) {
|
||||
jsonPath := filepath.Join(path, "result.json")
|
||||
jsonBytes, err := os.ReadFile(jsonPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read result.json: %w", err)
|
||||
}
|
||||
|
||||
var export TelegramExport
|
||||
if err := json.Unmarshal(jsonBytes, &export); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal json: %w", err)
|
||||
}
|
||||
|
||||
dn := datanode.New()
|
||||
channelName := export.Name
|
||||
|
||||
// Create INDEX.json
|
||||
indexData, err := json.MarshalIndent(export, "", " ")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal index data: %w", err)
|
||||
}
|
||||
indexPath := filepath.Join("telegram", channelName, "INDEX.json")
|
||||
dn.AddData(indexPath, indexData)
|
||||
|
||||
messagesByMonth := make(map[string][]Message)
|
||||
for _, msg := range export.Messages {
|
||||
if msg.Type != "message" {
|
||||
continue
|
||||
}
|
||||
t, err := time.Parse("2006-01-02T15:04:05", msg.Date)
|
||||
if err != nil {
|
||||
continue // Skip messages with invalid date format
|
||||
}
|
||||
month := t.Format("2006-01")
|
||||
messagesByMonth[month] = append(messagesByMonth[month], msg)
|
||||
}
|
||||
|
||||
for month, messages := range messagesByMonth {
|
||||
sort.Slice(messages, func(i, j int) bool {
|
||||
return messages[i].Date < messages[j].Date
|
||||
})
|
||||
|
||||
var mdBuilder strings.Builder
|
||||
for _, msg := range messages {
|
||||
mdBuilder.WriteString(fmt.Sprintf("### %s from %s (ID: %d)\n\n", msg.Date, msg.From, msg.ID))
|
||||
if msg.ReplyToID != 0 {
|
||||
mdBuilder.WriteString(fmt.Sprintf("> Reply to message %d\n\n", msg.ReplyToID))
|
||||
}
|
||||
if msg.ForwardedFrom != "" {
|
||||
mdBuilder.WriteString(fmt.Sprintf("> Forwarded from %s\n\n", msg.ForwardedFrom))
|
||||
}
|
||||
|
||||
text := parseText(msg.Text)
|
||||
mdBuilder.WriteString(text)
|
||||
mdBuilder.WriteString("\n\n")
|
||||
|
||||
mediaPath := ""
|
||||
if msg.File != "" {
|
||||
mediaPath = msg.File
|
||||
} else if msg.Photo != "" {
|
||||
mediaPath = msg.Photo
|
||||
}
|
||||
|
||||
if mediaPath != "" {
|
||||
mdBuilder.WriteString(fmt.Sprintf("\n\n", filepath.Base(mediaPath)))
|
||||
|
||||
srcMediaPath := filepath.Join(path, mediaPath)
|
||||
mediaBytes, err := os.ReadFile(srcMediaPath)
|
||||
if err == nil {
|
||||
destMediaPath := filepath.Join("telegram", channelName, "media", filepath.Base(mediaPath))
|
||||
dn.AddData(destMediaPath, mediaBytes)
|
||||
}
|
||||
}
|
||||
mdBuilder.WriteString("---\n\n")
|
||||
}
|
||||
|
||||
mdPath := filepath.Join("telegram", channelName, "messages", month+".md")
|
||||
dn.AddData(mdPath, []byte(mdBuilder.String()))
|
||||
}
|
||||
|
||||
return dn, nil
|
||||
}
|
||||
99
pkg/telegram/telegram_test.go
Normal file
99
pkg/telegram/telegram_test.go
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
package telegram
|
||||
|
||||
import (
|
||||
"io"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParse_Good(t *testing.T) {
|
||||
testDataPath := "testdata"
|
||||
|
||||
dn, err := Parse(testDataPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Parse() error = %v, wantErr nil", err)
|
||||
}
|
||||
|
||||
if dn == nil {
|
||||
t.Fatal("Parse() returned a nil DataNode")
|
||||
}
|
||||
|
||||
// Check for INDEX.json
|
||||
indexPath := "telegram/Test Channel/INDEX.json"
|
||||
exists, err := dn.Exists(indexPath)
|
||||
if err != nil {
|
||||
t.Fatalf("dn.Exists(%q) error: %v", indexPath, err)
|
||||
}
|
||||
if !exists {
|
||||
t.Errorf("Expected file to exist: %s", indexPath)
|
||||
}
|
||||
|
||||
// Check for January messages markdown file
|
||||
janMessagesPath := "telegram/Test Channel/messages/2024-01.md"
|
||||
exists, err = dn.Exists(janMessagesPath)
|
||||
if err != nil {
|
||||
t.Fatalf("dn.Exists(%q) error: %v", janMessagesPath, err)
|
||||
}
|
||||
if !exists {
|
||||
t.Errorf("Expected file to exist: %s", janMessagesPath)
|
||||
} else {
|
||||
// Verify content of the January markdown file
|
||||
f, err := dn.Open(janMessagesPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to open %s: %v", janMessagesPath, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
contentBytes, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read from %s: %v", janMessagesPath, err)
|
||||
}
|
||||
|
||||
content := string(contentBytes)
|
||||
if !strings.Contains(content, "Hello, world!") {
|
||||
t.Errorf("Expected to find 'Hello, world!' in %s", janMessagesPath)
|
||||
}
|
||||
if !strings.Contains(content, "**This** is a *test* message with formatting.") {
|
||||
t.Errorf("Expected to find formatted message in %s", janMessagesPath)
|
||||
}
|
||||
}
|
||||
|
||||
// Check for February messages markdown file
|
||||
febMessagesPath := "telegram/Test Channel/messages/2024-02.md"
|
||||
exists, err = dn.Exists(febMessagesPath)
|
||||
if err != nil {
|
||||
t.Fatalf("dn.Exists(%q) error: %v", febMessagesPath, err)
|
||||
}
|
||||
if !exists {
|
||||
t.Errorf("Expected file to exist: %s", febMessagesPath)
|
||||
} else {
|
||||
f, err := dn.Open(febMessagesPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to open %s: %v", febMessagesPath, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
contentBytes, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read from %s: %v", febMessagesPath, err)
|
||||
}
|
||||
|
||||
content := string(contentBytes)
|
||||
if !strings.Contains(content, "Here is a photo.") {
|
||||
t.Errorf("Expected to find 'Here is a photo.' in %s", febMessagesPath)
|
||||
}
|
||||
}
|
||||
|
||||
// Check for media file
|
||||
mediaFileName := "photo_1@10-02-2024_18-30-00.jpg"
|
||||
mediaPath := filepath.Join("telegram", "Test Channel", "media", mediaFileName)
|
||||
mediaPath = filepath.ToSlash(mediaPath) // Ensure cross-platform path separators
|
||||
exists, err = dn.Exists(mediaPath)
|
||||
if err != nil {
|
||||
t.Fatalf("dn.Exists(%q) error: %v", mediaPath, err)
|
||||
}
|
||||
if !exists {
|
||||
t.Errorf("Expected media file to exist: %s", mediaPath)
|
||||
}
|
||||
}
|
||||
1
pkg/telegram/testdata/photos/photo_1@10-02-2024_18-30-00.jpg
vendored
Normal file
1
pkg/telegram/testdata/photos/photo_1@10-02-2024_18-30-00.jpg
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
dummy image data
|
||||
42
pkg/telegram/testdata/result.json
vendored
Normal file
42
pkg/telegram/testdata/result.json
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"name": "Test Channel",
|
||||
"type": "public_channel",
|
||||
"id": 123456789,
|
||||
"messages": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "message",
|
||||
"date": "2024-01-15T12:00:00",
|
||||
"from": "User1",
|
||||
"text": "Hello, world!"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "message",
|
||||
"date": "2024-01-15T12:01:00",
|
||||
"from": "User2",
|
||||
"text": [
|
||||
{
|
||||
"type": "bold",
|
||||
"text": "This"
|
||||
},
|
||||
" is a ",
|
||||
{
|
||||
"type": "italic",
|
||||
"text": "test"
|
||||
},
|
||||
" message with formatting."
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "message",
|
||||
"date": "2024-02-10T18:30:00",
|
||||
"from": "User1",
|
||||
"photo": "photos/photo_1@10-02-2024_18-30-00.jpg",
|
||||
"width": 800,
|
||||
"height": 600,
|
||||
"text": "Here is a photo."
|
||||
}
|
||||
]
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue