refactor(collect): replace bash scripts with Borg CLI
Remove custom collection scripts in favour of Borg (github.com/Snider/Borg)
for data collection. Skills now document what to collect, with Borg handling
the actual collection.

Removed:
- collect-whitepaper.sh, dispatch.sh, update-index.sh
- All skill-specific bash scripts (collect.sh, generate-jobs.sh, etc.)
- hooks.json and HOOKS.md

Updated:
- plugin.json to reference Borg dependency
- SKILL.md files with Borg command examples

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Parent: 35260ed49e
Commit: 3c3d3de1a1
20 changed files with 108 additions and 2760 deletions
plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "collect",
-  "description": "Data collection skills for cryptocurrency research - whitepapers, forum archives, project archaeology, and blockchain history",
+  "description": "Data collection skills using Borg CLI - whitepapers, forum archives, project archaeology, and blockchain history",
   "version": "0.1.0",
   "author": {
     "name": "Host UK",
@@ -14,10 +14,14 @@
   "license": "EUPL-1.2",
   "keywords": [
     "data-collection",
+    "borg",
     "cryptocurrency",
     "archive",
     "whitepapers",
     "blockchain",
     "research"
-  ]
+  ],
+  "dependencies": {
+    "borg": "github.com/Snider/Borg@v0.1.0"
+  }
 }
HOOKS.md (deleted)
@@ -1,90 +0,0 @@
# Collection Hooks

Event-driven hooks that trigger during data collection.

## Available Hooks

| Hook | Trigger | Purpose |
|------|---------|---------|
| `collect-whitepaper.sh` | PDF/paper URL detected | Auto-queue whitepapers |
| `on-github-release.sh` | Release found | Archive release metadata |
| `on-explorer-block.sh` | Block data fetched | Index blockchain data |

## Hook Events

### `on_url_found`

Fired when a new URL is discovered during collection.

```bash
# Pattern matching
*.pdf → collect-whitepaper.sh
*/releases/* → on-github-release.sh
*/api/block/* → on-explorer-block.sh
```

### `on_file_collected`

Fired after a file is successfully downloaded.

```bash
# Post-processing
*.json → validate-json.sh
*.html → extract-links.sh
*.pdf → extract-metadata.sh
```

### `on_collection_complete`

Fired when a job batch finishes.

```bash
# Reporting
→ generate-index.sh
→ update-registry.sh
```

## Plugin Integration

For the marketplace plugin system:

```json
{
  "name": "whitepaper-collector",
  "version": "1.0.0",
  "hooks": {
    "on_url_found": {
      "pattern": "*.pdf",
      "handler": "./collect-whitepaper.sh"
    }
  }
}
```

## Registration

Hooks register in `hooks.json`:

```json
{
  "on_url_found": [
    {
      "pattern": "\\.pdf$",
      "handler": "./hooks/collect-whitepaper.sh",
      "priority": 10
    }
  ]
}
```

## Usage in Collectors

Collectors call hooks via:

```bash
# In job-collector/process.sh
source ./hooks/dispatch.sh

# When URL found
dispatch_hook "on_url_found" "$URL"

# When file collected
dispatch_hook "on_file_collected" "$FILE" "$TYPE"
```
collect-whitepaper.sh (deleted)
@@ -1,59 +0,0 @@
#!/usr/bin/env bash
# Hook: collect-whitepaper.sh
# Called when a whitepaper URL is detected during collection
# Usage: ./collect-whitepaper.sh <URL> [destination-folder]

set -e

URL="$1"
DEST="${2:-./whitepapers}"

if [ -z "$URL" ]; then
  echo "Usage: $0 <url> [destination]" >&2
  exit 1
fi

# Detect paper type from URL
detect_category() {
  local url="$1"
  case "$url" in
    *cryptonote*) echo "cryptonote" ;;
    *iacr.org*|*eprint*) echo "research" ;;
    *arxiv.org*) echo "research" ;;
    *monero*|*getmonero*) echo "research" ;;
    *lethean*|*lthn*) echo "lethean" ;;
    *) echo "uncategorized" ;;
  esac
}

# Generate safe filename from URL
safe_filename() {
  local url="$1"
  basename "$url" | sed 's/[^a-zA-Z0-9._-]/-/g'
}

CATEGORY=$(detect_category "$URL")
FILENAME=$(safe_filename "$URL")
TARGET_DIR="$DEST/$CATEGORY"
TARGET_FILE="$TARGET_DIR/$FILENAME"

mkdir -p "$TARGET_DIR"

# Check if already collected
if [ -f "$TARGET_FILE" ]; then
  echo "Already collected: $TARGET_FILE"
  exit 0
fi

echo "Collecting whitepaper:"
echo "  URL: $URL"
echo "  Category: $CATEGORY"
echo "  Destination: $TARGET_FILE"

# Create job entry for proxy collection
echo "$URL|$FILENAME|whitepaper|category=$CATEGORY" >> "$DEST/.pending-jobs.txt"

echo "Job queued: $DEST/.pending-jobs.txt"
echo ""
echo "To collect immediately (if you have direct access):"
echo "  curl -L -o '$TARGET_FILE' '$URL'"
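The script above only queues work; the `URL|FILENAME|whitepaper|category=...` lines in `.pending-jobs.txt` were drained out-of-band. A minimal consumer sketch, assuming the default `./whitepapers` destination (the `drain-pending.sh` name is hypothetical, not part of this commit):

```bash
#!/usr/bin/env bash
# Hypothetical drain-pending.sh: fetch queued whitepaper jobs
# Line format (from collect-whitepaper.sh): URL|FILENAME|whitepaper|category=CATEGORY
set -e

DEST="${1:-./whitepapers}"
QUEUE="$DEST/.pending-jobs.txt"

[ -f "$QUEUE" ] || { echo "No pending jobs at $QUEUE"; exit 0; }

while IFS='|' read -r url filename type metadata; do
  [ -n "$url" ] || continue
  category="${metadata#category=}"   # metadata field carries category=...
  mkdir -p "$DEST/$category"
  echo "Fetching $url -> $DEST/$category/$filename"
  curl -L -o "$DEST/$category/$filename" "$url"
done < "$QUEUE"
```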
dispatch.sh (deleted)
@@ -1,80 +0,0 @@
#!/usr/bin/env bash
# Hook dispatcher - source this in collectors
# Usage: source ./hooks/dispatch.sh

HOOKS_DIR="$(dirname "${BASH_SOURCE[0]}")"
HOOKS_JSON="$HOOKS_DIR/hooks.json"

# Dispatch a hook event
# dispatch_hook <event> <arg1> [arg2] ...
dispatch_hook() {
  local event="$1"
  shift
  local args=("$@")

  if [ ! -f "$HOOKS_JSON" ]; then
    return 0
  fi

  # Get handlers for this event (requires jq)
  if ! command -v jq &> /dev/null; then
    echo "Warning: jq not installed, hooks disabled" >&2
    return 0
  fi

  local handlers
  handlers=$(jq -r ".hooks[\"$event\"][]? | select(.enabled == true) | @json" "$HOOKS_JSON" 2>/dev/null)

  if [ -z "$handlers" ]; then
    return 0
  fi

  echo "$handlers" | while read -r handler_json; do
    local name pattern handler_script priority
    name=$(echo "$handler_json" | jq -r '.name')
    pattern=$(echo "$handler_json" | jq -r '.pattern // ""')
    handler_script=$(echo "$handler_json" | jq -r '.handler')

    # Check pattern match if pattern exists
    if [ -n "$pattern" ] && [ -n "${args[0]}" ]; then
      if ! echo "${args[0]}" | grep -qE "$pattern"; then
        continue
      fi
    fi

    # Execute handler
    local full_path="$HOOKS_DIR/$handler_script"
    if [ -x "$full_path" ]; then
      echo "[hook] $name: ${args[*]}" >&2
      "$full_path" "${args[@]}"
    elif [ -f "$full_path" ]; then
      echo "[hook] $name: ${args[*]}" >&2
      bash "$full_path" "${args[@]}"
    fi
  done
}

# Register a new hook dynamically
# register_hook <event> <name> <pattern> <handler>
register_hook() {
  local event="$1"
  local name="$2"
  local pattern="$3"
  local handler="$4"

  if ! command -v jq &> /dev/null; then
    echo "Error: jq required for hook registration" >&2
    return 1
  fi

  local new_hook
  new_hook=$(jq -n \
    --arg name "$name" \
    --arg pattern "$pattern" \
    --arg handler "$handler" \
    '{name: $name, pattern: $pattern, handler: $handler, priority: 50, enabled: true}')

  # Add to hooks.json
  jq ".hooks[\"$event\"] += [$new_hook]" "$HOOKS_JSON" > "$HOOKS_JSON.tmp" \
    && mv "$HOOKS_JSON.tmp" "$HOOKS_JSON"
}
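For reference, sourcing this dispatcher and firing an event against the registry removed below would have looked like this (the URL is illustrative):

```bash
# Sketch: fire an event through the dispatcher above
source ./hooks/dispatch.sh

# Matches the "\.pdf$" pattern registered in hooks.json,
# so collect-whitepaper.sh runs with the URL as its argument.
dispatch_hook "on_url_found" "https://example.org/whitepaper.pdf"
```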
hooks.json (deleted)
@@ -1,45 +0,0 @@
{
  "version": "1.0.0",
  "hooks": {
    "on_url_found": [
      {
        "name": "whitepaper-collector",
        "pattern": "\\.pdf$",
        "handler": "./collect-whitepaper.sh",
        "priority": 10,
        "enabled": true
      },
      {
        "name": "whitepaper-iacr",
        "pattern": "eprint\\.iacr\\.org",
        "handler": "./collect-whitepaper.sh",
        "priority": 10,
        "enabled": true
      },
      {
        "name": "whitepaper-arxiv",
        "pattern": "arxiv\\.org",
        "handler": "./collect-whitepaper.sh",
        "priority": 10,
        "enabled": true
      }
    ],
    "on_file_collected": [
      {
        "name": "pdf-metadata",
        "pattern": "\\.pdf$",
        "handler": "./extract-pdf-metadata.sh",
        "priority": 5,
        "enabled": false
      }
    ],
    "on_collection_complete": [
      {
        "name": "update-index",
        "handler": "./update-index.sh",
        "priority": 100,
        "enabled": true
      }
    ]
  }
}
collect.sh (BitcoinTalk thread collector, deleted)
@@ -1,269 +0,0 @@
#!/usr/bin/env bash
# BitcoinTalk Thread Collector
# Usage: ./collect.sh <topic-id-or-url> [--pages=N] [--output=DIR]

set -e

DELAY=2        # Be respectful to BTT servers
MAX_PAGES=0    # 0 = all pages
OUTPUT_BASE="."

# Parse topic ID from URL or direct input
parse_topic_id() {
  local input="$1"
  if [[ "$input" =~ topic=([0-9]+) ]]; then
    echo "${BASH_REMATCH[1]}"
  else
    echo "$input" | grep -oE '[0-9]+'
  fi
}

# Fetch a single page
fetch_page() {
  local topic_id="$1"
  local offset="$2"
  local output_file="$3"

  local url="https://bitcointalk.org/index.php?topic=${topic_id}.${offset}"
  echo "  Fetching: $url"

  curl -s -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" \
    -H "Accept: text/html" \
    "$url" > "$output_file"

  sleep $DELAY
}

# Check if page has posts
page_has_posts() {
  local html_file="$1"
  grep -q 'class="post"' "$html_file" 2>/dev/null
}

# Get last page number from first page
get_last_page() {
  local html_file="$1"
  # Look for navigation like "Pages: [1] 2 3 ... 50"
  local max_page=$(grep -oE 'topic=[0-9]+\.[0-9]+' "$html_file" | \
    sed 's/.*\.//' | sort -rn | head -1)
  echo "${max_page:-0}"
}

# Extract posts from HTML (simplified - works for basic extraction)
extract_posts_simple() {
  local html_file="$1"
  local output_dir="$2"
  local post_offset="$3"

  # Use Python for reliable HTML parsing
  python3 << PYEOF
import re
import html
import os
from datetime import datetime

html_content = open('$html_file', 'r', encoding='utf-8', errors='ignore').read()

# Pattern to find posts - BTT structure
post_pattern = r'<td class="td_headerandpost">(.*?)</td>\s*</tr>\s*</table>\s*</td>\s*</tr>'
author_pattern = r'<a href="https://bitcointalk\.org/index\.php\?action=profile;u=\d+"[^>]*>([^<]+)</a>'
date_pattern = r'<div class="smalltext">([A-Za-z]+ \d+, \d+, \d+:\d+:\d+ [AP]M)</div>'
post_content_pattern = r'<div class="post"[^>]*>(.*?)</div>\s*(?:<div class="moderatorbar"|</td>)'

posts = re.findall(post_pattern, html_content, re.DOTALL)
post_num = $post_offset

for post_html in posts:
    post_num += 1

    # Extract author
    author_match = re.search(author_pattern, post_html)
    author = author_match.group(1) if author_match else "Unknown"

    # Extract date
    date_match = re.search(date_pattern, post_html)
    date_str = date_match.group(1) if date_match else "Unknown date"

    # Extract content
    content_match = re.search(post_content_pattern, post_html, re.DOTALL)
    if content_match:
        content = content_match.group(1)
        # Clean HTML
        content = re.sub(r'<br\s*/?>', '\n', content)
        content = re.sub(r'<[^>]+>', '', content)
        content = html.unescape(content)
        content = content.strip()
    else:
        content = "(Could not extract content)"

    # Determine post type/score
    score = "COMMUNITY"
    if post_num == 1:
        score = "ANN"
    elif re.search(r'\[UPDATE\]|\[RELEASE\]|\[ANNOUNCEMENT\]', content, re.I):
        score = "UPDATE"
    elif '?' in content[:200]:
        score = "QUESTION"

    # Write post file
    filename = f"$output_dir/POST-{post_num:04d}.md"
    with open(filename, 'w') as f:
        f.write(f"# Post #{post_num}\n\n")
        f.write(f"## Metadata\n\n")
        f.write(f"| Field | Value |\n")
        f.write(f"|-------|-------|\n")
        f.write(f"| Author | {author} |\n")
        f.write(f"| Date | {date_str} |\n")
        f.write(f"| Type | **{score}** |\n\n")
        f.write(f"---\n\n")
        f.write(f"## Content\n\n")
        f.write(content)
        f.write(f"\n")

    print(f"  Created POST-{post_num:04d}.md ({score}) by {author}")

print(f"EXTRACTED:{post_num}")
PYEOF
}

# Main collection function
collect_thread() {
  local topic_id="$1"
  local output_dir="$OUTPUT_BASE/bitcointalk-$topic_id"

  mkdir -p "$output_dir/pages" "$output_dir/posts"

  echo "=== Collecting BitcoinTalk Topic: $topic_id ==="

  # Fetch first page to get thread info
  fetch_page "$topic_id" 0 "$output_dir/pages/page-0.html"

  # Extract thread title
  local title=$(grep -oP '<title>\K[^<]+' "$output_dir/pages/page-0.html" | head -1)
  echo "Thread: $title"

  # Get total pages
  local last_offset=$(get_last_page "$output_dir/pages/page-0.html")
  local total_pages=$(( (last_offset / 20) + 1 ))
  echo "Total pages: $total_pages"

  if [ "$MAX_PAGES" -gt 0 ] && [ "$MAX_PAGES" -lt "$total_pages" ]; then
    total_pages=$MAX_PAGES
    echo "Limiting to: $total_pages pages"
  fi

  # Extract posts from first page
  local post_count=0
  local result=$(extract_posts_simple "$output_dir/pages/page-0.html" "$output_dir/posts" 0)
  post_count=$(echo "$result" | grep "EXTRACTED:" | cut -d: -f2)

  # Fetch remaining pages
  for (( page=1; page<total_pages; page++ )); do
    local offset=$((page * 20))
    fetch_page "$topic_id" "$offset" "$output_dir/pages/page-$offset.html"

    if ! page_has_posts "$output_dir/pages/page-$offset.html"; then
      echo "  No more posts found, stopping."
      break
    fi

    result=$(extract_posts_simple "$output_dir/pages/page-$offset.html" "$output_dir/posts" "$post_count")
    post_count=$(echo "$result" | grep "EXTRACTED:" | cut -d: -f2)
  done

  # Generate index
  generate_index "$output_dir" "$title" "$topic_id" "$post_count"

  echo ""
  echo "=== Collection Complete ==="
  echo "Posts: $post_count"
  echo "Output: $output_dir/"
}

# Generate index file
generate_index() {
  local output_dir="$1"
  local title="$2"
  local topic_id="$3"
  local post_count="$4"

  cat > "$output_dir/INDEX.md" << EOF
# BitcoinTalk Thread Archive

## Thread Info

| Field | Value |
|-------|-------|
| Title | $title |
| Topic ID | $topic_id |
| URL | https://bitcointalk.org/index.php?topic=$topic_id.0 |
| Posts Archived | $post_count |
| Collected | $(date +%Y-%m-%d) |

---

## Post Type Legend

| Type | Meaning |
|------|---------|
| ANN | Original announcement |
| UPDATE | Official team update |
| QUESTION | Community question |
| ANSWER | Team response |
| COMMUNITY | General discussion |
| CONCERN | Raised issue/criticism |

---

## Posts

| # | Author | Date | Type |
|---|--------|------|------|
EOF

  for file in "$output_dir/posts/"POST-*.md; do
    [ -f "$file" ] || continue
    local num=$(basename "$file" .md | sed 's/POST-0*//')
    local author=$(grep "| Author |" "$file" | sed 's/.*| Author | \(.*\) |/\1/')
    local date=$(grep "| Date |" "$file" | sed 's/.*| Date | \(.*\) |/\1/')
    local type=$(sed -n '/| Type |/s/.*\*\*\([A-Z]*\)\*\*.*/\1/p' "$file")
    echo "| [$num](posts/POST-$(printf "%04d" $num).md) | $author | $date | $type |" >> "$output_dir/INDEX.md"
  done

  echo "  Created INDEX.md"
}

# Parse arguments
main() {
  local topic_input=""

  for arg in "$@"; do
    case "$arg" in
      --pages=*) MAX_PAGES="${arg#*=}" ;;
      --output=*) OUTPUT_BASE="${arg#*=}" ;;
      --delay=*) DELAY="${arg#*=}" ;;
      *) topic_input="$arg" ;;
    esac
  done

  if [ -z "$topic_input" ]; then
    echo "Usage: $0 <topic-id-or-url> [--pages=N] [--output=DIR] [--delay=2]"
    echo ""
    echo "Examples:"
    echo "  $0 2769739"
    echo "  $0 https://bitcointalk.org/index.php?topic=2769739.0"
    echo "  $0 2769739 --pages=5 --output=./lethean-ann"
    exit 1
  fi

  local topic_id=$(parse_topic_id "$topic_input")

  if [ -z "$topic_id" ]; then
    echo "Error: Could not parse topic ID from: $topic_input"
    exit 1
  fi

  collect_thread "$topic_id"
}

main "$@"
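BitcoinTalk addresses pages by post offset in steps of 20, which is why the script converts both ways with `page * 20` and `(last_offset / 20) + 1`. A quick sketch of the URL scheme, using the topic ID from the usage examples:

```bash
# Sketch: page URLs for a topic, 20 posts per page
TOPIC=2769739
for ((page=0; page<3; page++)); do
  echo "https://bitcointalk.org/index.php?topic=${TOPIC}.$((page * 20))"
done
# -> topic=2769739.0, topic=2769739.20, topic=2769739.40
```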
generate-jobs.sh (block explorer, deleted)
@@ -1,106 +0,0 @@
#!/usr/bin/env bash
# Generate block explorer collection jobs
# Usage: ./generate-jobs.sh <coin> [--blocks=N] [--sample=daily|weekly|monthly]

set -e

COIN=""
EXPLORER_URL=""
SAMPLE="monthly"
BLOCK_COUNT=100

# Known explorers
declare -A EXPLORERS=(
  ["lethean"]="https://explorer.lethean.io"
  ["monero"]="https://xmrchain.net"
  ["haven"]="https://explorer.havenprotocol.org"
  ["karbo"]="https://explorer.karbo.io"
  ["wownero"]="https://explore.wownero.com"
  ["dero"]="https://explorer.dero.io"
  ["masari"]="https://explorer.getmasari.org"
  ["turtlecoin"]="https://explorer.turtlecoin.lol"
  ["conceal"]="https://explorer.conceal.network"
)

# Parse args
for arg in "$@"; do
  case "$arg" in
    --url=*) EXPLORER_URL="${arg#*=}" ;;
    --blocks=*) BLOCK_COUNT="${arg#*=}" ;;
    --sample=*) SAMPLE="${arg#*=}" ;;
    --*) ;;
    *) COIN="$arg" ;;
  esac
done

if [ -z "$COIN" ] && [ -z "$EXPLORER_URL" ]; then
  echo "Usage: $0 <coin> [--url=URL] [--blocks=N] [--sample=daily|weekly|monthly]" >&2
  echo "" >&2
  echo "Known coins: ${!EXPLORERS[*]}" >&2
  exit 1
fi

# Get explorer URL
if [ -z "$EXPLORER_URL" ]; then
  EXPLORER_URL="${EXPLORERS[$COIN]}"
  if [ -z "$EXPLORER_URL" ]; then
    echo "# ERROR: Unknown coin '$COIN'. Use --url= to specify explorer." >&2
    exit 1
  fi
fi

SLUG=$(echo "$COIN" | tr '[:upper:]' '[:lower:]')

echo "# Block Explorer Jobs for $COIN"
echo "# Explorer: $EXPLORER_URL"
echo "# Sample: $SAMPLE"
echo "# Format: URL|FILENAME|TYPE|METADATA"
echo "#"

# Core API endpoints
echo "# === Core Data ==="
echo "${EXPLORER_URL}/api/info|explorer-${SLUG}-info.json|explorer-api|coin=$SLUG,type=info"
echo "${EXPLORER_URL}/api/emission|explorer-${SLUG}-emission.json|explorer-api|coin=$SLUG,type=emission"
echo "${EXPLORER_URL}/api/supply|explorer-${SLUG}-supply.json|explorer-api|coin=$SLUG,type=supply"
echo "${EXPLORER_URL}/api/mempool|explorer-${SLUG}-mempool.json|explorer-api|coin=$SLUG,type=mempool"

# Genesis block
echo "#"
echo "# === Genesis Block ==="
echo "${EXPLORER_URL}/api/block/0|explorer-${SLUG}-block-0.json|explorer-api|coin=$SLUG,block=0"
echo "${EXPLORER_URL}/api/block/1|explorer-${SLUG}-block-1.json|explorer-api|coin=$SLUG,block=1"

# Milestone blocks (if we know the heights)
echo "#"
echo "# === Milestone Blocks ==="
for height in 10000 50000 100000 500000 1000000 2000000; do
  echo "${EXPLORER_URL}/api/block/${height}|explorer-${SLUG}-block-${height}.json|explorer-api|coin=$SLUG,block=$height"
done

# Sample blocks by time
echo "#"
echo "# === Sampled Blocks (estimate heights) ==="
case "$SAMPLE" in
  daily)
    # ~720 blocks/day for 2-min blocks
    STEP=720
    ;;
  weekly)
    STEP=5040
    ;;
  monthly)
    STEP=21600
    ;;
esac

for ((i=0; i<BLOCK_COUNT; i++)); do
  height=$((i * STEP))
  echo "${EXPLORER_URL}/api/block/${height}|explorer-${SLUG}-sample-${height}.json|explorer-api|coin=$SLUG,block=$height,sample=$SAMPLE"
done

# Web pages (for scraping if API fails)
echo "#"
echo "# === Web Pages (backup) ==="
echo "${EXPLORER_URL}/|explorer-${SLUG}-home.html|explorer-web|coin=$SLUG"
echo "${EXPLORER_URL}/blocks|explorer-${SLUG}-blocks.html|explorer-web|coin=$SLUG"
echo "${EXPLORER_URL}/stats|explorer-${SLUG}-stats.html|explorer-web|coin=$SLUG"
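The `--sample` steps follow from a 2-minute block-time assumption: 720 blocks/day, 5040/week, 21600/month. A worked sketch of the same arithmetic:

```bash
# Sketch: estimate a chain height N days after genesis, assuming 2-minute blocks
DAYS=365
BLOCKS_PER_DAY=$(( 24 * 60 / 2 ))       # 720, as used by --sample=daily above
echo "~$(( DAYS * BLOCKS_PER_DAY )) blocks"   # ~262800
```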
generate-jobs.sh (CoinMarketCap, deleted)
@@ -1,89 +0,0 @@
#!/usr/bin/env bash
# Generate job list for CoinMarketCap collection
# Usage: ./generate-jobs.sh <coin-slug> [options] > jobs.txt

set -e

COINS=()
HISTORICAL=0
FROM_DATE="2017-01-01"
TO_DATE=$(date +%Y-%m-%d)

# Parse args
for arg in "$@"; do
  case "$arg" in
    --historical) HISTORICAL=1 ;;
    --from=*) FROM_DATE="${arg#*=}" ;;
    --to=*) TO_DATE="${arg#*=}" ;;
    --*) ;;
    *) COINS+=("$arg") ;;
  esac
done

if [ ${#COINS[@]} -eq 0 ]; then
  echo "Usage: $0 <coin-slug> [coin-slug...] [--historical] [--from=DATE] [--to=DATE]" >&2
  echo "" >&2
  echo "Examples:" >&2
  echo "  $0 lethean" >&2
  echo "  $0 lethean --historical --from=2018-01-01" >&2
  echo "  $0 lethean monero bitcoin" >&2
  exit 1
fi

# Header
echo "# CoinMarketCap job list - $(date +%Y-%m-%d)"
echo "# Coins: ${COINS[*]}"
echo "# Format: URL|FILENAME|TYPE|METADATA"
echo "#"

for COIN in "${COINS[@]}"; do
  SLUG=$(echo "$COIN" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g')

  echo "# === $SLUG ==="

  # Main page (current data, description, links)
  echo "https://coinmarketcap.com/currencies/${SLUG}/|cmc-${SLUG}-main.html|cmc-main|coin=$SLUG"

  # Markets/exchanges
  echo "https://coinmarketcap.com/currencies/${SLUG}/markets/|cmc-${SLUG}-markets.html|cmc-markets|coin=$SLUG"

  # Historical data page
  echo "https://coinmarketcap.com/currencies/${SLUG}/historical-data/|cmc-${SLUG}-historical.html|cmc-historical|coin=$SLUG"

  # News
  echo "https://coinmarketcap.com/currencies/${SLUG}/news/|cmc-${SLUG}-news.html|cmc-news|coin=$SLUG"

  # API endpoints (if accessible without auth)
  # These return JSON and are more reliable than scraping
  echo "https://api.coinmarketcap.com/data-api/v3/cryptocurrency/detail?slug=${SLUG}|cmc-${SLUG}-api-detail.json|cmc-api|coin=$SLUG,type=detail"
  echo "https://api.coinmarketcap.com/data-api/v3/cryptocurrency/market-pairs/latest?slug=${SLUG}&limit=100|cmc-${SLUG}-api-markets.json|cmc-api|coin=$SLUG,type=markets"

  # Historical data via API (may need date chunks)
  if [ "$HISTORICAL" = "1" ]; then
    echo "#"
    echo "# Historical data: $FROM_DATE to $TO_DATE"

    # Convert dates to timestamps (BSD date first, GNU date as fallback)
    FROM_TS=$(date -j -f "%Y-%m-%d" "$FROM_DATE" "+%s" 2>/dev/null || date -d "$FROM_DATE" "+%s")
    TO_TS=$(date -j -f "%Y-%m-%d" "$TO_DATE" "+%s" 2>/dev/null || date -d "$TO_DATE" "+%s")

    # CMC historical API (public, limited)
    echo "https://api.coinmarketcap.com/data-api/v3/cryptocurrency/historical?slug=${SLUG}&timeStart=${FROM_TS}&timeEnd=${TO_TS}|cmc-${SLUG}-api-historical.json|cmc-api|coin=$SLUG,type=historical"

    # Also try the web scrape version with date range
    echo "https://coinmarketcap.com/currencies/${SLUG}/historical-data/?start=${FROM_DATE//\-/}&end=${TO_DATE//\-/}|cmc-${SLUG}-historical-range.html|cmc-historical|coin=$SLUG,from=$FROM_DATE,to=$TO_DATE"
  fi

  echo "#"
done

echo "# === Additional data sources ==="
echo "#"

# CoinGecko as backup (often has more historical data)
for COIN in "${COINS[@]}"; do
  SLUG=$(echo "$COIN" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g')
  echo "https://www.coingecko.com/en/coins/${SLUG}|coingecko-${SLUG}-main.html|coingecko|coin=$SLUG"
  echo "https://api.coingecko.com/api/v3/coins/${SLUG}|coingecko-${SLUG}-api.json|coingecko-api|coin=$SLUG"
  echo "https://api.coingecko.com/api/v3/coins/${SLUG}/market_chart?vs_currency=usd&days=max|coingecko-${SLUG}-history.json|coingecko-api|coin=$SLUG,type=history"
done
process.sh (CoinMarketCap, deleted)
@@ -1,226 +0,0 @@
#!/usr/bin/env bash
# Process downloaded CoinMarketCap data
# Usage: ./process.sh <downloads-dir> [--output=DIR]

set -e

DOWNLOADS="$1"
OUTPUT="./cmc-archive"

for arg in "$@"; do
  case "$arg" in
    --output=*) OUTPUT="${arg#*=}" ;;
  esac
done

mkdir -p "$OUTPUT"

echo "=== Processing CoinMarketCap downloads ==="

# Process API JSON files first (most reliable)
for file in "$DOWNLOADS"/cmc-*-api-detail.json; do
  [ -f "$file" ] || continue

  COIN=$(basename "$file" | sed 's/cmc-\(.*\)-api-detail.json/\1/')
  COIN_DIR="$OUTPUT/$COIN"
  mkdir -p "$COIN_DIR"

  echo "Processing: $COIN"

  python3 << PYEOF
import json
import os

try:
    data = json.load(open('$file', 'r'))

    if 'data' in data:
        coin = data['data']

        # Extract metadata
        metadata = {
            'id': coin.get('id'),
            'name': coin.get('name'),
            'symbol': coin.get('symbol'),
            'slug': coin.get('slug'),
            'description': coin.get('description', ''),
            'logo': coin.get('logo'),
            'category': coin.get('category'),
            'dateAdded': coin.get('dateAdded'),
            'urls': coin.get('urls', {}),
            'tags': coin.get('tags', []),
        }

        with open('$COIN_DIR/metadata.json', 'w') as f:
            json.dump(metadata, f, indent=2)
        print(f"  Created metadata.json")

        # Create markdown summary
        with open('$COIN_DIR/INDEX.md', 'w') as f:
            f.write(f"# {metadata['name']} ({metadata['symbol']})\n\n")
            f.write(f"## Metadata\n\n")
            f.write(f"| Field | Value |\n")
            f.write(f"|-------|-------|\n")
            f.write(f"| Name | {metadata['name']} |\n")
            f.write(f"| Symbol | {metadata['symbol']} |\n")
            f.write(f"| CMC ID | {metadata['id']} |\n")
            f.write(f"| Added | {metadata['dateAdded']} |\n")
            f.write(f"| Category | {metadata.get('category', 'N/A')} |\n\n")

            f.write(f"## Links\n\n")
            urls = metadata.get('urls', {})
            for url_type, url_list in urls.items():
                if url_list:
                    f.write(f"- **{url_type}**: {', '.join(url_list[:3])}\n")

            f.write(f"\n## Description\n\n")
            f.write(metadata.get('description', 'No description')[:2000])
            f.write("\n")

        print(f"  Created INDEX.md")

except Exception as e:
    print(f"  Error processing: {e}")
PYEOF
done

# Process historical data
for file in "$DOWNLOADS"/cmc-*-api-historical.json; do
  [ -f "$file" ] || continue

  COIN=$(basename "$file" | sed 's/cmc-\(.*\)-api-historical.json/\1/')
  COIN_DIR="$OUTPUT/$COIN"
  mkdir -p "$COIN_DIR/historical"

  echo "Processing historical: $COIN"

  python3 << PYEOF
import json
import csv
from datetime import datetime

try:
    data = json.load(open('$file', 'r'))

    if 'data' in data and 'quotes' in data['data']:
        quotes = data['data']['quotes']

        # Group by year
        by_year = {}
        for quote in quotes:
            ts = quote.get('timestamp', quote.get('time', ''))
            if ts:
                year = ts[:4]
                if year not in by_year:
                    by_year[year] = []
                by_year[year].append({
                    'date': ts[:10],
                    'open': quote.get('quote', {}).get('USD', {}).get('open', quote.get('open')),
                    'high': quote.get('quote', {}).get('USD', {}).get('high', quote.get('high')),
                    'low': quote.get('quote', {}).get('USD', {}).get('low', quote.get('low')),
                    'close': quote.get('quote', {}).get('USD', {}).get('close', quote.get('close')),
                    'volume': quote.get('quote', {}).get('USD', {}).get('volume', quote.get('volume')),
                    'market_cap': quote.get('quote', {}).get('USD', {}).get('market_cap', quote.get('market_cap')),
                })

        for year, rows in by_year.items():
            filename = f'$COIN_DIR/historical/{year}.csv'
            with open(filename, 'w', newline='') as f:
                writer = csv.DictWriter(f, fieldnames=['date', 'open', 'high', 'low', 'close', 'volume', 'market_cap'])
                writer.writeheader()
                writer.writerows(sorted(rows, key=lambda x: x['date']))
            print(f"  Created historical/{year}.csv ({len(rows)} rows)")

except Exception as e:
    print(f"  Error: {e}")
PYEOF
done

# Process CoinGecko data as backup
for file in "$DOWNLOADS"/coingecko-*-api.json; do
  [ -f "$file" ] || continue

  COIN=$(basename "$file" | sed 's/coingecko-\(.*\)-api.json/\1/')
  COIN_DIR="$OUTPUT/$COIN"
  mkdir -p "$COIN_DIR"

  echo "Processing CoinGecko: $COIN"

  python3 << PYEOF
import json

try:
    data = json.load(open('$file', 'r'))

    # Extract useful fields
    gecko_data = {
        'coingecko_id': data.get('id'),
        'coingecko_rank': data.get('coingecko_rank'),
        'genesis_date': data.get('genesis_date'),
        'sentiment_up': data.get('sentiment_votes_up_percentage'),
        'sentiment_down': data.get('sentiment_votes_down_percentage'),
        'developer_data': data.get('developer_data', {}),
        'community_data': data.get('community_data', {}),
    }

    with open('$COIN_DIR/coingecko.json', 'w') as f:
        json.dump(gecko_data, f, indent=2)
    print(f"  Created coingecko.json")

except Exception as e:
    print(f"  Error: {e}")
PYEOF
done

# Process market/exchange data
for file in "$DOWNLOADS"/cmc-*-api-markets.json; do
  [ -f "$file" ] || continue

  COIN=$(basename "$file" | sed 's/cmc-\(.*\)-api-markets.json/\1/')
  COIN_DIR="$OUTPUT/$COIN"
  mkdir -p "$COIN_DIR"

  echo "Processing markets: $COIN"

  python3 << PYEOF
import json

try:
    data = json.load(open('$file', 'r'))

    if 'data' in data and 'marketPairs' in data['data']:
        pairs = data['data']['marketPairs']

        markets = []
        for pair in pairs[:50]:  # Top 50 markets
            markets.append({
                'exchange': pair.get('exchangeName'),
                'pair': pair.get('marketPair'),
                'price': pair.get('price'),
                'volume_24h': pair.get('volumeUsd'),
                'type': pair.get('marketType'),
            })

        with open('$COIN_DIR/markets.json', 'w') as f:
            json.dump(markets, f, indent=2)

        # Add to INDEX.md
        with open('$COIN_DIR/INDEX.md', 'a') as f:
            f.write(f"\n## Markets (Top 10)\n\n")
            f.write(f"| Exchange | Pair | Volume 24h |\n")
            f.write(f"|----------|------|------------|\n")
            for m in markets[:10]:
                vol = m.get('volume_24h', 0)
                vol_str = f"${vol:,.0f}" if vol else "N/A"
                f.write(f"| {m['exchange']} | {m['pair']} | {vol_str} |\n")

        print(f"  Created markets.json ({len(markets)} pairs)")

except Exception as e:
    print(f"  Error: {e}")
PYEOF
done

echo ""
echo "=== Processing Complete ==="
echo "Output: $OUTPUT/"
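Taken together, the two CMC scripts formed a generate/fetch/process pipeline; a minimal sketch of the flow as documented in their usage headers (the downloader in the middle step was supplied separately):

```bash
# Sketch: the CMC pipeline implied by the script headers above
./generate-jobs.sh lethean --historical > jobs.txt
# ...fetch each jobs.txt entry (URL|FILENAME|TYPE|METADATA) into ./downloads/...
./process.sh ./downloads --output=./cmc-archive
```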
discover.sh (CryptoNote project registry, deleted)
@@ -1,124 +0,0 @@
#!/usr/bin/env bash
# Discover all collection sources for a CryptoNote project
# Usage: ./discover.sh <project-name> | ./discover.sh --abandoned | ./discover.sh --all

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REGISTRY="$SCRIPT_DIR/registry.json"

# Get project data from registry
get_project() {
  local name="$1"
  jq -r ".projects[] | select(.name | ascii_downcase == \"$(echo $name | tr '[:upper:]' '[:lower:]')\")" "$REGISTRY"
}

# List abandoned projects
list_abandoned() {
  jq -r '.projects[] | select(.status == "abandoned" or .status == "low-activity" or .status == "dead") | .name' "$REGISTRY"
}

# List all projects
list_all() {
  jq -r '.projects[].name' "$REGISTRY"
}

# Generate sources for a project
generate_sources() {
  local name="$1"
  local project=$(get_project "$name")

  if [ -z "$project" ] || [ "$project" = "null" ]; then
    echo "# ERROR: Project '$name' not found in registry" >&2
    return 1
  fi

  local symbol=$(echo "$project" | jq -r '.symbol')
  local status=$(echo "$project" | jq -r '.status')

  echo "# === $name ($symbol) ==="
  echo "# Status: $status"
  echo "#"

  # GitHub repos
  echo "# GitHub Organizations:"
  echo "$project" | jq -r '.github[]?' | while read org; do
    [ -n "$org" ] && echo "github|https://github.com/$org|$name"
  done

  # BitcoinTalk
  local btt=$(echo "$project" | jq -r '.bitcointalk // empty')
  if [ -n "$btt" ]; then
    echo "#"
    echo "# BitcoinTalk:"
    echo "bitcointalk|https://bitcointalk.org/index.php?topic=$btt.0|$name"
  fi

  # CMC/CoinGecko
  local cmc=$(echo "$project" | jq -r '.cmc // empty')
  local gecko=$(echo "$project" | jq -r '.coingecko // empty')
  echo "#"
  echo "# Market Data:"
  [ -n "$cmc" ] && echo "cmc|https://coinmarketcap.com/currencies/$cmc/|$name"
  [ -n "$gecko" ] && echo "coingecko|https://coingecko.com/en/coins/$gecko|$name"

  # Website/Explorer
  local website=$(echo "$project" | jq -r '.website // empty')
  local explorer=$(echo "$project" | jq -r '.explorer // empty')
  echo "#"
  echo "# Web Properties:"
  [ -n "$website" ] && echo "wayback|https://$website|$name"
  [ -n "$explorer" ] && echo "explorer|https://$explorer|$name"

  # Salvageable features
  local salvage=$(echo "$project" | jq -r '.salvageable[]?' 2>/dev/null)
  if [ -n "$salvage" ]; then
    echo "#"
    echo "# Salvageable:"
    echo "$project" | jq -r '.salvageable[]?' | while read item; do
      echo "# - $item"
    done
  fi

  echo "#"
}

# Main
case "$1" in
  --abandoned)
    echo "# Abandoned CryptoNote Projects (Salvage Candidates)"
    echo "# Format: source|url|project"
    echo "#"
    for proj in $(list_abandoned); do
      generate_sources "$proj"
    done
    ;;
  --all)
    echo "# All CryptoNote Projects"
    echo "# Format: source|url|project"
    echo "#"
    for proj in $(list_all); do
      generate_sources "$proj"
    done
    ;;
  --list)
    list_all
    ;;
  --list-abandoned)
    list_abandoned
    ;;
  "")
    echo "Usage: $0 <project-name> | --abandoned | --all | --list" >&2
    echo "" >&2
    echo "Examples:" >&2
    echo "  $0 lethean      # Sources for Lethean" >&2
    echo "  $0 monero       # Sources for Monero" >&2
    echo "  $0 --abandoned  # All abandoned projects" >&2
    echo "  $0 --all        # Everything" >&2
    echo "  $0 --list       # Just list project names" >&2
    exit 1
    ;;
  *)
    generate_sources "$1"
    ;;
esac
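Since discover.sh emits `source|url|project` lines, downstream routing was a simple field split; a sketch:

```bash
# Sketch: route discover.sh output to per-source handling
./discover.sh --abandoned | grep -v '^#' | while IFS='|' read -r source url project; do
  case "$source" in
    github)      echo "archive org:    $url ($project)" ;;
    bitcointalk) echo "archive thread: $url ($project)" ;;
    *)           echo "fetch:          $url ($project)" ;;
  esac
done
```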
SKILL.md (GitHub history skill)
@@ -1,137 +1,78 @@
 # GitHub History Collection Skill
 
-Collect and score GitHub issues and PRs for triage analysis.
+Collect GitHub repositories, issues, and PRs for archival and triage analysis.
+
+## Prerequisites
+
+```bash
+# Install Borg
+go install github.com/Snider/Borg@latest
+```
 
 ## Usage
 
 ```bash
-# Single repo
-./collect.sh https://github.com/LetheanNetwork/lthn-app-vpn
+# Clone a single repository
+borg collect github repo https://github.com/LetheanNetwork/lthn-app-vpn
 
-# Entire org (all repos)
-./collect.sh https://github.com/LetheanNetwork --org
+# Clone all repos from an org
+borg collect github repos LetheanNetwork
 
-# Just issues (skip PRs)
-./collect.sh https://github.com/LetheanNetwork/lthn-app-vpn --issues-only
-
-# Just PRs (skip issues)
-./collect.sh https://github.com/LetheanNetwork/lthn-app-vpn --prs-only
-
-# Custom rate limit delay
-./collect.sh https://github.com/LetheanNetwork --org --delay=0.5
+# Output to encrypted container
+borg collect github repos LetheanNetwork --format stim -o lethean.stim
 ```
 
-## Output Structure
+## Target Registry
 
-```
-repo/
-├── {org}/
-│   └── {repo}/
-│       ├── Issue/
-│       │   ├── 001.md     # Sequential, no gaps
-│       │   ├── 002.md
-│       │   ├── 003.md
-│       │   └── INDEX.md   # Scored index
-│       ├── PR/
-│       │   ├── 001.md
-│       │   ├── 002.md
-│       │   └── INDEX.md
-│       └── .json/         # Raw API responses
-│           ├── issues-list.json
-│           ├── issue-{n}.json
-│           ├── prs-list.json
-│           └── pr-{n}.json
-```
+### Lethean Ecosystem
+- `LetheanNetwork`
+- `letheanVPN`
+- `LetheanMovement`
 
-### Sequential vs GitHub Numbers
+### CryptoNote Active
+- `monero-project`
+- `hyle-team`
+- `zanoio`
+- `kevacoin-project`
+- `scala-network`
+- `Karbovanets`
+- `wownero`
+- `ConcealNetwork`
+- `ryo-currency`
 
-- **Filename**: `001.md`, `002.md`, etc. - sequential, no gaps
-- **Inside file**: `# Issue #47: ...` - preserves original GitHub number
-- **INDEX.md**: Maps both: `| 001 | #47 | Title | SCORE |`
+### Salvage Priority (dead/abandoned)
+- `haven-protocol-org`
+- `graft-project`
+- `graft-community`
+- `oxen-io`
+- `loki-project`
+- `turtlecoin`
+- `masari-project`
+- `aeonix`
+- `nerva-project`
+- `sumoprojects`
+- `deroproject`
+- `bcndev`
+- `electroneum`
 
-This ensures clean sequential browsing while maintaining traceability to GitHub.
-
-## Reception Scores
-
-| Score | Meaning | Triage Action |
-|-------|---------|---------------|
-| ADDRESSED | Closed after discussion | Review if actually fixed |
-| DISMISSED | Labeled wontfix/invalid | **RECLAIM candidate** |
-| IGNORED | Closed, no response | **RECLAIM candidate** |
-| STALE | Open, no replies | Needs attention |
-| ACTIVE | Open with discussion | In progress |
-| MERGED | PR accepted | Done |
-| REJECTED | PR closed unmerged | Review why |
-| PENDING | PR still open | Needs review |
-
-## Requirements
-
-- `gh` CLI authenticated (`gh auth login`)
-- `jq` installed
+### Non-CN Reference
+- `theQRL`
+- `hyperswarm`
+- `holepunchto`
+- `openhive-network`
+- `octa-space`
 
 ## Batch Collection
 
-Supports comma-separated targets for batch runs:
-
 ```bash
-# Batch orgs
-./collect.sh "LetheanNetwork,graft-project,oxen-io" --org
-
-# Batch repos
-./collect.sh "LetheanNetwork/lthn-app-vpn,monero-project/monero"
+# Collect everything into encrypted archive
+borg collect github repos LetheanNetwork,monero-project,graft-project \
+  --format stim -o cryptonote-archive.stim
 ```
 
-## Full Registry List
+## Triage Workflow
 
-Copy-paste ready commands for the complete CryptoNote ecosystem:
-
-```bash
-# === LETHEAN ECOSYSTEM ===
-./collect.sh "LetheanNetwork,letheanVPN,LetheanMovement" --org
-
-# === CRYPTONOTE ACTIVE ===
-./collect.sh "monero-project,hyle-team,zanoio,kevacoin-project,scala-network" --org
-./collect.sh "Karbovanets,wownero,ConcealNetwork,ryo-currency" --org
-
-# === SALVAGE PRIORITY (dead/abandoned) ===
-./collect.sh "haven-protocol-org,graft-project,graft-community" --org
-./collect.sh "oxen-io,loki-project" --org
-./collect.sh "turtlecoin,masari-project,aeonix,nerva-project,sumoprojects" --org
-./collect.sh "deroproject,bcndev,electroneum" --org
-
-# === NON-CN REFERENCE ===
-./collect.sh "theQRL,hyperswarm,holepunchto,openhive-network,octa-space" --org
-```
-
-### One-liner for everything
-
-```bash
-./collect.sh "LetheanNetwork,letheanVPN,LetheanMovement,monero-project,haven-protocol-org,hyle-team,zanoio,kevacoin-project,scala-network,deroproject,Karbovanets,wownero,turtlecoin,masari-project,aeonix,oxen-io,loki-project,graft-project,graft-community,nerva-project,ConcealNetwork,ryo-currency,sumoprojects,bcndev,electroneum,theQRL,hyperswarm,holepunchto,openhive-network,octa-space" --org
-```
-
-## Example Run
-
-```bash
-$ ./collect.sh "LetheanNetwork,graft-project" --org
-
-=== Collecting all repos from org: LetheanNetwork ===
-=== Collecting: LetheanNetwork/lthn-app-vpn ===
-  Output: ./repo/LetheanNetwork/lthn-app-vpn/
-Fetching issues...
-  Found 145 issues
-  Fetching issue #1 -> 001.md
-...
-  Created Issue/INDEX.md
-Fetching PRs...
-  Found 98 PRs
-...
-  Created PR/INDEX.md
-
-=== Collecting all repos from org: graft-project ===
-=== Collecting: graft-project/graft-network ===
-  Output: ./repo/graft-project/graft-network/
-...
-
-=== Collection Complete ===
-Output: ./repo/
-```
+1. Collect repos with Borg
+2. Review issues marked DISMISSED or IGNORED
+3. Identify salvageable features
+4. Document in project-archaeology skill
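For step 2 of the new Triage Workflow, the scored indexes can be grepped directly; a sketch, assuming the `repo/{org}/{repo}/Issue/INDEX.md` layout produced by the old collector removed below (rows end with the score column):

```bash
# Sketch: list RECLAIM candidates (DISMISSED/IGNORED issues) from scored indexes
grep -rhE '\| (DISMISSED|IGNORED) \|$' --include=INDEX.md repo/
```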
collect.sh (GitHub History Collector v2, deleted; diff truncated)
@@ -1,516 +0,0 @@
#!/usr/bin/env bash
# GitHub History Collector v2
# Usage: ./collect.sh <target> [--org] [--issues-only] [--prs-only]
#
# Supports:
#   Single repo:  ./collect.sh LetheanNetwork/lthn-app-vpn
#   Single org:   ./collect.sh LetheanNetwork --org
#   Batch orgs:   ./collect.sh "LetheanNetwork,graft-project,oxen-io" --org
#   Batch repos:  ./collect.sh "owner/repo1,owner/repo2"
#
# Output structure:
#   repo/{org}/{repo}/Issue/001.md, 002.md, ...
#   repo/{org}/{repo}/PR/001.md, 002.md, ...
#
# Rate limiting:
#   --check-rate   Just show current rate limit status and exit
#   Auto-pauses at 25% remaining (75% used) until reset+10s (preserves GraphQL quota)

set -e

# GitHub API allows 5000 requests/hour authenticated
# 0.05s = 20 req/sec = safe margin, bump to 0.1 if rate limited
DELAY=0.05
OUTPUT_BASE="./repo"

# Rate limit protection - check every N calls, pause if under 25% (75% used)
API_CALL_COUNT=0
RATE_CHECK_INTERVAL=100

check_rate_limit() {
  local rate_json=$(gh api rate_limit 2>/dev/null)
  if [ -z "$rate_json" ]; then
    echo "  [Rate check failed, continuing...]"
    return
  fi

  local remaining=$(echo "$rate_json" | jq -r '.resources.core.remaining')
  local limit=$(echo "$rate_json" | jq -r '.resources.core.limit')
  local reset=$(echo "$rate_json" | jq -r '.resources.core.reset')

  local percent=$((remaining * 100 / limit))

  echo ""
  echo ">>> Rate check: ${percent}% remaining ($remaining/$limit)"

  if [ "$percent" -lt 25 ]; then
    local now=$(date +%s)
    local wait_time=$((reset - now + 10))

    if [ "$wait_time" -gt 0 ]; then
      local resume_time=$(date -d "@$((reset + 10))" '+%H:%M:%S' 2>/dev/null || date -r "$((reset + 10))" '+%H:%M:%S' 2>/dev/null || echo "reset+10s")
      echo ">>> Under 25% - pausing ${wait_time}s until $resume_time"
      echo ">>> (GraphQL quota preserved for other tools)"
      sleep "$wait_time"
      echo ">>> Resuming collection..."
    fi
  else
    echo ">>> Above 25% - continuing..."
  fi
  echo ""
}

track_api_call() {
  API_CALL_COUNT=$((API_CALL_COUNT + 1))

  if [ $((API_CALL_COUNT % RATE_CHECK_INTERVAL)) -eq 0 ]; then
    check_rate_limit
  fi
}

# Parse URL into org/repo
parse_github_url() {
  local url="$1"
  url="${url#https://github.com/}"
  url="${url#http://github.com/}"
  url="${url%/}"
  echo "$url"
}

# Collect single repo
collect_repo() {
  local repo="$1"  # format: org/repo-name
  local org=$(dirname "$repo")
  local repo_name=$(basename "$repo")

  local issue_dir="$OUTPUT_BASE/$org/$repo_name/Issue"
  local pr_dir="$OUTPUT_BASE/$org/$repo_name/PR"
  local json_dir="$OUTPUT_BASE/$org/$repo_name/.json"

  mkdir -p "$issue_dir" "$pr_dir" "$json_dir"

  echo "=== Collecting: $repo ==="
  echo "  Output: $OUTPUT_BASE/$org/$repo_name/"

  # Collect Issues
  if [ "$SKIP_ISSUES" != "1" ]; then
    echo "Fetching issues..."
    if ! gh issue list --repo "$repo" --state all --limit 500 \
        --json number,title,state,author,labels,createdAt,closedAt,body \
        > "$json_dir/issues-list.json" 2>/dev/null; then
      echo "  (issues disabled or not accessible)"
      echo "[]" > "$json_dir/issues-list.json"
    fi
    track_api_call

    local issue_count=$(jq length "$json_dir/issues-list.json")
    echo "  Found $issue_count issues"

    # Fetch each issue
    local seq=0
    for github_num in $(jq -r '.[].number' "$json_dir/issues-list.json" | sort -n); do
      seq=$((seq + 1))
      local seq_padded=$(printf '%03d' $seq)

      # Skip if already fetched
      if [ -f "$json_dir/issue-$github_num.json" ] && [ -f "$issue_dir/$seq_padded.md" ]; then
        echo "  Skipping issue #$github_num (already exists)"
        continue
      fi

      echo "  Fetching issue #$github_num -> $seq_padded.md"
      gh issue view "$github_num" --repo "$repo" \
        --json number,title,state,author,labels,createdAt,closedAt,body,comments \
        > "$json_dir/issue-$github_num.json"
      track_api_call

      # Convert to markdown with sequential filename
      convert_issue "$json_dir/issue-$github_num.json" "$issue_dir/$seq_padded.md" "$github_num"
      sleep $DELAY
    done

    generate_issue_index "$issue_dir"
  fi

  # Collect PRs
  if [ "$SKIP_PRS" != "1" ]; then
    echo "Fetching PRs..."
    if ! gh pr list --repo "$repo" --state all --limit 500 \
        --json number,title,state,author,createdAt,closedAt,mergedAt,body \
        > "$json_dir/prs-list.json" 2>/dev/null; then
      echo "  (PRs disabled or not accessible)"
      echo "[]" > "$json_dir/prs-list.json"
    fi
    track_api_call

    local pr_count=$(jq length "$json_dir/prs-list.json")
    echo "  Found $pr_count PRs"

    # Fetch each PR
    local seq=0
    for github_num in $(jq -r '.[].number' "$json_dir/prs-list.json" | sort -n); do
      seq=$((seq + 1))
      local seq_padded=$(printf '%03d' $seq)

      # Skip if already fetched
      if [ -f "$json_dir/pr-$github_num.json" ] && [ -f "$pr_dir/$seq_padded.md" ]; then
        echo "  Skipping PR #$github_num (already exists)"
        continue
      fi

      echo "  Fetching PR #$github_num -> $seq_padded.md"
      gh pr view "$github_num" --repo "$repo" \
        --json number,title,state,author,createdAt,closedAt,mergedAt,body,comments,reviews \
        > "$json_dir/pr-$github_num.json" 2>/dev/null || true
      track_api_call

      # Convert to markdown with sequential filename
      convert_pr "$json_dir/pr-$github_num.json" "$pr_dir/$seq_padded.md" "$github_num"
      sleep $DELAY
    done

    generate_pr_index "$pr_dir"
  fi
}

# Collect all repos in org
collect_org() {
  local org="$1"

  echo "=== Collecting all repos from org: $org ==="

  # Get repo list (1 API call)
  local repos
  repos=$(gh repo list "$org" --limit 500 --json nameWithOwner -q '.[].nameWithOwner')
  track_api_call

  while read -r repo; do
    [ -n "$repo" ] || continue
    collect_repo "$repo"
    sleep $DELAY
  done <<< "$repos"
}

# Convert issue JSON to markdown
convert_issue() {
  local json_file="$1"
  local output_file="$2"
  local github_num="$3"

  local title=$(jq -r '.title' "$json_file")
  local state=$(jq -r '.state' "$json_file")
  local author=$(jq -r '.author.login' "$json_file")
  local created=$(jq -r '.createdAt' "$json_file" | cut -d'T' -f1)
  local closed=$(jq -r '.closedAt // "N/A"' "$json_file" | cut -d'T' -f1)
  local body=$(jq -r '.body // "No description"' "$json_file")
  local labels=$(jq -r '[.labels[].name] | join(", ")' "$json_file")
  local comment_count=$(jq '.comments | length' "$json_file")

  # Score reception
  local score="UNKNOWN"
  local reason=""

  if [ "$state" = "CLOSED" ]; then
    if echo "$labels" | grep -qi "wontfix\|invalid\|duplicate\|won't fix"; then
      score="DISMISSED"
      reason="Labeled as wontfix/invalid/duplicate"
    elif [ "$comment_count" -eq 0 ]; then
      score="IGNORED"
      reason="Closed with no discussion"
    else
      score="ADDRESSED"
      reason="Closed after discussion"
    fi
  else
    if [ "$comment_count" -eq 0 ]; then
      score="STALE"
      reason="Open with no response"
    else
      score="ACTIVE"
      reason="Open with discussion"
    fi
  fi

  cat > "$output_file" << ISSUE_EOF
# Issue #$github_num: $title

## Reception Score

| Score | Reason |
|-------|--------|
| **$score** | $reason |

---

## Metadata

| Field | Value |
|-------|-------|
| GitHub # | $github_num |
| State | $state |
| Author | @$author |
| Created | $created |
| Closed | $closed |
| Labels | $labels |
| Comments | $comment_count |

---

## Original Post

**Author:** @$author

$body

---

## Discussion Thread

ISSUE_EOF

  jq -r '.comments[] | "### Comment by @\(.author.login)\n\n**Date:** \(.createdAt | split("T")[0])\n\n\(.body)\n\n---\n"' "$json_file" >> "$output_file" 2>/dev/null || true
}

# Convert PR JSON to markdown
convert_pr() {
  local json_file="$1"
  local output_file="$2"
  local github_num="$3"

  [ -f "$json_file" ] || return

  local title=$(jq -r '.title' "$json_file")
  local state=$(jq -r '.state' "$json_file")
  local author=$(jq -r '.author.login' "$json_file")
  local created=$(jq -r '.createdAt' "$json_file" | cut -d'T' -f1)
  local merged=$(jq -r '.mergedAt // "N/A"' "$json_file" | cut -d'T' -f1)
  local body=$(jq -r '.body // "No description"' "$json_file")

  local score="UNKNOWN"
  local reason=""

  if [ "$state" = "MERGED" ] || { [ "$merged" != "N/A" ] && [ "$merged" != "null" ]; }; then
    score="MERGED"
    reason="Contribution accepted"
  elif [ "$state" = "CLOSED" ]; then
    score="REJECTED"
    reason="PR closed without merge"
  else
    score="PENDING"
    reason="Still open"
  fi

  cat > "$output_file" << PR_EOF
# PR #$github_num: $title

## Reception Score

| Score | Reason |
|-------|--------|
| **$score** | $reason |

---

## Metadata

| Field | Value |
|-------|-------|
| GitHub # | $github_num |
| State | $state |
| Author | @$author |
| Created | $created |
| Merged | $merged |

---

## Description

$body

---

## Reviews & Comments

PR_EOF

  jq -r '.comments[]? | "### Comment by @\(.author.login)\n\n\(.body)\n\n---\n"' "$json_file" >> "$output_file" 2>/dev/null || true
  jq -r '.reviews[]? | "### Review by @\(.author.login) [\(.state)]\n\n\(.body // "No comment")\n\n---\n"' "$json_file" >> "$output_file" 2>/dev/null || true
}

# Generate Issue index
generate_issue_index() {
  local dir="$1"

  cat > "$dir/INDEX.md" << 'INDEX_HEADER'
# Issues Index

## Reception Score Legend

| Score | Meaning | Action |
|-------|---------|--------|
| ADDRESSED | Closed after discussion | Review if actually fixed |
| DISMISSED | Labeled wontfix/invalid | **RECLAIM candidate** |
| IGNORED | Closed, no response | **RECLAIM candidate** |
| STALE | Open, no replies | Needs attention |
| ACTIVE | Open with discussion | In progress |

---

## Issues

| Seq | GitHub # | Title | Score |
|-----|----------|-------|-------|
INDEX_HEADER

  for file in "$dir"/[0-9]*.md; do
    [ -f "$file" ] || continue
    local seq=$(basename "$file" .md)
    local github_num=$(sed -n 's/^# Issue #\([0-9]*\):.*/\1/p' "$file")
    local title=$(head -1 "$file" | sed 's/^# Issue #[0-9]*: //')
    local score=$(sed -n '/\*\*[A-Z]/s/.*\*\*\([A-Z]*\)\*\*.*/\1/p' "$file" | head -1)
    echo "| [$seq]($seq.md) | #$github_num | $title | $score |" >> "$dir/INDEX.md"
  done

  echo "  Created Issue/INDEX.md"
}

# Generate PR index
generate_pr_index() {
  local dir="$1"

  cat > "$dir/INDEX.md" << 'INDEX_HEADER'
# Pull Requests Index

## Reception Score Legend

| Score | Meaning | Action |
|-------|---------|--------|
| MERGED | PR accepted | Done |
| REJECTED | PR closed unmerged | Review why |
| PENDING | PR still open | Needs review |

---

## Pull Requests

| Seq | GitHub # | Title | Score |
|-----|----------|-------|-------|
INDEX_HEADER

  for file in "$dir"/[0-9]*.md; do
    [ -f "$file" ] || continue
    local seq=$(basename "$file" .md)
    local github_num=$(sed -n 's/^# PR #\([0-9]*\):.*/\1/p' "$file")
    local title=$(head -1 "$file" | sed 's/^# PR #[0-9]*: //')
    local score=$(sed -n '/\*\*[A-Z]/s/.*\*\*\([A-Z]*\)\*\*.*/\1/p' "$file" | head -1)
    echo "| [$seq]($seq.md) | #$github_num | $title | $score |" >> "$dir/INDEX.md"
  done

  echo "  Created PR/INDEX.md"
}

# Show rate limit status
show_rate_status() {
  local rate_json=$(gh api rate_limit 2>/dev/null)
  if [ -z "$rate_json" ]; then
    echo "Failed to fetch rate limit"
    exit 1
  fi

  echo "=== GitHub API Rate Limit Status ==="
  echo ""
  echo "Core (REST API):"
  echo "  Remaining: $(echo "$rate_json" | jq -r '.resources.core.remaining') / $(echo "$rate_json" | jq -r '.resources.core.limit')"
  local core_reset=$(echo "$rate_json" | jq -r '.resources.core.reset')
  echo "  Reset: $(date -d "@$core_reset" '+%H:%M:%S' 2>/dev/null || date -r "$core_reset" '+%H:%M:%S' 2>/dev/null || echo "$core_reset")"
  echo ""
  echo "GraphQL:"
  echo "  Remaining: $(echo "$rate_json" | jq -r '.resources.graphql.remaining') / $(echo "$rate_json" | jq -r '.resources.graphql.limit')"
  local gql_reset=$(echo "$rate_json" | jq -r '.resources.graphql.reset')
  echo "  Reset: $(date -d "@$gql_reset" '+%H:%M:%S' 2>/dev/null || date -r "$gql_reset" '+%H:%M:%S' 2>/dev/null || echo "$gql_reset")"
  echo ""
  echo "Search:"
  echo "  Remaining: $(echo "$rate_json" | jq -r '.resources.search.remaining') / $(echo "$rate_json" | jq -r '.resources.search.limit')"
  echo ""
}

# Main
main() {
  local targets=""
  local is_org=0
  SKIP_ISSUES=0
  SKIP_PRS=0

  # Parse args
  for arg in "$@"; do
    case "$arg" in
      --org) is_org=1 ;;
      --issues-only) SKIP_PRS=1 ;;
      --prs-only) SKIP_ISSUES=1 ;;
      --delay=*) DELAY="${arg#*=}" ;;
      --check-rate) show_rate_status; exit 0 ;;
      https://*|http://*) targets="$arg" ;;
      -*) ;;  # ignore unknown flags
      *) targets="$arg" ;;
    esac
  done

  if [ -z "$targets" ]; then
    echo "Usage: $0 <target> [--org] [--issues-only] [--prs-only] [--delay=0.05] [--check-rate]"
    echo ""
    echo "Options:"
    echo "  --check-rate   Show rate limit status (Core/GraphQL/Search) and exit"
    echo "  --delay=N      Delay between requests (default: 0.05s)"
    echo ""
    echo "Rate limiting: Auto-pauses at 25% remaining (75% used) until reset+10s"
    echo ""
    echo "Target formats:"
    echo "  Single repo:  LetheanNetwork/lthn-app-vpn"
    echo "  Single org:   LetheanNetwork --org"
    echo "  Batch orgs:   \"LetheanNetwork,graft-project,oxen-io\" --org"
    echo "  Batch repos:  \"owner/repo1,owner/repo2\""
    echo ""
    echo "Output: repo/{org}/{repo}/Issue/  repo/{org}/{repo}/PR/"
    echo ""
    echo "Full registry list (copy-paste ready):"
    echo ""
    echo "  # Lethean ecosystem"
    echo "  $0 \"LetheanNetwork,letheanVPN,LetheanMovement\" --org"
    echo ""
    echo "  # CryptoNote projects"
    echo "  $0 \"monero-project,haven-protocol-org,hyle-team,zanoio\" --org"
    echo "  $0 \"kevacoin-project,scala-network,deroproject\" --org"
    echo "  $0 \"Karbovanets,wownero,turtlecoin\" --org"
    echo "  $0 \"masari-project,aeonix,nerva-project\" --org"
    echo "  $0 \"ConcealNetwork,ryo-currency,sumoprojects\" --org"
    echo "  $0 \"bcndev,electroneum\" --org"
    echo ""
    echo "  # Dead/salvage priority"
    echo "  $0 \"graft-project,graft-community,oxen-io,loki-project\" --org"
    echo ""
    echo "  # Non-CN reference projects"
    echo "  $0 \"theQRL,hyperswarm,holepunchto,openhive-network,octa-space\" --org"
    exit 1
  fi

  # Handle comma-separated list
  IFS=',' read -ra TARGET_LIST <<< "$targets"

  for target in "${TARGET_LIST[@]}"; do
    # Trim whitespace
    target=$(echo "$target" | xargs)
    local parsed=$(parse_github_url "$target")
|
||||
|
||||
if [ "$is_org" = "1" ]; then
|
||||
collect_org "$parsed"
|
||||
else
|
||||
collect_repo "$parsed"
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "=== Collection Complete ==="
|
||||
echo "Output: $OUTPUT_BASE/"
|
||||
}
|
||||
|
||||
main "$@"
|
||||
|
|
@@ -1,107 +0,0 @@
#!/usr/bin/env bash
# Generate job list for proxy-based collection
# Usage: ./generate-jobs.sh <source> <target> [options] > jobs.txt

set -e

SOURCE="$1"
TARGET="$2"
shift 2 || true

# Defaults
LIMIT=1000
PAGES=100

# Parse options
for arg in "$@"; do
    case "$arg" in
        --limit=*) LIMIT="${arg#*=}" ;;
        --pages=*) PAGES="${arg#*=}" ;;
    esac
done

# Output header
echo "# Job list generated $(date +%Y-%m-%d\ %H:%M)"
echo "# Source: $SOURCE | Target: $TARGET"
echo "# Format: URL|FILENAME|TYPE|METADATA"
echo "#"

case "$SOURCE" in

    bitcointalk|btt)
        # Extract topic ID
        TOPIC_ID=$(echo "$TARGET" | grep -oE '[0-9]+' | head -1)
        echo "# BitcoinTalk topic: $TOPIC_ID"
        echo "#"

        # Generate page URLs (20 posts per page)
        for ((i=0; i<PAGES*20; i+=20)); do
            echo "https://bitcointalk.org/index.php?topic=${TOPIC_ID}.${i}|btt-${TOPIC_ID}-p${i}.html|bitcointalk|page=$((i/20)),offset=$i"
        done
        ;;

    reddit)
        # Handle r/subreddit or full URL
        SUBREDDIT=$(echo "$TARGET" | sed 's|.*/r/||' | sed 's|/.*||')
        echo "# Reddit: r/$SUBREDDIT"
        echo "#"

        # Subreddit pages (top, new, hot)
        for sort in "top" "new" "hot"; do
            echo "https://old.reddit.com/r/${SUBREDDIT}/${sort}/.json?limit=100|reddit-${SUBREDDIT}-${sort}.json|reddit|sort=$sort"
        done

        # If it's a specific thread
        if [[ "$TARGET" =~ comments/([a-z0-9]+) ]]; then
            THREAD_ID="${BASH_REMATCH[1]}"
            echo "https://old.reddit.com/r/${SUBREDDIT}/comments/${THREAD_ID}.json|reddit-thread-${THREAD_ID}.json|reddit|thread=$THREAD_ID"
        fi
        ;;

    wayback|archive)
        # Clean domain
        DOMAIN=$(echo "$TARGET" | sed 's|https\?://||' | sed 's|/.*||')
        echo "# Wayback Machine: $DOMAIN"
        echo "#"

        # CDX API to get all snapshots
        echo "https://web.archive.org/cdx/search/cdx?url=${DOMAIN}/*&output=json&limit=${LIMIT}|wayback-${DOMAIN}-cdx.json|wayback-index|domain=$DOMAIN"

        # Common important pages
        for path in "" "index.html" "about" "roadmap" "team" "whitepaper" "faq"; do
            echo "https://web.archive.org/web/2020/${DOMAIN}/${path}|wayback-${DOMAIN}-2020-${path:-index}.html|wayback|year=2020,path=$path"
            echo "https://web.archive.org/web/2021/${DOMAIN}/${path}|wayback-${DOMAIN}-2021-${path:-index}.html|wayback|year=2021,path=$path"
            echo "https://web.archive.org/web/2022/${DOMAIN}/${path}|wayback-${DOMAIN}-2022-${path:-index}.html|wayback|year=2022,path=$path"
        done
        ;;

    medium)
        # Handle @author or publication
        AUTHOR=$(echo "$TARGET" | sed 's|.*/||' | sed 's|^@||')
        echo "# Medium: @$AUTHOR"
        echo "#"

        # Medium RSS feed (easier to parse)
        echo "https://medium.com/feed/@${AUTHOR}|medium-${AUTHOR}-feed.xml|medium-rss|author=$AUTHOR"

        # Profile page
        echo "https://medium.com/@${AUTHOR}|medium-${AUTHOR}-profile.html|medium|author=$AUTHOR"
        ;;

    twitter|x)
        USERNAME=$(echo "$TARGET" | sed 's|.*/||' | sed 's|^@||')
        echo "# Twitter/X: @$USERNAME"
        echo "# Note: Twitter requires auth - use nitter or API"
        echo "#"

        # Nitter instances (public, no auth)
        echo "https://nitter.net/${USERNAME}|twitter-${USERNAME}.html|nitter|user=$USERNAME"
        echo "https://nitter.net/${USERNAME}/with_replies|twitter-${USERNAME}-replies.html|nitter|user=$USERNAME,type=replies"
        ;;

    *)
        echo "# ERROR: Unknown source '$SOURCE'" >&2
        echo "# Supported: bitcointalk, reddit, wayback, medium, twitter" >&2
        exit 1
        ;;
esac
@@ -1,242 +0,0 @@
#!/usr/bin/env bash
# Process downloaded files into markdown
# Usage: ./process.sh <source> <downloads-dir> [--output=DIR]

set -e

SOURCE="$1"
DOWNLOADS="$2"
shift 2 || true

OUTPUT="./processed"

for arg in "$@"; do
    case "$arg" in
        --output=*) OUTPUT="${arg#*=}" ;;
    esac
done

mkdir -p "$OUTPUT/posts"

echo "=== Processing $SOURCE files from $DOWNLOADS ==="

case "$SOURCE" in

    bitcointalk|btt)
        echo "Processing BitcoinTalk pages..."
        # Post numbering is tracked inside the Python block by counting files
        # already written, so numbers continue across pages (a shell variable
        # cannot be updated from within the heredoc).

        for file in "$DOWNLOADS"/btt-*.html; do
            [ -f "$file" ] || continue
            echo "  Processing: $(basename "$file")"

            python3 << PYEOF
import re
import html
import glob

html_content = open('$file', 'r', encoding='utf-8', errors='ignore').read()

# Extract thread title from first page
title_match = re.search(r'<title>([^<]+)</title>', html_content)
title = title_match.group(1) if title_match else "Unknown Thread"
title = title.replace(' - Bitcoin Forum', '').strip()

with open('$OUTPUT/.thread_title', 'w') as f:
    f.write(title)

# Pattern for posts
post_blocks = re.findall(r'<div class="post"[^>]*id="msg(\d+)"[^>]*>(.*?)</div>\s*(?:<div class="moderatorbar"|<div class="signature">)', html_content, re.DOTALL)

# Continue numbering after posts written by previously processed pages
post_num = len(glob.glob('$OUTPUT/posts/POST-*.md'))

for msg_id, content in post_blocks:
    # Clean content
    content = re.sub(r'<br\s*/?>', '\n', content)
    content = re.sub(r'<[^>]+>', '', content)
    content = html.unescape(content).strip()

    if content:
        post_num += 1

        with open(f'$OUTPUT/posts/POST-{post_num:04d}.md', 'w') as f:
            f.write(f"# Post #{post_num}\\n\\n")
            f.write(f"Message ID: {msg_id}\\n\\n")
            f.write(f"---\\n\\n")
            f.write(content)
            f.write("\\n")

        print(f"  POST-{post_num:04d}.md")

print(f"TOTAL:{post_num}")
PYEOF
        done

        # Generate index
        TITLE=$(cat "$OUTPUT/.thread_title" 2>/dev/null || echo "BitcoinTalk Thread")
        TOTAL=$(ls "$OUTPUT/posts/"POST-*.md 2>/dev/null | wc -l | tr -d ' ')

        cat > "$OUTPUT/INDEX.md" << EOF
# $TITLE

Archived from BitcoinTalk

| Field | Value |
|-------|-------|
| Posts | $TOTAL |

## Posts

EOF
        for f in "$OUTPUT/posts/"POST-*.md; do
            [ -f "$f" ] || continue
            NUM=$(basename "$f" .md | sed 's/POST-0*//')
            echo "- [Post #$NUM](posts/$(basename "$f"))" >> "$OUTPUT/INDEX.md"
        done
        ;;

    reddit)
        echo "Processing Reddit JSON..."

        for file in "$DOWNLOADS"/reddit-*.json; do
            [ -f "$file" ] || continue
            echo "  Processing: $(basename "$file")"

            python3 << PYEOF
import json

data = json.load(open('$file', 'r'))

# Handle different Reddit JSON structures
posts = []
if isinstance(data, list) and len(data) > 0:
    if 'data' in data[0]:
        # Thread format
        posts = data[0]['data']['children']
    else:
        posts = data
elif isinstance(data, dict) and 'data' in data:
    posts = data['data']['children']

for i, post_wrapper in enumerate(posts):
    post = post_wrapper.get('data', post_wrapper)

    title = post.get('title', post.get('body', '')[:50])
    author = post.get('author', 'unknown')
    score = post.get('score', 0)
    body = post.get('selftext', post.get('body', ''))
    created = post.get('created_utc', 0)

    filename = f'$OUTPUT/posts/REDDIT-{i+1:04d}.md'
    with open(filename, 'w') as f:
        f.write(f"# {title}\\n\\n")
        f.write(f"| Author | u/{author} |\\n")
        f.write(f"|--------|----------|\\n")
        f.write(f"| Score | {score} |\\n\\n")
        f.write(f"---\\n\\n")
        f.write(body or "(no content)")
        f.write("\\n")

    print(f"  REDDIT-{i+1:04d}.md - {title[:40]}...")
PYEOF
        done
        ;;

    wayback)
        echo "Processing Wayback Machine files..."

        for file in "$DOWNLOADS"/wayback-*.html; do
            [ -f "$file" ] || continue
            BASENAME=$(basename "$file" .html)
            echo "  Processing: $BASENAME"

            # Extract text content
            python3 << PYEOF
import re
import html

content = open('$file', 'r', encoding='utf-8', errors='ignore').read()

# Remove scripts and styles
content = re.sub(r'<script[^>]*>.*?</script>', '', content, flags=re.DOTALL)
content = re.sub(r'<style[^>]*>.*?</style>', '', content, flags=re.DOTALL)

# Extract title
title_match = re.search(r'<title>([^<]+)</title>', content)
title = html.unescape(title_match.group(1)) if title_match else "$BASENAME"

# Get body text
body_match = re.search(r'<body[^>]*>(.*?)</body>', content, re.DOTALL)
if body_match:
    body = body_match.group(1)
    body = re.sub(r'<[^>]+>', ' ', body)
    body = html.unescape(body)
    body = re.sub(r'\s+', ' ', body).strip()
else:
    body = "(could not extract body)"

with open('$OUTPUT/posts/$BASENAME.md', 'w') as f:
    f.write(f"# {title}\\n\\n")
    f.write(f"Source: Wayback Machine\\n\\n")
    f.write(f"---\\n\\n")
    f.write(body[:5000])  # Limit length
    f.write("\\n")

print(f"  $BASENAME.md")
PYEOF
        done
        ;;

    medium)
        echo "Processing Medium files..."

        # Handle RSS feed
        for file in "$DOWNLOADS"/medium-*-feed.xml; do
            [ -f "$file" ] || continue
            echo "  Processing RSS: $(basename "$file")"

            python3 << PYEOF
import xml.etree.ElementTree as ET
import html
import re

tree = ET.parse('$file')
root = tree.getroot()

# An Element with no children is falsy, so compare against None explicitly
channel = root.find('channel')
items = channel.findall('item') if channel is not None else root.findall('.//item')

for i, item in enumerate(items):
    title = item.findtext('title', 'Untitled')
    author = item.findtext('{http://purl.org/dc/elements/1.1/}creator', 'Unknown')
    date = item.findtext('pubDate', '')
    content = item.findtext('{http://purl.org/rss/1.0/modules/content/}encoded', '')

    # Clean content
    content = re.sub(r'<[^>]+>', '', content)
    content = html.unescape(content)

    filename = f'$OUTPUT/posts/MEDIUM-{i+1:04d}.md'
    with open(filename, 'w') as f:
        f.write(f"# {title}\\n\\n")
        f.write(f"| Author | {author} |\\n")
        f.write(f"|--------|----------|\\n")
        f.write(f"| Date | {date} |\\n\\n")
        f.write(f"---\\n\\n")
        f.write(content[:10000])
        f.write("\\n")

    print(f"  MEDIUM-{i+1:04d}.md - {title[:40]}...")
PYEOF
        done
        ;;

    *)
        echo "ERROR: Unknown source '$SOURCE'"
        echo "Supported: bitcointalk, reddit, wayback, medium"
        exit 1
        ;;
esac

echo ""
echo "=== Processing Complete ==="
echo "Output: $OUTPUT/"
@@ -30,32 +30,31 @@ Comprehensive collection of distributed ledger, cryptographic protocol, and dece
| oracles | 3 | Chainlink, Band Protocol |
| bridges | 3 | Atomic swaps, XCLAIM, THORChain |

## Usage
## Collection with Borg

```bash
# All papers (91+)
./discover.sh --all > jobs.txt
# Collect papers from academic sources
borg collect website https://eprint.iacr.org --depth 2 --format stim -o iacr-papers.stim

# By category
./discover.sh --category=cryptography > jobs.txt
./discover.sh --category=defi > jobs.txt
# Collect from arXiv
borg collect website https://arxiv.org/list/cs.CR/recent --depth 1

# By topic
./discover.sh --topic=bulletproofs > jobs.txt
./discover.sh --topic=zk-snarks > jobs.txt

# IACR search for more
./discover.sh --search-iacr > search-jobs.txt

# List categories
./discover.sh --help
# Package existing archive
borg compile -f Borgfile -e "archive-password" -o ledger-papers.stim
```

## Output Format
## Registry

```
URL|FILENAME|TYPE|METADATA
https://bitcoin.org/bitcoin.pdf|bitcoin.pdf|paper|category=genesis,title=Bitcoin...
Papers are catalogued in `registry.json`:

```json
{
  "id": "paper-id",
  "title": "Paper Title",
  "year": 2024,
  "url": "https://example.com/paper.pdf",
  "topics": ["topic1", "topic2"]
}
```

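For one-off queries against the registry, plain `jq` is enough. A minimal sketch — it assumes entries expose `topics`, `title`, and `url` fields as in the example above, and the recursive descent keeps it working whether entries sit in a flat array or nested under categories:

```bash
# List title and URL of every paper tagged with a given topic
jq -r '.. | objects | select(.topics? and (.topics | index("bulletproofs"))) | "\(.title): \(.url)"' registry.json
```
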
## CDN Hosting Structure
@@ -69,32 +68,14 @@ papers.lethean.io/
├── cryptonote/
│   ├── cryptonote-v2.pdf
│   └── cns/
│       ├── cns001.txt
│       └── ...
├── mrl/
│   ├── MRL-0001.pdf
│   └── ...
│   └── MRL-0001.pdf
├── cryptography/
│   ├── bulletproofs.pdf
│   ├── clsag.pdf
│   └── ...
│   └── clsag.pdf
└── INDEX.json
```

## Adding Papers

Edit `registry.json`:

```json
{
  "id": "paper-id",
  "title": "Paper Title",
  "year": 2024,
  "url": "https://example.com/paper.pdf",
  "topics": ["topic1", "topic2"]
}
```

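Before committing a new entry, a quick sanity check catches syntax slips and duplicate ids; a minimal sketch:

```bash
# jq exits non-zero if the file is not valid JSON
jq empty registry.json

# Print any id that appears more than once (works for flat or nested registries)
jq -r '.. | objects | select(has("id")) | .id' registry.json | sort | uniq -d
```
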
## License Note

Papers collected for archival/educational purposes. Original copyrights remain with authors. CDN hosting as community service under CIC principles.
Papers collected for archival/educational purposes. Original copyrights remain with authors.

@@ -1,132 +0,0 @@
#!/usr/bin/env bash
# Discover CryptoNote extension papers
# Usage: ./discover.sh [--all] [--category=NAME] [--project=NAME] [--topic=NAME]

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REGISTRY="$SCRIPT_DIR/registry.json"

# Check for jq
if ! command -v jq &> /dev/null; then
    echo "Error: jq is required" >&2
    exit 1
fi

CATEGORY=""
PROJECT=""
TOPIC=""
ALL=0

# Parse args
for arg in "$@"; do
    case "$arg" in
        --all) ALL=1 ;;
        --category=*) CATEGORY="${arg#*=}" ;;
        --project=*) PROJECT="${arg#*=}" ;;
        --topic=*) TOPIC="${arg#*=}" ;;
        --search-iacr) SEARCH_IACR=1 ;;
        --help|-h)
            echo "Usage: $0 [options]"
            echo ""
            echo "Options:"
            echo "  --all             All known papers"
            echo "  --category=NAME   Filter by category (mrl, iacr, projects, attacks)"
            echo "  --project=NAME    Filter by project (monero, haven, masari, etc)"
            echo "  --topic=NAME      Filter by topic (bulletproofs, ringct, etc)"
            echo "  --search-iacr     Generate IACR search jobs"
            echo ""
            echo "Categories:"
            jq -r '.categories | keys[]' "$REGISTRY"
            exit 0
            ;;
    esac
done

echo "# Ledger Papers Archive - $(date +%Y-%m-%d)"
echo "# Format: URL|FILENAME|TYPE|METADATA"
echo "#"

emit_paper() {
    local url="$1"
    local id="$2"
    local category="$3"
    local title="$4"

    local filename="${id}.pdf"
    local metadata="category=$category,title=$title"

    echo "${url}|${filename}|paper|${metadata}"
}

# Process categories
process_category() {
    local cat_name="$1"

    echo "# === $cat_name ==="

    # Get papers in category
    local papers
    papers=$(jq -c ".categories[\"$cat_name\"].papers[]?" "$REGISTRY" 2>/dev/null)

    echo "$papers" | while read -r paper; do
        [ -z "$paper" ] && continue

        local id title url urls
        id=$(echo "$paper" | jq -r '.id')
        title=$(echo "$paper" | jq -r '.title // "Unknown"')

        # Check topic filter
        if [ -n "$TOPIC" ]; then
            if ! echo "$paper" | jq -e ".topics[]? | select(. == \"$TOPIC\")" > /dev/null 2>&1; then
                continue
            fi
        fi

        # Check project filter
        if [ -n "$PROJECT" ]; then
            local paper_project
            paper_project=$(echo "$paper" | jq -r '.project // ""')
            if [ "$paper_project" != "$PROJECT" ]; then
                continue
            fi
        fi

        # Get URL (single or first from array)
        url=$(echo "$paper" | jq -r '.url // .urls[0] // ""')

        if [ -n "$url" ]; then
            emit_paper "$url" "$id" "$cat_name" "$title"
        fi

        # Also emit alternate URLs for wayback
        urls=$(echo "$paper" | jq -r '.urls[]? // empty' 2>/dev/null)
        echo "$urls" | while read -r alt_url; do
            [ -z "$alt_url" ] && continue
            [ "$alt_url" = "$url" ] && continue
            echo "# alt: $alt_url"
        done
    done

    echo "#"
}

# Main logic
if [ "$ALL" = "1" ] || [ -z "$CATEGORY" ]; then
    # All categories - dynamically from registry
    jq -r '.categories | keys[]' "$REGISTRY" | while read -r cat; do
        process_category "$cat"
    done
else
    # Single category
    process_category "$CATEGORY"
fi

# IACR search jobs
if [ "$SEARCH_IACR" = "1" ]; then
    echo "# === IACR Search Jobs ==="
    jq -r '.search_patterns.iacr[]' "$REGISTRY" | while read -r term; do
        encoded=$(echo "$term" | sed 's/ /+/g')
        echo "https://eprint.iacr.org/search?q=${encoded}|iacr-search-${encoded}.html|search|source=iacr,term=$term"
    done
fi
@@ -1,105 +0,0 @@
#!/usr/bin/env bash
# Generate mining pool collection jobs
# Usage: ./generate-jobs.sh <coin> [--url=URL] [--all]

set -e

COIN=""
POOL_URL=""
ALL_POOLS=0

# Known pools registry
declare -A POOLS_LETHEAN=(
    ["herominers"]="https://lethean.herominers.com"
    ["gntl"]="https://lthn.pool.gntl.uk"
)

declare -A POOLS_MONERO=(
    ["supportxmr"]="https://supportxmr.com"
    ["nanopool"]="https://xmr.nanopool.org"
    ["hashvault"]="https://monero.hashvault.pro"
)

declare -A POOLS_WOWNERO=(
    ["herominers"]="https://wownero.herominers.com"
)

# Parse args
for arg in "$@"; do
    case "$arg" in
        --url=*) POOL_URL="${arg#*=}" ;;
        --all) ALL_POOLS=1 ;;
        --*) ;;
        *) COIN="$arg" ;;
    esac
done

emit_pool_jobs() {
    local pool_name="$1"
    local pool_url="$2"
    local coin="$3"

    local slug="${coin}-${pool_name}"

    echo "# === ${pool_name} (${coin}) ==="

    # Common nodejs-pool API endpoints
    echo "${pool_url}/api/stats|pool-${slug}-stats.json|pool-api|coin=$coin,pool=$pool_name"
    echo "${pool_url}/api/pool/blocks|pool-${slug}-blocks.json|pool-api|coin=$coin,pool=$pool_name"
    echo "${pool_url}/api/pool/payments|pool-${slug}-payments.json|pool-api|coin=$coin,pool=$pool_name"
    echo "${pool_url}/api/network/stats|pool-${slug}-network.json|pool-api|coin=$coin,pool=$pool_name"
    echo "${pool_url}/api/config|pool-${slug}-config.json|pool-api|coin=$coin,pool=$pool_name"

    # Web pages
    echo "${pool_url}/|pool-${slug}-home.html|pool-web|coin=$coin,pool=$pool_name"
    echo "${pool_url}/#/blocks|pool-${slug}-blocks-page.html|pool-web|coin=$coin,pool=$pool_name"

    echo "#"
}

echo "# Mining Pool Jobs - $(date +%Y-%m-%d)"
echo "# Format: URL|FILENAME|TYPE|METADATA"
echo "#"

if [ "$ALL_POOLS" = "1" ]; then
    for pool in "${!POOLS_LETHEAN[@]}"; do
        emit_pool_jobs "$pool" "${POOLS_LETHEAN[$pool]}" "lethean"
    done
    for pool in "${!POOLS_MONERO[@]}"; do
        emit_pool_jobs "$pool" "${POOLS_MONERO[$pool]}" "monero"
    done
    for pool in "${!POOLS_WOWNERO[@]}"; do
        emit_pool_jobs "$pool" "${POOLS_WOWNERO[$pool]}" "wownero"
    done
elif [ -n "$POOL_URL" ]; then
    pool_name=$(echo "$POOL_URL" | sed 's|.*://||; s|/.*||; s|\..*||')
    emit_pool_jobs "$pool_name" "$POOL_URL" "${COIN:-unknown}"
elif [ -n "$COIN" ]; then
    case "$COIN" in
        lethean|lthn)
            for pool in "${!POOLS_LETHEAN[@]}"; do
                emit_pool_jobs "$pool" "${POOLS_LETHEAN[$pool]}" "lethean"
            done
            ;;
        monero|xmr)
            for pool in "${!POOLS_MONERO[@]}"; do
                emit_pool_jobs "$pool" "${POOLS_MONERO[$pool]}" "monero"
            done
            ;;
        wownero|wow)
            for pool in "${!POOLS_WOWNERO[@]}"; do
                emit_pool_jobs "$pool" "${POOLS_WOWNERO[$pool]}" "wownero"
            done
            ;;
        *)
            echo "# Unknown coin: $COIN" >&2
            echo "# Use --url= to specify pool URL" >&2
            exit 1
            ;;
    esac
else
    echo "Usage: $0 <coin> [--url=URL] [--all]" >&2
    echo "" >&2
    echo "Known coins: lethean, monero, wownero" >&2
    exit 1
fi
@@ -11,54 +11,40 @@ When a CryptoNote project dies, its artifacts scatter:
- Block explorers shut down
- Discord servers empty out

This skill orchestrates a **full dig** on a dead project — running all collectors in sequence to preserve everything salvageable before it's gone forever.
This skill orchestrates a **full dig** using Borg to preserve everything salvageable.

## Usage
## Collection with Borg

```bash
# Full excavation of a project
./excavate.sh masari
# Clone all repos from a dying project
borg collect github repos masari-project --format stim -o masari-github.stim

# Quick scan (just check what's still accessible)
./excavate.sh masari --scan-only
# Archive the website via Wayback
borg collect website https://web.archive.org/web/*/getmasari.org --depth 3

# Specific collectors only
./excavate.sh masari --only=github,bitcointalk

# Resume interrupted dig
./excavate.sh masari --resume
# Package everything into encrypted archive
borg compile -f Borgfile -e "archive-password" -o masari-full-dig.stim
```

## What Gets Collected

| Source | Collector Used | Priority |
|--------|----------------|----------|
| GitHub repos | `github-history` | P1 - often deleted first |
| GitHub releases | `wallet-releases` | P1 - binaries disappear |
| BitcoinTalk ANN | `bitcointalk` | P2 - usually persists |
| Website (Wayback) | `job-collector wayback` | P2 - snapshots exist |
| Block explorer | `block-explorer` | P3 - chain data |
| CoinMarketCap | `coinmarketcap` | P3 - historical prices |
| Whitepapers | `whitepaper-archive` | P1 - research value |
| Reddit | `job-collector reddit` | P4 - community context |
| Medium posts | `job-collector medium` | P4 - announcements |
| Source | Borg Command | Priority |
|--------|--------------|----------|
| GitHub repos | `borg collect github repos <org>` | P1 |
| GitHub releases | `borg collect github repo <url>` | P1 |
| Websites | `borg collect website <url>` | P2 |
| Wayback snapshots | `borg collect website web.archive.org/...` | P2 |

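In practice a dig is just these commands run in priority order, highest first; a minimal sketch for a single project (the org name and domain below are placeholders, not a real target):

```bash
# P1 first: repos tend to vanish before anything else
borg collect github repos dead-project --format stim -o digs/dead-project-github.stim

# P2 next: pull what the Wayback Machine still holds of the website
borg collect website https://web.archive.org/web/*/deadproject.org --depth 3
```
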
## Output Structure

```
digs/
└── <project-name>/
    ├── EXCAVATION.md       # Dig log with timestamps
    ├── SALVAGE-REPORT.md   # What's worth keeping
    ├── LESSONS.md          # What killed it, what we learned
    ├── github/             # All repo history
    ├── releases/           # Wallet binaries, checksums
    ├── bitcointalk/        # Thread archive
    ├── website/            # Wayback snapshots
    ├── explorer/           # Chain data samples
    ├── market/             # Price history, volume
    ├── papers/             # Whitepapers, docs
    └── community/          # Reddit, Medium, etc
    ├── github.stim         # All repo history (encrypted)
    ├── website.stim        # Website snapshots (encrypted)
    └── papers/             # Whitepapers, docs
```

## Report Templates
@@ -69,32 +55,15 @@ What code/ideas are worth extracting:
- Wallet features
- Mining algorithms
- Community tools
- Documentation patterns

### LESSONS.md
Post-mortem analysis:
- Timeline of decline
- Root causes (dev burnout, drama, funding, tech debt)
- Warning signs to watch for
- What could have saved it

## Integration with cryptonote-discovery

```bash
# Get list of abandoned projects
cd ../cryptonote-discovery
./discover.sh --list-abandoned

# Excavate all abandoned projects (batch mode)
for proj in $(./discover.sh --list-abandoned); do
    ../project-archaeology/excavate.sh "$proj"
done
```

## Known Dig Sites

Projects confirmed dead/dying that need excavation:

| Project | Symbol | Death Year | Urgency | Notes |
|---------|--------|------------|---------|-------|
| TurtleCoin | TRTL | 2023 | HIGH | Team burned out, great docs |
@@ -104,22 +73,14 @@ Projects confirmed dead/dying that need excavation:
| Sumokoin | SUMO | 2021 | LOW | Drama-killed, large ring research |
| Ryo | RYO | 2023 | LOW | GPU algo work |

## Requirements
## Batch Excavation

- All collector skills installed
- `gh` CLI authenticated
- `jq` installed
- Sufficient disk space for archives
- Patience (full dig can take hours)

## Adding New Dig Sites

When you discover a dead CryptoNote project:

1. Add to `../cryptonote-discovery/registry.json`
2. Include `"salvageable": [...]` field
3. Run `./excavate.sh <project> --scan-only` first
4. If sources still accessible, run full dig
```bash
# Collect multiple dead projects
for org in turtlecoin masari-project aeonix nerva-project; do
    borg collect github repos "$org" --format stim -o "digs/${org}.stim"
done
```

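After a batch run, it's worth confirming that every archive actually landed; a minimal check in plain shell:

```bash
# Flag any expected .stim archive that is missing or empty
for org in turtlecoin masari-project aeonix nerva-project; do
    [ -s "digs/${org}.stim" ] || echo "MISSING OR EMPTY: digs/${org}.stim"
done
```
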
---

@@ -1,311 +0,0 @@
#!/bin/bash
# Project Archaeology - Deep excavation of abandoned CryptoNote projects
# Usage: ./excavate.sh <project-name> [options]

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SKILLS_DIR="$(dirname "$SCRIPT_DIR")"
REGISTRY="$SKILLS_DIR/cryptonote-discovery/registry.json"
OUTPUT_DIR="$SCRIPT_DIR/digs"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Defaults
SCAN_ONLY=false
RESUME=false
ONLY_COLLECTORS=""

usage() {
    echo "Usage: $0 <project-name> [options]"
    echo ""
    echo "Options:"
    echo "  --scan-only    Check what's accessible without downloading"
    echo "  --resume       Resume interrupted excavation"
    echo "  --only=a,b,c   Run specific collectors only"
    echo "  --help         Show this help"
    echo ""
    echo "Examples:"
    echo "  $0 masari                     # Full excavation"
    echo "  $0 masari --scan-only         # Quick accessibility check"
    echo "  $0 masari --only=github,btt   # GitHub and BitcoinTalk only"
    exit 1
}

log() {
    echo -e "${BLUE}[$(date '+%H:%M:%S')]${NC} $1"
}

success() {
    echo -e "${GREEN}[✓]${NC} $1"
}

warn() {
    echo -e "${YELLOW}[!]${NC} $1"
}

error() {
    echo -e "${RED}[✗]${NC} $1"
}

# Get project data from registry
get_project() {
    local name="$1"
    jq -r --arg n "$name" '.projects[] | select(.name | ascii_downcase == ($n | ascii_downcase))' "$REGISTRY"
}

# Check if a collector should run
should_run() {
    local collector="$1"
    if [ -z "$ONLY_COLLECTORS" ]; then
        return 0
    fi
    echo "$ONLY_COLLECTORS" | grep -q "$collector"
}

# Scan a URL to check if accessible
check_url() {
    local url="$1"
    local status=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$url" 2>/dev/null || echo "000")
    if [ "$status" = "200" ] || [ "$status" = "301" ] || [ "$status" = "302" ]; then
        return 0
    fi
    return 1
}

# Main excavation function
excavate() {
    local project_name="$1"
    local project=$(get_project "$project_name")

    if [ -z "$project" ] || [ "$project" = "null" ]; then
        error "Project '$project_name' not found in registry"
        echo "Add it to: $REGISTRY"
        exit 1
    fi

    # Extract project data
    local name=$(echo "$project" | jq -r '.name')
    local symbol=$(echo "$project" | jq -r '.symbol')
    local status=$(echo "$project" | jq -r '.status')
    local github_orgs=$(echo "$project" | jq -r '.github[]?' 2>/dev/null)
    local btt_topic=$(echo "$project" | jq -r '.bitcointalk // empty')
    local website=$(echo "$project" | jq -r '.website // empty')
    local explorer=$(echo "$project" | jq -r '.explorer // empty')
    local cmc=$(echo "$project" | jq -r '.cmc // empty')

    echo ""
    echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
    echo -e "${BLUE}  PROJECT ARCHAEOLOGY: ${name} (${symbol})${NC}"
    echo -e "${BLUE}  Status: ${status}${NC}"
    echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
    echo ""

    # Create output directory
    local dig_dir="$OUTPUT_DIR/$project_name"
    mkdir -p "$dig_dir"/{github,releases,bitcointalk,website,explorer,market,papers,community}

    # Start excavation log
    local log_file="$dig_dir/EXCAVATION.md"
    echo "# Excavation Log: $name ($symbol)" > "$log_file"
    echo "" >> "$log_file"
    echo "**Started:** $(date)" >> "$log_file"
    echo "**Status at dig time:** $status" >> "$log_file"
    echo "" >> "$log_file"
    echo "---" >> "$log_file"
    echo "" >> "$log_file"

    # Phase 1: GitHub (highest priority - often deleted first)
    if should_run "github"; then
        echo "## GitHub Repositories" >> "$log_file"
        echo "" >> "$log_file"

        for org in $github_orgs; do
            log "Checking GitHub org: $org"

            if $SCAN_ONLY; then
                if check_url "https://github.com/$org"; then
                    success "GitHub org accessible: $org"
                    echo "- [x] \`$org\` - accessible" >> "$log_file"
                else
                    warn "GitHub org NOT accessible: $org"
                    echo "- [ ] \`$org\` - NOT accessible" >> "$log_file"
                fi
            else
                log "Running github-history collector on $org..."
                # Would call: $SKILLS_DIR/github-history/collect.sh "https://github.com/$org" --org
                echo "- Collected: \`$org\`" >> "$log_file"
            fi
        done
        echo "" >> "$log_file"
    fi

    # Phase 2: BitcoinTalk
    if should_run "btt" || should_run "bitcointalk"; then
        echo "## BitcoinTalk Thread" >> "$log_file"
        echo "" >> "$log_file"

        if [ -n "$btt_topic" ]; then
            local btt_url="https://bitcointalk.org/index.php?topic=$btt_topic"
            log "Checking BitcoinTalk topic: $btt_topic"

            if $SCAN_ONLY; then
                if check_url "$btt_url"; then
                    success "BitcoinTalk thread accessible"
                    echo "- [x] Topic $btt_topic - accessible" >> "$log_file"
                else
                    warn "BitcoinTalk thread NOT accessible"
                    echo "- [ ] Topic $btt_topic - NOT accessible" >> "$log_file"
                fi
            else
                log "Running bitcointalk collector..."
                # Would call: $SKILLS_DIR/bitcointalk/collect.sh "$btt_topic"
                echo "- Collected: Topic $btt_topic" >> "$log_file"
            fi
        else
            warn "No BitcoinTalk topic ID in registry"
            echo "- [ ] No topic ID recorded" >> "$log_file"
        fi
        echo "" >> "$log_file"
    fi

    # Phase 3: Website via Wayback
    if should_run "wayback" || should_run "website"; then
        echo "## Website (Wayback Machine)" >> "$log_file"
        echo "" >> "$log_file"

        if [ -n "$website" ]; then
            log "Checking Wayback Machine for: $website"
            local wayback_api="https://archive.org/wayback/available?url=$website"

            if $SCAN_ONLY; then
                local wayback_check=$(curl -s "$wayback_api" | jq -r '.archived_snapshots.closest.available // "false"')
                if [ "$wayback_check" = "true" ]; then
                    success "Wayback snapshots available for $website"
                    echo "- [x] \`$website\` - snapshots available" >> "$log_file"
                else
                    warn "No Wayback snapshots for $website"
                    echo "- [ ] \`$website\` - no snapshots" >> "$log_file"
                fi
            else
                log "Running wayback collector..."
                # Would call: $SKILLS_DIR/job-collector/generate-jobs.sh wayback "$website"
                echo "- Collected: \`$website\`" >> "$log_file"
            fi
        else
            warn "No website in registry"
            echo "- [ ] No website recorded" >> "$log_file"
        fi
        echo "" >> "$log_file"
    fi

    # Phase 4: Block Explorer
    if should_run "explorer"; then
        echo "## Block Explorer" >> "$log_file"
        echo "" >> "$log_file"

        if [ -n "$explorer" ]; then
            log "Checking block explorer: $explorer"

            if $SCAN_ONLY; then
                if check_url "https://$explorer"; then
                    success "Block explorer online: $explorer"
                    echo "- [x] \`$explorer\` - online" >> "$log_file"
                else
                    warn "Block explorer OFFLINE: $explorer"
                    echo "- [ ] \`$explorer\` - OFFLINE" >> "$log_file"
                fi
            else
                log "Running block-explorer collector..."
                echo "- Collected: \`$explorer\`" >> "$log_file"
            fi
        else
            warn "No explorer in registry"
            echo "- [ ] No explorer recorded" >> "$log_file"
        fi
        echo "" >> "$log_file"
    fi

    # Phase 5: Market Data (CMC)
    if should_run "cmc" || should_run "market"; then
        echo "## Market Data" >> "$log_file"
        echo "" >> "$log_file"

        if [ -n "$cmc" ]; then
            log "Checking CoinMarketCap: $cmc"

            if $SCAN_ONLY; then
                if check_url "https://coinmarketcap.com/currencies/$cmc/"; then
                    success "CMC page exists: $cmc"
                    echo "- [x] CMC: \`$cmc\` - exists" >> "$log_file"
                else
                    warn "CMC page NOT found: $cmc"
                    echo "- [ ] CMC: \`$cmc\` - not found" >> "$log_file"
                fi
            else
                log "Running coinmarketcap collector..."
                echo "- Collected: \`$cmc\`" >> "$log_file"
            fi
        else
            warn "No CMC slug in registry"
            echo "- [ ] No CMC slug recorded" >> "$log_file"
        fi
        echo "" >> "$log_file"
    fi

    # Finalize log
    echo "---" >> "$log_file"
    echo "" >> "$log_file"
    echo "**Completed:** $(date)" >> "$log_file"

    if $SCAN_ONLY; then
        echo ""
        success "Scan complete. See: $log_file"
    else
        echo ""
        success "Excavation complete. Output in: $dig_dir"
        echo ""
        log "Next steps:"
        echo "  1. Review: $log_file"
        echo "  2. Generate: $dig_dir/SALVAGE-REPORT.md"
        echo "  3. Write: $dig_dir/LESSONS.md"
    fi
}

# Parse arguments
if [ $# -lt 1 ]; then
    usage
fi

PROJECT="$1"
shift

while [ $# -gt 0 ]; do
    case "$1" in
        --scan-only)
            SCAN_ONLY=true
            ;;
        --resume)
            RESUME=true
            ;;
        --only=*)
            ONLY_COLLECTORS="${1#*=}"
            ;;
        --help)
            usage
            ;;
        *)
            error "Unknown option: $1"
            usage
            ;;
    esac
    shift
done

# Run excavation
excavate "$PROJECT"
@@ -1,38 +0,0 @@
#!/usr/bin/env bash
# Hook: update-index.sh
# Called after collection completes to update indexes

WHITEPAPERS_DIR="${1:-./whitepapers}"

echo "[update-index] Updating whitepaper index..."

# Count papers in each category
for category in cryptonote lethean research uncategorized; do
    dir="$WHITEPAPERS_DIR/$category"
    if [ -d "$dir" ]; then
        count=$(find "$dir" -name "*.pdf" 2>/dev/null | wc -l | tr -d ' ')
        echo "  $category: $count papers"
    fi
done

# Update INDEX.md with collected papers
INDEX="$WHITEPAPERS_DIR/INDEX.md"
if [ -f "$INDEX" ]; then
    # Add collected papers section if not exists
    if ! grep -q "## Recently Collected" "$INDEX"; then
        echo "" >> "$INDEX"
        echo "## Recently Collected" >> "$INDEX"
        echo "" >> "$INDEX"
        echo "_Last updated: $(date +%Y-%m-%d)_" >> "$INDEX"
        echo "" >> "$INDEX"
    fi
fi

# Process pending jobs
PENDING="$WHITEPAPERS_DIR/.pending-jobs.txt"
if [ -f "$PENDING" ]; then
    count=$(wc -l < "$PENDING" | tr -d ' ')
    echo "[update-index] $count papers queued for collection"
fi

echo "[update-index] Done"