From 3c3d3de1a159e3480c2637e7dfc23625db61b2b5 Mon Sep 17 00:00:00 2001 From: Snider Date: Mon, 2 Feb 2026 00:04:24 +0000 Subject: [PATCH] refactor(collect): replace bash scripts with Borg CLI Remove custom collection scripts in favour of Borg (github.com/Snider/Borg) for data collection. Skills now document what to collect, with Borg handling the actual collection. Removed: - collect-whitepaper.sh, dispatch.sh, update-index.sh - All skill-specific bash scripts (collect.sh, generate-jobs.sh, etc.) - hooks.json and HOOKS.md Updated: - plugin.json to reference Borg dependency - SKILL.md files with Borg command examples Co-Authored-By: Claude Opus 4.5 --- claude/collect/.claude-plugin/plugin.json | 8 +- claude/collect/HOOKS.md | 90 --- claude/collect/collect-whitepaper.sh | 59 -- claude/collect/dispatch.sh | 80 --- claude/collect/hooks.json | 45 -- claude/collect/skills/bitcointalk/collect.sh | 269 --------- .../skills/block-explorer/generate-jobs.sh | 106 ---- .../skills/coinmarketcap/generate-jobs.sh | 89 --- .../collect/skills/coinmarketcap/process.sh | 226 -------- .../skills/cryptonote-discovery/discover.sh | 124 ----- claude/collect/skills/github-history/SKILL.md | 173 ++---- .../collect/skills/github-history/collect.sh | 516 ------------------ .../skills/job-collector/generate-jobs.sh | 107 ---- .../collect/skills/job-collector/process.sh | 242 -------- claude/collect/skills/ledger-papers/SKILL.md | 61 +-- .../collect/skills/ledger-papers/discover.sh | 132 ----- .../skills/mining-pools/generate-jobs.sh | 105 ---- .../skills/project-archaeology/SKILL.md | 87 +-- .../skills/project-archaeology/excavate.sh | 311 ----------- claude/collect/update-index.sh | 38 -- 20 files changed, 108 insertions(+), 2760 deletions(-) delete mode 100644 claude/collect/HOOKS.md delete mode 100755 claude/collect/collect-whitepaper.sh delete mode 100755 claude/collect/dispatch.sh delete mode 100644 claude/collect/hooks.json delete mode 100644 claude/collect/skills/bitcointalk/collect.sh delete mode 100644 claude/collect/skills/block-explorer/generate-jobs.sh delete mode 100644 claude/collect/skills/coinmarketcap/generate-jobs.sh delete mode 100644 claude/collect/skills/coinmarketcap/process.sh delete mode 100644 claude/collect/skills/cryptonote-discovery/discover.sh delete mode 100755 claude/collect/skills/github-history/collect.sh delete mode 100644 claude/collect/skills/job-collector/generate-jobs.sh delete mode 100644 claude/collect/skills/job-collector/process.sh delete mode 100755 claude/collect/skills/ledger-papers/discover.sh delete mode 100644 claude/collect/skills/mining-pools/generate-jobs.sh delete mode 100755 claude/collect/skills/project-archaeology/excavate.sh delete mode 100755 claude/collect/update-index.sh diff --git a/claude/collect/.claude-plugin/plugin.json b/claude/collect/.claude-plugin/plugin.json index cb91398..8ce8086 100644 --- a/claude/collect/.claude-plugin/plugin.json +++ b/claude/collect/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "collect", - "description": "Data collection skills for cryptocurrency research - whitepapers, forum archives, project archaeology, and blockchain history", + "description": "Data collection skills using Borg CLI - whitepapers, forum archives, project archaeology, and blockchain history", "version": "0.1.0", "author": { "name": "Host UK", @@ -14,10 +14,14 @@ "license": "EUPL-1.2", "keywords": [ "data-collection", + "borg", "cryptocurrency", "archive", "whitepapers", "blockchain", "research" - ] + ], + "dependencies": { + "borg": 
"github.com/Snider/Borg@v0.1.0" + } } diff --git a/claude/collect/HOOKS.md b/claude/collect/HOOKS.md deleted file mode 100644 index 840b72b..0000000 --- a/claude/collect/HOOKS.md +++ /dev/null @@ -1,90 +0,0 @@ -# Collection Hooks - -Event-driven hooks that trigger during data collection. - -## Available Hooks - -| Hook | Trigger | Purpose | -|------|---------|---------| -| `collect-whitepaper.sh` | PDF/paper URL detected | Auto-queue whitepapers | -| `on-github-release.sh` | Release found | Archive release metadata | -| `on-explorer-block.sh` | Block data fetched | Index blockchain data | - -## Hook Events - -### `on_url_found` -Fired when a new URL is discovered during collection. - -```bash -# Pattern matching -*.pdf → collect-whitepaper.sh -*/releases/* → on-github-release.sh -*/api/block/* → on-explorer-block.sh -``` - -### `on_file_collected` -Fired after a file is successfully downloaded. - -```bash -# Post-processing -*.json → validate-json.sh -*.html → extract-links.sh -*.pdf → extract-metadata.sh -``` - -### `on_collection_complete` -Fired when a job batch finishes. - -```bash -# Reporting -→ generate-index.sh -→ update-registry.sh -``` - -## Plugin Integration - -For the marketplace plugin system: - -```json -{ - "name": "whitepaper-collector", - "version": "1.0.0", - "hooks": { - "on_url_found": { - "pattern": "*.pdf", - "handler": "./collect-whitepaper.sh" - } - } -} -``` - -## Registration - -Hooks register in `hooks.json`: - -```json -{ - "on_url_found": [ - { - "pattern": "\\.pdf$", - "handler": "./hooks/collect-whitepaper.sh", - "priority": 10 - } - ] -} -``` - -## Usage in Collectors - -Collectors call hooks via: - -```bash -# In job-collector/process.sh -source ./hooks/dispatch.sh - -# When URL found -dispatch_hook "on_url_found" "$URL" - -# When file collected -dispatch_hook "on_file_collected" "$FILE" "$TYPE" -``` diff --git a/claude/collect/collect-whitepaper.sh b/claude/collect/collect-whitepaper.sh deleted file mode 100755 index 495a72a..0000000 --- a/claude/collect/collect-whitepaper.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Hook: collect-whitepaper.sh -# Called when a whitepaper URL is detected during collection -# Usage: ./collect-whitepaper.sh [destination-folder] - -set -e - -URL="$1" -DEST="${2:-./whitepapers}" - -if [ -z "$URL" ]; then - echo "Usage: $0 [destination]" >&2 - exit 1 -fi - -# Detect paper type from URL -detect_category() { - local url="$1" - case "$url" in - *cryptonote*) echo "cryptonote" ;; - *iacr.org*|*eprint*) echo "research" ;; - *arxiv.org*) echo "research" ;; - *monero*|*getmonero*) echo "research" ;; - *lethean*|*lthn*) echo "lethean" ;; - *) echo "uncategorized" ;; - esac -} - -# Generate safe filename from URL -safe_filename() { - local url="$1" - basename "$url" | sed 's/[^a-zA-Z0-9._-]/-/g' -} - -CATEGORY=$(detect_category "$URL") -FILENAME=$(safe_filename "$URL") -TARGET_DIR="$DEST/$CATEGORY" -TARGET_FILE="$TARGET_DIR/$FILENAME" - -mkdir -p "$TARGET_DIR" - -# Check if already collected -if [ -f "$TARGET_FILE" ]; then - echo "Already collected: $TARGET_FILE" - exit 0 -fi - -echo "Collecting whitepaper:" -echo " URL: $URL" -echo " Category: $CATEGORY" -echo " Destination: $TARGET_FILE" - -# Create job entry for proxy collection -echo "$URL|$FILENAME|whitepaper|category=$CATEGORY" >> "$DEST/.pending-jobs.txt" - -echo "Job queued: $DEST/.pending-jobs.txt" -echo "" -echo "To collect immediately (if you have direct access):" -echo " curl -L -o '$TARGET_FILE' '$URL'" diff --git a/claude/collect/dispatch.sh 
b/claude/collect/dispatch.sh deleted file mode 100755 index 66cfb3f..0000000 --- a/claude/collect/dispatch.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash -# Hook dispatcher - source this in collectors -# Usage: source ./hooks/dispatch.sh - -HOOKS_DIR="$(dirname "${BASH_SOURCE[0]}")" -HOOKS_JSON="$HOOKS_DIR/hooks.json" - -# Dispatch a hook event -# dispatch_hook [arg2] ... -dispatch_hook() { - local event="$1" - shift - local args=("$@") - - if [ ! -f "$HOOKS_JSON" ]; then - return 0 - fi - - # Get handlers for this event (requires jq) - if ! command -v jq &> /dev/null; then - echo "Warning: jq not installed, hooks disabled" >&2 - return 0 - fi - - local handlers - handlers=$(jq -r ".hooks[\"$event\"][]? | select(.enabled == true) | @json" "$HOOKS_JSON" 2>/dev/null) - - if [ -z "$handlers" ]; then - return 0 - fi - - echo "$handlers" | while read -r handler_json; do - local name pattern handler_script priority - name=$(echo "$handler_json" | jq -r '.name') - pattern=$(echo "$handler_json" | jq -r '.pattern // ""') - handler_script=$(echo "$handler_json" | jq -r '.handler') - - # Check pattern match if pattern exists - if [ -n "$pattern" ] && [ -n "${args[0]}" ]; then - if ! echo "${args[0]}" | grep -qE "$pattern"; then - continue - fi - fi - - # Execute handler - local full_path="$HOOKS_DIR/$handler_script" - if [ -x "$full_path" ]; then - echo "[hook] $name: ${args[*]}" >&2 - "$full_path" "${args[@]}" - elif [ -f "$full_path" ]; then - echo "[hook] $name: ${args[*]}" >&2 - bash "$full_path" "${args[@]}" - fi - done -} - -# Register a new hook dynamically -# register_hook -register_hook() { - local event="$1" - local name="$2" - local pattern="$3" - local handler="$4" - - if ! command -v jq &> /dev/null; then - echo "Error: jq required for hook registration" >&2 - return 1 - fi - - local new_hook - new_hook=$(jq -n \ - --arg name "$name" \ - --arg pattern "$pattern" \ - --arg handler "$handler" \ - '{name: $name, pattern: $pattern, handler: $handler, priority: 50, enabled: true}') - - # Add to hooks.json - jq ".hooks[\"$event\"] += [$new_hook]" "$HOOKS_JSON" > "$HOOKS_JSON.tmp" \ - && mv "$HOOKS_JSON.tmp" "$HOOKS_JSON" -} diff --git a/claude/collect/hooks.json b/claude/collect/hooks.json deleted file mode 100644 index b2e0205..0000000 --- a/claude/collect/hooks.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "version": "1.0.0", - "hooks": { - "on_url_found": [ - { - "name": "whitepaper-collector", - "pattern": "\\.pdf$", - "handler": "./collect-whitepaper.sh", - "priority": 10, - "enabled": true - }, - { - "name": "whitepaper-iacr", - "pattern": "eprint\\.iacr\\.org", - "handler": "./collect-whitepaper.sh", - "priority": 10, - "enabled": true - }, - { - "name": "whitepaper-arxiv", - "pattern": "arxiv\\.org", - "handler": "./collect-whitepaper.sh", - "priority": 10, - "enabled": true - } - ], - "on_file_collected": [ - { - "name": "pdf-metadata", - "pattern": "\\.pdf$", - "handler": "./extract-pdf-metadata.sh", - "priority": 5, - "enabled": false - } - ], - "on_collection_complete": [ - { - "name": "update-index", - "handler": "./update-index.sh", - "priority": 100, - "enabled": true - } - ] - } -} diff --git a/claude/collect/skills/bitcointalk/collect.sh b/claude/collect/skills/bitcointalk/collect.sh deleted file mode 100644 index ea01052..0000000 --- a/claude/collect/skills/bitcointalk/collect.sh +++ /dev/null @@ -1,269 +0,0 @@ -#!/usr/bin/env bash -# BitcoinTalk Thread Collector -# Usage: ./collect.sh [--pages=N] [--output=DIR] - -set -e - -DELAY=2 # Be respectful to BTT servers 
-MAX_PAGES=0 # 0 = all pages -OUTPUT_BASE="." - -# Parse topic ID from URL or direct input -parse_topic_id() { - local input="$1" - if [[ "$input" =~ topic=([0-9]+) ]]; then - echo "${BASH_REMATCH[1]}" - else - echo "$input" | grep -oE '[0-9]+' - fi -} - -# Fetch a single page -fetch_page() { - local topic_id="$1" - local offset="$2" - local output_file="$3" - - local url="https://bitcointalk.org/index.php?topic=${topic_id}.${offset}" - echo " Fetching: $url" - - curl -s -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" \ - -H "Accept: text/html" \ - "$url" > "$output_file" - - sleep $DELAY -} - -# Check if page has posts -page_has_posts() { - local html_file="$1" - grep -q 'class="post"' "$html_file" 2>/dev/null -} - -# Get last page number from first page -get_last_page() { - local html_file="$1" - # Look for navigation like "Pages: [1] 2 3 ... 50" - local max_page=$(grep -oE 'topic=[0-9]+\.[0-9]+' "$html_file" | \ - sed 's/.*\.//' | sort -rn | head -1) - echo "${max_page:-0}" -} - -# Extract posts from HTML (simplified - works for basic extraction) -extract_posts_simple() { - local html_file="$1" - local output_dir="$2" - local post_offset="$3" - - # Use Python for reliable HTML parsing - python3 << PYEOF -import re -import html -import os -from datetime import datetime - -html_content = open('$html_file', 'r', encoding='utf-8', errors='ignore').read() - -# Pattern to find posts - BTT structure -post_pattern = r'(.*?)\s*\s*\s*\s*' -author_pattern = r']*>([^<]+)' -date_pattern = r'
<div class="smalltext">([A-Za-z]+ \d+, \d+, \d+:\d+:\d+ [AP]M)</div>' -post_content_pattern = r'<div class="post"[^>]*>(.*?)</div>\s*(?:<div class="post">
)' - -posts = re.findall(post_pattern, html_content, re.DOTALL) -post_num = $post_offset - -for post_html in posts: - post_num += 1 - - # Extract author - author_match = re.search(author_pattern, post_html) - author = author_match.group(1) if author_match else "Unknown" - - # Extract date - date_match = re.search(date_pattern, post_html) - date_str = date_match.group(1) if date_match else "Unknown date" - - # Extract content - content_match = re.search(post_content_pattern, post_html, re.DOTALL) - if content_match: - content = content_match.group(1) - # Clean HTML - content = re.sub(r'', '\n', content) - content = re.sub(r'<[^>]+>', '', content) - content = html.unescape(content) - content = content.strip() - else: - content = "(Could not extract content)" - - # Determine post type/score - score = "COMMUNITY" - if post_num == 1: - score = "ANN" - elif re.search(r'\[UPDATE\]|\[RELEASE\]|\[ANNOUNCEMENT\]', content, re.I): - score = "UPDATE" - elif '?' in content[:200]: - score = "QUESTION" - - # Write post file - filename = f"$output_dir/POST-{post_num:04d}.md" - with open(filename, 'w') as f: - f.write(f"# Post #{post_num}\n\n") - f.write(f"## Metadata\n\n") - f.write(f"| Field | Value |\n") - f.write(f"|-------|-------|\n") - f.write(f"| Author | {author} |\n") - f.write(f"| Date | {date_str} |\n") - f.write(f"| Type | **{score}** |\n\n") - f.write(f"---\n\n") - f.write(f"## Content\n\n") - f.write(content) - f.write(f"\n") - - print(f" Created POST-{post_num:04d}.md ({score}) by {author}") - -print(f"EXTRACTED:{post_num}") -PYEOF -} - -# Main collection function -collect_thread() { - local topic_id="$1" - local output_dir="$OUTPUT_BASE/bitcointalk-$topic_id" - - mkdir -p "$output_dir/pages" "$output_dir/posts" - - echo "=== Collecting BitcoinTalk Topic: $topic_id ===" - - # Fetch first page to get thread info - fetch_page "$topic_id" 0 "$output_dir/pages/page-0.html" - - # Extract thread title - local title=$(grep -oP '\K[^<]+' "$output_dir/pages/page-0.html" | head -1) - echo "Thread: $title" - - # Get total pages - local last_offset=$(get_last_page "$output_dir/pages/page-0.html") - local total_pages=$(( (last_offset / 20) + 1 )) - echo "Total pages: $total_pages" - - if [ "$MAX_PAGES" -gt 0 ] && [ "$MAX_PAGES" -lt "$total_pages" ]; then - total_pages=$MAX_PAGES - echo "Limiting to: $total_pages pages" - fi - - # Extract posts from first page - local post_count=0 - local result=$(extract_posts_simple "$output_dir/pages/page-0.html" "$output_dir/posts" 0) - post_count=$(echo "$result" | grep "EXTRACTED:" | cut -d: -f2) - - # Fetch remaining pages - for (( page=1; page<total_pages; page++ )); do - local offset=$((page * 20)) - fetch_page "$topic_id" "$offset" "$output_dir/pages/page-$offset.html" - - if ! page_has_posts "$output_dir/pages/page-$offset.html"; then - echo " No more posts found, stopping." 
- break - fi - - result=$(extract_posts_simple "$output_dir/pages/page-$offset.html" "$output_dir/posts" "$post_count") - post_count=$(echo "$result" | grep "EXTRACTED:" | cut -d: -f2) - done - - # Generate index - generate_index "$output_dir" "$title" "$topic_id" "$post_count" - - echo "" - echo "=== Collection Complete ===" - echo "Posts: $post_count" - echo "Output: $output_dir/" -} - -# Generate index file -generate_index() { - local output_dir="$1" - local title="$2" - local topic_id="$3" - local post_count="$4" - - cat > "$output_dir/INDEX.md" << EOF -# BitcoinTalk Thread Archive - -## Thread Info - -| Field | Value | -|-------|-------| -| Title | $title | -| Topic ID | $topic_id | -| URL | https://bitcointalk.org/index.php?topic=$topic_id.0 | -| Posts Archived | $post_count | -| Collected | $(date +%Y-%m-%d) | - ---- - -## Post Type Legend - -| Type | Meaning | -|------|---------| -| ANN | Original announcement | -| UPDATE | Official team update | -| QUESTION | Community question | -| ANSWER | Team response | -| COMMUNITY | General discussion | -| CONCERN | Raised issue/criticism | - ---- - -## Posts - -| # | Author | Date | Type | -|---|--------|------|------| -EOF - - for file in "$output_dir/posts/"POST-*.md; do - [ -f "$file" ] || continue - local num=$(basename "$file" .md | sed 's/POST-0*//') - local author=$(grep "| Author |" "$file" | sed 's/.*| Author | \(.*\) |/\1/') - local date=$(grep "| Date |" "$file" | sed 's/.*| Date | \(.*\) |/\1/') - local type=$(sed -n '/| Type |/s/.*\*\*\([A-Z]*\)\*\*.*/\1/p' "$file") - echo "| [$num](posts/POST-$(printf "%04d" $num).md) | $author | $date | $type |" >> "$output_dir/INDEX.md" - done - - echo " Created INDEX.md" -} - -# Parse arguments -main() { - local topic_input="" - - for arg in "$@"; do - case "$arg" in - --pages=*) MAX_PAGES="${arg#*=}" ;; - --output=*) OUTPUT_BASE="${arg#*=}" ;; - --delay=*) DELAY="${arg#*=}" ;; - *) topic_input="$arg" ;; - esac - done - - if [ -z "$topic_input" ]; then - echo "Usage: $0 <topic-id-or-url> [--pages=N] [--output=DIR] [--delay=2]" - echo "" - echo "Examples:" - echo " $0 2769739" - echo " $0 https://bitcointalk.org/index.php?topic=2769739.0" - echo " $0 2769739 --pages=5 --output=./lethean-ann" - exit 1 - fi - - local topic_id=$(parse_topic_id "$topic_input") - - if [ -z "$topic_id" ]; then - echo "Error: Could not parse topic ID from: $topic_input" - exit 1 - fi - - collect_thread "$topic_id" -} - -main "$@" diff --git a/claude/collect/skills/block-explorer/generate-jobs.sh b/claude/collect/skills/block-explorer/generate-jobs.sh deleted file mode 100644 index 2bd8b22..0000000 --- a/claude/collect/skills/block-explorer/generate-jobs.sh +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env bash -# Generate block explorer collection jobs -# Usage: ./generate-jobs.sh <coin> [--blocks=N] [--sample=daily|weekly|monthly] - -set -e - -COIN="" -EXPLORER_URL="" -SAMPLE="monthly" -BLOCK_COUNT=100 - -# Known explorers -declare -A EXPLORERS=( - ["lethean"]="https://explorer.lethean.io" - ["monero"]="https://xmrchain.net" - ["haven"]="https://explorer.havenprotocol.org" - ["karbo"]="https://explorer.karbo.io" - ["wownero"]="https://explore.wownero.com" - ["dero"]="https://explorer.dero.io" - ["masari"]="https://explorer.getmasari.org" - ["turtlecoin"]="https://explorer.turtlecoin.lol" - ["conceal"]="https://explorer.conceal.network" -) - -# Parse args -for arg in "$@"; do - case "$arg" in - --url=*) EXPLORER_URL="${arg#*=}" ;; - --blocks=*) BLOCK_COUNT="${arg#*=}" ;; - --sample=*) SAMPLE="${arg#*=}" ;; - --*) ;; 
- *) COIN="$arg" ;; - esac -done - -if [ -z "$COIN" ] && [ -z "$EXPLORER_URL" ]; then - echo "Usage: $0 <coin> [--url=URL] [--blocks=N] [--sample=daily|weekly|monthly]" >&2 - echo "" >&2 - echo "Known coins: ${!EXPLORERS[*]}" >&2 - exit 1 -fi - -# Get explorer URL -if [ -z "$EXPLORER_URL" ]; then - EXPLORER_URL="${EXPLORERS[$COIN]}" - if [ -z "$EXPLORER_URL" ]; then - echo "# ERROR: Unknown coin '$COIN'. Use --url= to specify explorer." >&2 - exit 1 - fi -fi - -SLUG=$(echo "$COIN" | tr '[:upper:]' '[:lower:]') - -echo "# Block Explorer Jobs for $COIN" -echo "# Explorer: $EXPLORER_URL" -echo "# Sample: $SAMPLE" -echo "# Format: URL|FILENAME|TYPE|METADATA" -echo "#" - -# Core API endpoints -echo "# === Core Data ===" -echo "${EXPLORER_URL}/api/info|explorer-${SLUG}-info.json|explorer-api|coin=$SLUG,type=info" -echo "${EXPLORER_URL}/api/emission|explorer-${SLUG}-emission.json|explorer-api|coin=$SLUG,type=emission" -echo "${EXPLORER_URL}/api/supply|explorer-${SLUG}-supply.json|explorer-api|coin=$SLUG,type=supply" -echo "${EXPLORER_URL}/api/mempool|explorer-${SLUG}-mempool.json|explorer-api|coin=$SLUG,type=mempool" - -# Genesis block -echo "#" -echo "# === Genesis Block ===" -echo "${EXPLORER_URL}/api/block/0|explorer-${SLUG}-block-0.json|explorer-api|coin=$SLUG,block=0" -echo "${EXPLORER_URL}/api/block/1|explorer-${SLUG}-block-1.json|explorer-api|coin=$SLUG,block=1" - -# Milestone blocks (if we know the heights) -echo "#" -echo "# === Milestone Blocks ===" -for height in 10000 50000 100000 500000 1000000 2000000; do - echo "${EXPLORER_URL}/api/block/${height}|explorer-${SLUG}-block-${height}.json|explorer-api|coin=$SLUG,block=$height" -done - -# Sample blocks by time -echo "#" -echo "# === Sampled Blocks (estimate heights) ===" -case "$SAMPLE" in - daily) - # ~720 blocks/day for 2-min blocks - STEP=720 - ;; - weekly) - STEP=5040 - ;; - monthly) - STEP=21600 - ;; -esac - -for ((i=0; i<BLOCK_COUNT; i++)); do - height=$((i * STEP)) - echo "${EXPLORER_URL}/api/block/${height}|explorer-${SLUG}-sample-${height}.json|explorer-api|coin=$SLUG,block=$height,sample=$SAMPLE" -done - -# Web pages (for scraping if API fails) -echo "#" -echo "# === Web Pages (backup) ===" -echo "${EXPLORER_URL}/|explorer-${SLUG}-home.html|explorer-web|coin=$SLUG" -echo "${EXPLORER_URL}/blocks|explorer-${SLUG}-blocks.html|explorer-web|coin=$SLUG" -echo "${EXPLORER_URL}/stats|explorer-${SLUG}-stats.html|explorer-web|coin=$SLUG" diff --git a/claude/collect/skills/coinmarketcap/generate-jobs.sh b/claude/collect/skills/coinmarketcap/generate-jobs.sh deleted file mode 100644 index d72de60..0000000 --- a/claude/collect/skills/coinmarketcap/generate-jobs.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Generate job list for CoinMarketCap collection -# Usage: ./generate-jobs.sh <coin-slug> [options] > jobs.txt - -set -e - -COINS=() -HISTORICAL=0 -FROM_DATE="2017-01-01" -TO_DATE=$(date +%Y-%m-%d) - -# Parse args -for arg in "$@"; do - case "$arg" in - --historical) HISTORICAL=1 ;; - --from=*) FROM_DATE="${arg#*=}" ;; - --to=*) TO_DATE="${arg#*=}" ;; - --*) ;; - *) COINS+=("$arg") ;; - esac -done - -if [ ${#COINS[@]} -eq 0 ]; then - echo "Usage: $0 <coin-slug> [coin-slug...] 
[--historical] [--from=DATE] [--to=DATE]" >&2 - echo "" >&2 - echo "Examples:" >&2 - echo " $0 lethean" >&2 - echo " $0 lethean --historical --from=2018-01-01" >&2 - echo " $0 lethean monero bitcoin" >&2 - exit 1 -fi - -# Header -echo "# CoinMarketCap job list - $(date +%Y-%m-%d)" -echo "# Coins: ${COINS[*]}" -echo "# Format: URL|FILENAME|TYPE|METADATA" -echo "#" - -for COIN in "${COINS[@]}"; do - SLUG=$(echo "$COIN" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g') - - echo "# === $SLUG ===" - - # Main page (current data, description, links) - echo "https://coinmarketcap.com/currencies/${SLUG}/|cmc-${SLUG}-main.html|cmc-main|coin=$SLUG" - - # Markets/exchanges - echo "https://coinmarketcap.com/currencies/${SLUG}/markets/|cmc-${SLUG}-markets.html|cmc-markets|coin=$SLUG" - - # Historical data page - echo "https://coinmarketcap.com/currencies/${SLUG}/historical-data/|cmc-${SLUG}-historical.html|cmc-historical|coin=$SLUG" - - # News - echo "https://coinmarketcap.com/currencies/${SLUG}/news/|cmc-${SLUG}-news.html|cmc-news|coin=$SLUG" - - # API endpoints (if accessible without auth) - # These return JSON and are more reliable than scraping - echo "https://api.coinmarketcap.com/data-api/v3/cryptocurrency/detail?slug=${SLUG}|cmc-${SLUG}-api-detail.json|cmc-api|coin=$SLUG,type=detail" - echo "https://api.coinmarketcap.com/data-api/v3/cryptocurrency/market-pairs/latest?slug=${SLUG}&limit=100|cmc-${SLUG}-api-markets.json|cmc-api|coin=$SLUG,type=markets" - - # Historical data via API (may need date chunks) - if [ "$HISTORICAL" = "1" ]; then - echo "#" - echo "# Historical data: $FROM_DATE to $TO_DATE" - - # Convert dates to timestamps - FROM_TS=$(date -j -f "%Y-%m-%d" "$FROM_DATE" "+%s" 2>/dev/null || date -d "$FROM_DATE" "+%s") - TO_TS=$(date -j -f "%Y-%m-%d" "$TO_DATE" "+%s" 2>/dev/null || date -d "$TO_DATE" "+%s") - - # CMC historical API (public, limited) - echo "https://api.coinmarketcap.com/data-api/v3/cryptocurrency/historical?slug=${SLUG}&timeStart=${FROM_TS}&timeEnd=${TO_TS}|cmc-${SLUG}-api-historical.json|cmc-api|coin=$SLUG,type=historical" - - # Also try the web scrape version with date range - echo "https://coinmarketcap.com/currencies/${SLUG}/historical-data/?start=${FROM_DATE//\-/}&end=${TO_DATE//\-/}|cmc-${SLUG}-historical-range.html|cmc-historical|coin=$SLUG,from=$FROM_DATE,to=$TO_DATE" - fi - - echo "#" -done - -echo "# === Additional data sources ===" -echo "#" - -# CoinGecko as backup (often has more historical data) -for COIN in "${COINS[@]}"; do - SLUG=$(echo "$COIN" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g') - echo "https://www.coingecko.com/en/coins/${SLUG}|coingecko-${SLUG}-main.html|coingecko|coin=$SLUG" - echo "https://api.coingecko.com/api/v3/coins/${SLUG}|coingecko-${SLUG}-api.json|coingecko-api|coin=$SLUG" - echo "https://api.coingecko.com/api/v3/coins/${SLUG}/market_chart?vs_currency=usd&days=max|coingecko-${SLUG}-history.json|coingecko-api|coin=$SLUG,type=history" -done diff --git a/claude/collect/skills/coinmarketcap/process.sh b/claude/collect/skills/coinmarketcap/process.sh deleted file mode 100644 index 11c7d56..0000000 --- a/claude/collect/skills/coinmarketcap/process.sh +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env bash -# Process downloaded CoinMarketCap data -# Usage: ./process.sh <downloads-dir> [--output=DIR] - -set -e - -DOWNLOADS="$1" -OUTPUT="./cmc-archive" - -for arg in "$@"; do - case "$arg" in - --output=*) OUTPUT="${arg#*=}" ;; - esac -done - -mkdir -p "$OUTPUT" - -echo "=== Processing CoinMarketCap downloads ===" - -# Process API 
JSON files first (most reliable) -for file in "$DOWNLOADS"/cmc-*-api-detail.json; do - [ -f "$file" ] || continue - - COIN=$(basename "$file" | sed 's/cmc-\(.*\)-api-detail.json/\1/') - COIN_DIR="$OUTPUT/$COIN" - mkdir -p "$COIN_DIR" - - echo "Processing: $COIN" - - python3 << PYEOF -import json -import os - -try: - data = json.load(open('$file', 'r')) - - if 'data' in data: - coin = data['data'] - - # Extract metadata - metadata = { - 'id': coin.get('id'), - 'name': coin.get('name'), - 'symbol': coin.get('symbol'), - 'slug': coin.get('slug'), - 'description': coin.get('description', ''), - 'logo': coin.get('logo'), - 'category': coin.get('category'), - 'dateAdded': coin.get('dateAdded'), - 'urls': coin.get('urls', {}), - 'tags': coin.get('tags', []), - } - - with open('$COIN_DIR/metadata.json', 'w') as f: - json.dump(metadata, f, indent=2) - print(f" Created metadata.json") - - # Create markdown summary - with open('$COIN_DIR/INDEX.md', 'w') as f: - f.write(f"# {metadata['name']} ({metadata['symbol']})\n\n") - f.write(f"## Metadata\n\n") - f.write(f"| Field | Value |\n") - f.write(f"|-------|-------|\n") - f.write(f"| Name | {metadata['name']} |\n") - f.write(f"| Symbol | {metadata['symbol']} |\n") - f.write(f"| CMC ID | {metadata['id']} |\n") - f.write(f"| Added | {metadata['dateAdded']} |\n") - f.write(f"| Category | {metadata.get('category', 'N/A')} |\n\n") - - f.write(f"## Links\n\n") - urls = metadata.get('urls', {}) - for url_type, url_list in urls.items(): - if url_list: - f.write(f"- **{url_type}**: {', '.join(url_list[:3])}\n") - - f.write(f"\n## Description\n\n") - f.write(metadata.get('description', 'No description')[:2000]) - f.write("\n") - - print(f" Created INDEX.md") - -except Exception as e: - print(f" Error processing: {e}") -PYEOF -done - -# Process historical data -for file in "$DOWNLOADS"/cmc-*-api-historical.json; do - [ -f "$file" ] || continue - - COIN=$(basename "$file" | sed 's/cmc-\(.*\)-api-historical.json/\1/') - COIN_DIR="$OUTPUT/$COIN" - mkdir -p "$COIN_DIR/historical" - - echo "Processing historical: $COIN" - - python3 << PYEOF -import json -import csv -from datetime import datetime - -try: - data = json.load(open('$file', 'r')) - - if 'data' in data and 'quotes' in data['data']: - quotes = data['data']['quotes'] - - # Group by year - by_year = {} - for quote in quotes: - ts = quote.get('timestamp', quote.get('time', '')) - if ts: - year = ts[:4] - if year not in by_year: - by_year[year] = [] - by_year[year].append({ - 'date': ts[:10], - 'open': quote.get('quote', {}).get('USD', {}).get('open', quote.get('open')), - 'high': quote.get('quote', {}).get('USD', {}).get('high', quote.get('high')), - 'low': quote.get('quote', {}).get('USD', {}).get('low', quote.get('low')), - 'close': quote.get('quote', {}).get('USD', {}).get('close', quote.get('close')), - 'volume': quote.get('quote', {}).get('USD', {}).get('volume', quote.get('volume')), - 'market_cap': quote.get('quote', {}).get('USD', {}).get('market_cap', quote.get('market_cap')), - }) - - for year, rows in by_year.items(): - filename = f'$COIN_DIR/historical/{year}.csv' - with open(filename, 'w', newline='') as f: - writer = csv.DictWriter(f, fieldnames=['date', 'open', 'high', 'low', 'close', 'volume', 'market_cap']) - writer.writeheader() - writer.writerows(sorted(rows, key=lambda x: x['date'])) - print(f" Created historical/{year}.csv ({len(rows)} rows)") - -except Exception as e: - print(f" Error: {e}") -PYEOF -done - -# Process CoinGecko data as backup -for file in "$DOWNLOADS"/coingecko-*-api.json; do 
- [ -f "$file" ] || continue - - COIN=$(basename "$file" | sed 's/coingecko-\(.*\)-api.json/\1/') - COIN_DIR="$OUTPUT/$COIN" - mkdir -p "$COIN_DIR" - - echo "Processing CoinGecko: $COIN" - - python3 << PYEOF -import json - -try: - data = json.load(open('$file', 'r')) - - # Extract useful fields - gecko_data = { - 'coingecko_id': data.get('id'), - 'coingecko_rank': data.get('coingecko_rank'), - 'genesis_date': data.get('genesis_date'), - 'sentiment_up': data.get('sentiment_votes_up_percentage'), - 'sentiment_down': data.get('sentiment_votes_down_percentage'), - 'developer_data': data.get('developer_data', {}), - 'community_data': data.get('community_data', {}), - } - - with open('$COIN_DIR/coingecko.json', 'w') as f: - json.dump(gecko_data, f, indent=2) - print(f" Created coingecko.json") - -except Exception as e: - print(f" Error: {e}") -PYEOF -done - -# Process market/exchange data -for file in "$DOWNLOADS"/cmc-*-api-markets.json; do - [ -f "$file" ] || continue - - COIN=$(basename "$file" | sed 's/cmc-\(.*\)-api-markets.json/\1/') - COIN_DIR="$OUTPUT/$COIN" - mkdir -p "$COIN_DIR" - - echo "Processing markets: $COIN" - - python3 << PYEOF -import json - -try: - data = json.load(open('$file', 'r')) - - if 'data' in data and 'marketPairs' in data['data']: - pairs = data['data']['marketPairs'] - - markets = [] - for pair in pairs[:50]: # Top 50 markets - markets.append({ - 'exchange': pair.get('exchangeName'), - 'pair': pair.get('marketPair'), - 'price': pair.get('price'), - 'volume_24h': pair.get('volumeUsd'), - 'type': pair.get('marketType'), - }) - - with open('$COIN_DIR/markets.json', 'w') as f: - json.dump(markets, f, indent=2) - - # Add to INDEX.md - with open('$COIN_DIR/INDEX.md', 'a') as f: - f.write(f"\n## Markets (Top 10)\n\n") - f.write(f"| Exchange | Pair | Volume 24h |\n") - f.write(f"|----------|------|------------|\n") - for m in markets[:10]: - vol = m.get('volume_24h', 0) - vol_str = f"${vol:,.0f}" if vol else "N/A" - f.write(f"| {m['exchange']} | {m['pair']} | {vol_str} |\n") - - print(f" Created markets.json ({len(markets)} pairs)") - -except Exception as e: - print(f" Error: {e}") -PYEOF -done - -echo "" -echo "=== Processing Complete ===" -echo "Output: $OUTPUT/" diff --git a/claude/collect/skills/cryptonote-discovery/discover.sh b/claude/collect/skills/cryptonote-discovery/discover.sh deleted file mode 100644 index 22c1ee0..0000000 --- a/claude/collect/skills/cryptonote-discovery/discover.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env bash -# Discover all collection sources for a CryptoNote project -# Usage: ./discover.sh <project-name> | ./discover.sh --abandoned | ./discover.sh --all - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REGISTRY="$SCRIPT_DIR/registry.json" - -# Get project data from registry -get_project() { - local name="$1" - jq -r ".projects[] | select(.name | ascii_downcase == \"$(echo $name | tr '[:upper:]' '[:lower:]')\")" "$REGISTRY" -} - -# List abandoned projects -list_abandoned() { - jq -r '.projects[] | select(.status == "abandoned" or .status == "low-activity" or .status == "dead") | .name' "$REGISTRY" -} - -# List all projects -list_all() { - jq -r '.projects[].name' "$REGISTRY" -} - -# Generate sources for a project -generate_sources() { - local name="$1" - local project=$(get_project "$name") - - if [ -z "$project" ] || [ "$project" = "null" ]; then - echo "# ERROR: Project '$name' not found in registry" >&2 - return 1 - fi - - local symbol=$(echo "$project" | jq -r '.symbol') - local status=$(echo "$project" | jq 
-r '.status') - - echo "# === $name ($symbol) ===" - echo "# Status: $status" - echo "#" - - # GitHub repos - echo "# GitHub Organizations:" - echo "$project" | jq -r '.github[]?' | while read org; do - [ -n "$org" ] && echo "github|https://github.com/$org|$name" - done - - # BitcoinTalk - local btt=$(echo "$project" | jq -r '.bitcointalk // empty') - if [ -n "$btt" ]; then - echo "#" - echo "# BitcoinTalk:" - echo "bitcointalk|https://bitcointalk.org/index.php?topic=$btt.0|$name" - fi - - # CMC/CoinGecko - local cmc=$(echo "$project" | jq -r '.cmc // empty') - local gecko=$(echo "$project" | jq -r '.coingecko // empty') - echo "#" - echo "# Market Data:" - [ -n "$cmc" ] && echo "cmc|https://coinmarketcap.com/currencies/$cmc/|$name" - [ -n "$gecko" ] && echo "coingecko|https://coingecko.com/en/coins/$gecko|$name" - - # Website/Explorer - local website=$(echo "$project" | jq -r '.website // empty') - local explorer=$(echo "$project" | jq -r '.explorer // empty') - echo "#" - echo "# Web Properties:" - [ -n "$website" ] && echo "wayback|https://$website|$name" - [ -n "$explorer" ] && echo "explorer|https://$explorer|$name" - - # Salvageable features - local salvage=$(echo "$project" | jq -r '.salvageable[]?' 2>/dev/null) - if [ -n "$salvage" ]; then - echo "#" - echo "# Salvageable:" - echo "$project" | jq -r '.salvageable[]?' | while read item; do - echo "# - $item" - done - fi - - echo "#" -} - -# Main -case "$1" in - --abandoned) - echo "# Abandoned CryptoNote Projects (Salvage Candidates)" - echo "# Format: source|url|project" - echo "#" - for proj in $(list_abandoned); do - generate_sources "$proj" - done - ;; - --all) - echo "# All CryptoNote Projects" - echo "# Format: source|url|project" - echo "#" - for proj in $(list_all); do - generate_sources "$proj" - done - ;; - --list) - list_all - ;; - --list-abandoned) - list_abandoned - ;; - "") - echo "Usage: $0 <project-name> | --abandoned | --all | --list" >&2 - echo "" >&2 - echo "Examples:" >&2 - echo " $0 lethean # Sources for Lethean" >&2 - echo " $0 monero # Sources for Monero" >&2 - echo " $0 --abandoned # All abandoned projects" >&2 - echo " $0 --all # Everything" >&2 - echo " $0 --list # Just list project names" >&2 - exit 1 - ;; - *) - generate_sources "$1" - ;; -esac diff --git a/claude/collect/skills/github-history/SKILL.md b/claude/collect/skills/github-history/SKILL.md index 8b50076..640a54b 100644 --- a/claude/collect/skills/github-history/SKILL.md +++ b/claude/collect/skills/github-history/SKILL.md @@ -1,137 +1,78 @@ # GitHub History Collection Skill -Collect and score GitHub issues and PRs for triage analysis. +Collect GitHub repositories, issues, and PRs for archival and triage analysis. 
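+## Batch Sketch (illustrative)
+
+A minimal wrapper for collecting several orgs in one pass, assuming Borg is installed as described under Prerequisites below. It uses only the `borg collect github repos ... --format stim -o ...` form shown under Usage; the org list (a sample from the Target Registry below) and the output names are illustrative, not prescribed:
+
+```bash
+#!/usr/bin/env bash
+# Illustrative sketch: collect each org into its own encrypted .stim archive.
+# Assumes `borg` is on PATH (see Prerequisites); org list is a sample only.
+set -e
+
+orgs=(LetheanNetwork letheanVPN LetheanMovement monero-project graft-project)
+
+for org in "${orgs[@]}"; do
+  echo "Collecting $org..."
+  borg collect github repos "$org" --format stim -o "${org}.stim"
+done
+```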
+ +## Prerequisites + +```bash +# Install Borg +go install github.com/Snider/Borg@latest +``` ## Usage ```bash -# Single repo -./collect.sh https://github.com/LetheanNetwork/lthn-app-vpn +# Clone a single repository +borg collect github repo https://github.com/LetheanNetwork/lthn-app-vpn -# Entire org (all repos) -./collect.sh https://github.com/LetheanNetwork --org +# Clone all repos from an org +borg collect github repos LetheanNetwork -# Just issues (skip PRs) -./collect.sh https://github.com/LetheanNetwork/lthn-app-vpn --issues-only - -# Just PRs (skip issues) -./collect.sh https://github.com/LetheanNetwork/lthn-app-vpn --prs-only - -# Custom rate limit delay -./collect.sh https://github.com/LetheanNetwork --org --delay=0.5 +# Output to encrypted container +borg collect github repos LetheanNetwork --format stim -o lethean.stim ``` -## Output Structure +## Target Registry -``` -repo/ -├── {org}/ -│ └── {repo}/ -│ ├── Issue/ -│ │ ├── 001.md # Sequential, no gaps -│ │ ├── 002.md -│ │ ├── 003.md -│ │ └── INDEX.md # Scored index -│ ├── PR/ -│ │ ├── 001.md -│ │ ├── 002.md -│ │ └── INDEX.md -│ └── .json/ # Raw API responses -│ ├── issues-list.json -│ ├── issue-{n}.json -│ ├── prs-list.json -│ └── pr-{n}.json -``` +### Lethean Ecosystem +- `LetheanNetwork` +- `letheanVPN` +- `LetheanMovement` -### Sequential vs GitHub Numbers +### CryptoNote Active +- `monero-project` +- `hyle-team` +- `zanoio` +- `kevacoin-project` +- `scala-network` +- `Karbovanets` +- `wownero` +- `ConcealNetwork` +- `ryo-currency` -- **Filename**: `001.md`, `002.md`, etc. - sequential, no gaps -- **Inside file**: `# Issue #47: ...` - preserves original GitHub number -- **INDEX.md**: Maps both: `| 001 | #47 | Title | SCORE |` +### Salvage Priority (dead/abandoned) +- `haven-protocol-org` +- `graft-project` +- `graft-community` +- `oxen-io` +- `loki-project` +- `turtlecoin` +- `masari-project` +- `aeonix` +- `nerva-project` +- `sumoprojects` +- `deroproject` +- `bcndev` +- `electroneum` -This ensures clean sequential browsing while maintaining traceability to GitHub. 
- -## Reception Scores - -| Score | Meaning | Triage Action | -|-------|---------|---------------| -| ADDRESSED | Closed after discussion | Review if actually fixed | -| DISMISSED | Labeled wontfix/invalid | **RECLAIM candidate** | -| IGNORED | Closed, no response | **RECLAIM candidate** | -| STALE | Open, no replies | Needs attention | -| ACTIVE | Open with discussion | In progress | -| MERGED | PR accepted | Done | -| REJECTED | PR closed unmerged | Review why | -| PENDING | PR still open | Needs review | - -## Requirements - -- `gh` CLI authenticated (`gh auth login`) -- `jq` installed +### Non-CN Reference +- `theQRL` +- `hyperswarm` +- `holepunchto` +- `openhive-network` +- `octa-space` ## Batch Collection -Supports comma-separated targets for batch runs: - ```bash -# Batch orgs -./collect.sh "LetheanNetwork,graft-project,oxen-io" --org - -# Batch repos -./collect.sh "LetheanNetwork/lthn-app-vpn,monero-project/monero" +# Collect everything into encrypted archive +borg collect github repos LetheanNetwork,monero-project,graft-project \ + --format stim -o cryptonote-archive.stim ``` -## Full Registry List +## Triage Workflow -Copy-paste ready commands for the complete CryptoNote ecosystem: - -```bash -# === LETHEAN ECOSYSTEM === -./collect.sh "LetheanNetwork,letheanVPN,LetheanMovement" --org - -# === CRYPTONOTE ACTIVE === -./collect.sh "monero-project,hyle-team,zanoio,kevacoin-project,scala-network" --org -./collect.sh "Karbovanets,wownero,ConcealNetwork,ryo-currency" --org - -# === SALVAGE PRIORITY (dead/abandoned) === -./collect.sh "haven-protocol-org,graft-project,graft-community" --org -./collect.sh "oxen-io,loki-project" --org -./collect.sh "turtlecoin,masari-project,aeonix,nerva-project,sumoprojects" --org -./collect.sh "deroproject,bcndev,electroneum" --org - -# === NON-CN REFERENCE === -./collect.sh "theQRL,hyperswarm,holepunchto,openhive-network,octa-space" --org -``` - -### One-liner for everything - -```bash -./collect.sh "LetheanNetwork,letheanVPN,LetheanMovement,monero-project,haven-protocol-org,hyle-team,zanoio,kevacoin-project,scala-network,deroproject,Karbovanets,wownero,turtlecoin,masari-project,aeonix,oxen-io,loki-project,graft-project,graft-community,nerva-project,ConcealNetwork,ryo-currency,sumoprojects,bcndev,electroneum,theQRL,hyperswarm,holepunchto,openhive-network,octa-space" --org -``` - -## Example Run - -```bash -$ ./collect.sh "LetheanNetwork,graft-project" --org - -=== Collecting all repos from org: LetheanNetwork === -=== Collecting: LetheanNetwork/lthn-app-vpn === - Output: ./repo/LetheanNetwork/lthn-app-vpn/ -Fetching issues... - Found 145 issues - Fetching issue #1 -> 001.md - ... - Created Issue/INDEX.md -Fetching PRs... - Found 98 PRs - ... - Created PR/INDEX.md - -=== Collecting all repos from org: graft-project === -=== Collecting: graft-project/graft-network === - Output: ./repo/graft-project/graft-network/ -... - -=== Collection Complete === -Output: ./repo/ -``` +1. Collect repos with Borg +2. Review issues marked DISMISSED or IGNORED +3. Identify salvageable features +4. 
Document in project-archaeology skill diff --git a/claude/collect/skills/github-history/collect.sh b/claude/collect/skills/github-history/collect.sh deleted file mode 100755 index 2a1584e..0000000 --- a/claude/collect/skills/github-history/collect.sh +++ /dev/null @@ -1,516 +0,0 @@ -#!/usr/bin/env bash -# GitHub History Collector v2 -# Usage: ./collect.sh <target> [--org] [--issues-only] [--prs-only] -# -# Supports: -# Single repo: ./collect.sh LetheanNetwork/lthn-app-vpn -# Single org: ./collect.sh LetheanNetwork --org -# Batch orgs: ./collect.sh "LetheanNetwork,graft-project,oxen-io" --org -# Batch repos: ./collect.sh "owner/repo1,owner/repo2" -# -# Output structure: -# repo/{org}/{repo}/Issue/001.md, 002.md, ... -# repo/{org}/{repo}/PR/001.md, 002.md, ... -# -# Rate limiting: -# --check-rate Just show current rate limit status and exit -# Auto-pauses at 25% remaining (75% used) until reset+10s (preserves GraphQL quota) - -set -e - -# GitHub API allows 5000 requests/hour authenticated -# 0.05s = 20 req/sec = safe margin, bump to 0.1 if rate limited -DELAY=0.05 -OUTPUT_BASE="./repo" - -# Rate limit protection - check every N calls, pause if under 25% (75% used) -API_CALL_COUNT=0 -RATE_CHECK_INTERVAL=100 - -check_rate_limit() { - local rate_json=$(gh api rate_limit 2>/dev/null) - if [ -z "$rate_json" ]; then - echo " [Rate check failed, continuing...]" - return - fi - - local remaining=$(echo "$rate_json" | jq -r '.resources.core.remaining') - local limit=$(echo "$rate_json" | jq -r '.resources.core.limit') - local reset=$(echo "$rate_json" | jq -r '.resources.core.reset') - - local percent=$((remaining * 100 / limit)) - - echo "" - echo ">>> Rate check: ${percent}% remaining ($remaining/$limit)" - - if [ "$percent" -lt 25 ]; then - local now=$(date +%s) - local wait_time=$((reset - now + 10)) - - if [ "$wait_time" -gt 0 ]; then - local resume_time=$(date -d "@$((reset + 10))" '+%H:%M:%S' 2>/dev/null || date -r "$((reset + 10))" '+%H:%M:%S' 2>/dev/null || echo "reset+10s") - echo ">>> Under 25% - pausing ${wait_time}s until $resume_time" - echo ">>> (GraphQL quota preserved for other tools)" - sleep "$wait_time" - echo ">>> Resuming collection..." - fi - else - echo ">>> Above 25% - continuing..." - fi - echo "" -} - -track_api_call() { - API_CALL_COUNT=$((API_CALL_COUNT + 1)) - - if [ $((API_CALL_COUNT % RATE_CHECK_INTERVAL)) -eq 0 ]; then - check_rate_limit - fi -} - -# Parse URL into org/repo -parse_github_url() { - local url="$1" - url="${url#https://github.com/}" - url="${url#http://github.com/}" - url="${url%/}" - echo "$url" -} - -# Collect single repo -collect_repo() { - local repo="$1" # format: org/repo-name - local org=$(dirname "$repo") - local repo_name=$(basename "$repo") - - local issue_dir="$OUTPUT_BASE/$org/$repo_name/Issue" - local pr_dir="$OUTPUT_BASE/$org/$repo_name/PR" - local json_dir="$OUTPUT_BASE/$org/$repo_name/.json" - - mkdir -p "$issue_dir" "$pr_dir" "$json_dir" - - echo "=== Collecting: $repo ===" - echo " Output: $OUTPUT_BASE/$org/$repo_name/" - - # Collect Issues - if [ "$SKIP_ISSUES" != "1" ]; then - echo "Fetching issues..." - if ! 
gh issue list --repo "$repo" --state all --limit 500 \ - --json number,title,state,author,labels,createdAt,closedAt,body \ - > "$json_dir/issues-list.json" 2>/dev/null; then - echo " (issues disabled or not accessible)" - echo "[]" > "$json_dir/issues-list.json" - fi - track_api_call - - local issue_count=$(jq length "$json_dir/issues-list.json") - echo " Found $issue_count issues" - - # Fetch each issue - local seq=0 - for github_num in $(jq -r '.[].number' "$json_dir/issues-list.json" | sort -n); do - seq=$((seq + 1)) - local seq_padded=$(printf '%03d' $seq) - - # Skip if already fetched - if [ -f "$json_dir/issue-$github_num.json" ] && [ -f "$issue_dir/$seq_padded.md" ]; then - echo " Skipping issue #$github_num (already exists)" - continue - fi - - echo " Fetching issue #$github_num -> $seq_padded.md" - gh issue view "$github_num" --repo "$repo" \ - --json number,title,state,author,labels,createdAt,closedAt,body,comments \ - > "$json_dir/issue-$github_num.json" - track_api_call - - # Convert to markdown with sequential filename - convert_issue "$json_dir/issue-$github_num.json" "$issue_dir/$seq_padded.md" "$github_num" - sleep $DELAY - done - - generate_issue_index "$issue_dir" - fi - - # Collect PRs - if [ "$SKIP_PRS" != "1" ]; then - echo "Fetching PRs..." - if ! gh pr list --repo "$repo" --state all --limit 500 \ - --json number,title,state,author,createdAt,closedAt,mergedAt,body \ - > "$json_dir/prs-list.json" 2>/dev/null; then - echo " (PRs disabled or not accessible)" - echo "[]" > "$json_dir/prs-list.json" - fi - track_api_call - - local pr_count=$(jq length "$json_dir/prs-list.json") - echo " Found $pr_count PRs" - - # Fetch each PR - local seq=0 - for github_num in $(jq -r '.[].number' "$json_dir/prs-list.json" | sort -n); do - seq=$((seq + 1)) - local seq_padded=$(printf '%03d' $seq) - - # Skip if already fetched - if [ -f "$json_dir/pr-$github_num.json" ] && [ -f "$pr_dir/$seq_padded.md" ]; then - echo " Skipping PR #$github_num (already exists)" - continue - fi - - echo " Fetching PR #$github_num -> $seq_padded.md" - gh pr view "$github_num" --repo "$repo" \ - --json number,title,state,author,createdAt,closedAt,mergedAt,body,comments,reviews \ - > "$json_dir/pr-$github_num.json" 2>/dev/null || true - track_api_call - - # Convert to markdown with sequential filename - convert_pr "$json_dir/pr-$github_num.json" "$pr_dir/$seq_padded.md" "$github_num" - sleep $DELAY - done - - generate_pr_index "$pr_dir" - fi -} - -# Collect all repos in org -collect_org() { - local org="$1" - - echo "=== Collecting all repos from org: $org ===" - - # Get repo list (1 API call) - local repos - repos=$(gh repo list "$org" --limit 500 --json nameWithOwner -q '.[].nameWithOwner') - track_api_call - - while read -r repo; do - [ -n "$repo" ] || continue - collect_repo "$repo" - sleep $DELAY - done <<< "$repos" -} - -# Convert issue JSON to markdown -convert_issue() { - local json_file="$1" - local output_file="$2" - local github_num="$3" - - local title=$(jq -r '.title' "$json_file") - local state=$(jq -r '.state' "$json_file") - local author=$(jq -r '.author.login' "$json_file") - local created=$(jq -r '.createdAt' "$json_file" | cut -d'T' -f1) - local closed=$(jq -r '.closedAt // "N/A"' "$json_file" | cut -d'T' -f1) - local body=$(jq -r '.body // "No description"' "$json_file") - local labels=$(jq -r '[.labels[].name] | join(", ")' "$json_file") - local comment_count=$(jq '.comments | length' "$json_file") - - # Score reception - local score="UNKNOWN" - local reason="" - - if [ "$state" = "CLOSED" 
]; then - if echo "$labels" | grep -qi "wontfix\|invalid\|duplicate\|won't fix"; then - score="DISMISSED" - reason="Labeled as wontfix/invalid/duplicate" - elif [ "$comment_count" -eq 0 ]; then - score="IGNORED" - reason="Closed with no discussion" - else - score="ADDRESSED" - reason="Closed after discussion" - fi - else - if [ "$comment_count" -eq 0 ]; then - score="STALE" - reason="Open with no response" - else - score="ACTIVE" - reason="Open with discussion" - fi - fi - - cat > "$output_file" << ISSUE_EOF -# Issue #$github_num: $title - -## Reception Score - -| Score | Reason | -|-------|--------| -| **$score** | $reason | - ---- - -## Metadata - -| Field | Value | -|-------|-------| -| GitHub # | $github_num | -| State | $state | -| Author | @$author | -| Created | $created | -| Closed | $closed | -| Labels | $labels | -| Comments | $comment_count | - ---- - -## Original Post - -**Author:** @$author - -$body - ---- - -## Discussion Thread - -ISSUE_EOF - - jq -r '.comments[] | "### Comment by @\(.author.login)\n\n**Date:** \(.createdAt | split("T")[0])\n\n\(.body)\n\n---\n"' "$json_file" >> "$output_file" 2>/dev/null || true -} - -# Convert PR JSON to markdown -convert_pr() { - local json_file="$1" - local output_file="$2" - local github_num="$3" - - [ -f "$json_file" ] || return - - local title=$(jq -r '.title' "$json_file") - local state=$(jq -r '.state' "$json_file") - local author=$(jq -r '.author.login' "$json_file") - local created=$(jq -r '.createdAt' "$json_file" | cut -d'T' -f1) - local merged=$(jq -r '.mergedAt // "N/A"' "$json_file" | cut -d'T' -f1) - local body=$(jq -r '.body // "No description"' "$json_file") - - local score="UNKNOWN" - local reason="" - - if [ "$state" = "MERGED" ] || { [ "$merged" != "N/A" ] && [ "$merged" != "null" ]; }; then - score="MERGED" - reason="Contribution accepted" - elif [ "$state" = "CLOSED" ]; then - score="REJECTED" - reason="PR closed without merge" - else - score="PENDING" - reason="Still open" - fi - - cat > "$output_file" << PR_EOF -# PR #$github_num: $title - -## Reception Score - -| Score | Reason | -|-------|--------| -| **$score** | $reason | - ---- - -## Metadata - -| Field | Value | -|-------|-------| -| GitHub # | $github_num | -| State | $state | -| Author | @$author | -| Created | $created | -| Merged | $merged | - ---- - -## Description - -$body - ---- - -## Reviews & Comments - -PR_EOF - - jq -r '.comments[]? | "### Comment by @\(.author.login)\n\n\(.body)\n\n---\n"' "$json_file" >> "$output_file" 2>/dev/null || true - jq -r '.reviews[]? 
| "### Review by @\(.author.login) [\(.state)]\n\n\(.body // "No comment")\n\n---\n"' "$json_file" >> "$output_file" 2>/dev/null || true -} - -# Generate Issue index -generate_issue_index() { - local dir="$1" - - cat > "$dir/INDEX.md" << 'INDEX_HEADER' -# Issues Index - -## Reception Score Legend - -| Score | Meaning | Action | -|-------|---------|--------| -| ADDRESSED | Closed after discussion | Review if actually fixed | -| DISMISSED | Labeled wontfix/invalid | **RECLAIM candidate** | -| IGNORED | Closed, no response | **RECLAIM candidate** | -| STALE | Open, no replies | Needs attention | -| ACTIVE | Open with discussion | In progress | - ---- - -## Issues - -| Seq | GitHub # | Title | Score | -|-----|----------|-------|-------| -INDEX_HEADER - - for file in "$dir"/[0-9]*.md; do - [ -f "$file" ] || continue - local seq=$(basename "$file" .md) - local github_num=$(sed -n 's/^# Issue #\([0-9]*\):.*/\1/p' "$file") - local title=$(head -1 "$file" | sed 's/^# Issue #[0-9]*: //') - local score=$(sed -n '/\*\*[A-Z]/s/.*\*\*\([A-Z]*\)\*\*.*/\1/p' "$file" | head -1) - echo "| [$seq]($seq.md) | #$github_num | $title | $score |" >> "$dir/INDEX.md" - done - - echo " Created Issue/INDEX.md" -} - -# Generate PR index -generate_pr_index() { - local dir="$1" - - cat > "$dir/INDEX.md" << 'INDEX_HEADER' -# Pull Requests Index - -## Reception Score Legend - -| Score | Meaning | Action | -|-------|---------|--------| -| MERGED | PR accepted | Done | -| REJECTED | PR closed unmerged | Review why | -| PENDING | PR still open | Needs review | - ---- - -## Pull Requests - -| Seq | GitHub # | Title | Score | -|-----|----------|-------|-------| -INDEX_HEADER - - for file in "$dir"/[0-9]*.md; do - [ -f "$file" ] || continue - local seq=$(basename "$file" .md) - local github_num=$(sed -n 's/^# PR #\([0-9]*\):.*/\1/p' "$file") - local title=$(head -1 "$file" | sed 's/^# PR #[0-9]*: //') - local score=$(sed -n '/\*\*[A-Z]/s/.*\*\*\([A-Z]*\)\*\*.*/\1/p' "$file" | head -1) - echo "| [$seq]($seq.md) | #$github_num | $title | $score |" >> "$dir/INDEX.md" - done - - echo " Created PR/INDEX.md" -} - -# Show rate limit status -show_rate_status() { - local rate_json=$(gh api rate_limit 2>/dev/null) - if [ -z "$rate_json" ]; then - echo "Failed to fetch rate limit" - exit 1 - fi - - echo "=== GitHub API Rate Limit Status ===" - echo "" - echo "Core (REST API):" - echo " Remaining: $(echo "$rate_json" | jq -r '.resources.core.remaining') / $(echo "$rate_json" | jq -r '.resources.core.limit')" - local core_reset=$(echo "$rate_json" | jq -r '.resources.core.reset') - echo " Reset: $(date -d "@$core_reset" '+%H:%M:%S' 2>/dev/null || date -r "$core_reset" '+%H:%M:%S' 2>/dev/null || echo "$core_reset")" - echo "" - echo "GraphQL:" - echo " Remaining: $(echo "$rate_json" | jq -r '.resources.graphql.remaining') / $(echo "$rate_json" | jq -r '.resources.graphql.limit')" - local gql_reset=$(echo "$rate_json" | jq -r '.resources.graphql.reset') - echo " Reset: $(date -d "@$gql_reset" '+%H:%M:%S' 2>/dev/null || date -r "$gql_reset" '+%H:%M:%S' 2>/dev/null || echo "$gql_reset")" - echo "" - echo "Search:" - echo " Remaining: $(echo "$rate_json" | jq -r '.resources.search.remaining') / $(echo "$rate_json" | jq -r '.resources.search.limit')" - echo "" -} - -# Main -main() { - local targets="" - local is_org=0 - SKIP_ISSUES=0 - SKIP_PRS=0 - - # Parse args - for arg in "$@"; do - case "$arg" in - --org) is_org=1 ;; - --issues-only) SKIP_PRS=1 ;; - --prs-only) SKIP_ISSUES=1 ;; - --delay=*) DELAY="${arg#*=}" ;; - --check-rate) 
show_rate_status; exit 0 ;; - https://*|http://*) targets="$arg" ;; - -*) ;; # ignore unknown flags - *) targets="$arg" ;; - esac - done - - if [ -z "$targets" ]; then - echo "Usage: $0 <target> [--org] [--issues-only] [--prs-only] [--delay=0.05] [--check-rate]" - echo "" - echo "Options:" - echo " --check-rate Show rate limit status (Core/GraphQL/Search) and exit" - echo " --delay=N Delay between requests (default: 0.05s)" - echo "" - echo "Rate limiting: Auto-pauses at 25% remaining (75% used) until reset+10s" - echo "" - echo "Target formats:" - echo " Single repo: LetheanNetwork/lthn-app-vpn" - echo " Single org: LetheanNetwork --org" - echo " Batch orgs: \"LetheanNetwork,graft-project,oxen-io\" --org" - echo " Batch repos: \"owner/repo1,owner/repo2\"" - echo "" - echo "Output: repo/{org}/{repo}/Issue/ repo/{org}/{repo}/PR/" - echo "" - echo "Full registry list (copy-paste ready):" - echo "" - echo " # Lethean ecosystem" - echo " $0 \"LetheanNetwork,letheanVPN,LetheanMovement\" --org" - echo "" - echo " # CryptoNote projects" - echo " $0 \"monero-project,haven-protocol-org,hyle-team,zanoio\" --org" - echo " $0 \"kevacoin-project,scala-network,deroproject\" --org" - echo " $0 \"Karbovanets,wownero,turtlecoin\" --org" - echo " $0 \"masari-project,aeonix,nerva-project\" --org" - echo " $0 \"ConcealNetwork,ryo-currency,sumoprojects\" --org" - echo " $0 \"bcndev,electroneum\" --org" - echo "" - echo " # Dead/salvage priority" - echo " $0 \"graft-project,graft-community,oxen-io,loki-project\" --org" - echo "" - echo " # Non-CN reference projects" - echo " $0 \"theQRL,hyperswarm,holepunchto,openhive-network,octa-space\" --org" - exit 1 - fi - - # Handle comma-separated list - IFS=',' read -ra TARGET_LIST <<< "$targets" - - for target in "${TARGET_LIST[@]}"; do - # Trim whitespace - target=$(echo "$target" | xargs) - local parsed=$(parse_github_url "$target") - - if [ "$is_org" = "1" ]; then - collect_org "$parsed" - else - collect_repo "$parsed" - fi - done - - echo "" - echo "=== Collection Complete ===" - echo "Output: $OUTPUT_BASE/" -} - -main "$@" diff --git a/claude/collect/skills/job-collector/generate-jobs.sh b/claude/collect/skills/job-collector/generate-jobs.sh deleted file mode 100644 index 7778cb4..0000000 --- a/claude/collect/skills/job-collector/generate-jobs.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env bash -# Generate job list for proxy-based collection -# Usage: ./generate-jobs.sh <source> <target> [options] > jobs.txt - -set -e - -SOURCE="$1" -TARGET="$2" -shift 2 || true - -# Defaults -LIMIT=1000 -PAGES=100 - -# Parse options -for arg in "$@"; do - case "$arg" in - --limit=*) LIMIT="${arg#*=}" ;; - --pages=*) PAGES="${arg#*=}" ;; - esac -done - -# Output header -echo "# Job list generated $(date +%Y-%m-%d\ %H:%M)" -echo "# Source: $SOURCE | Target: $TARGET" -echo "# Format: URL|FILENAME|TYPE|METADATA" -echo "#" - -case "$SOURCE" in - - bitcointalk|btt) - # Extract topic ID - TOPIC_ID=$(echo "$TARGET" | grep -oE '[0-9]+' | head -1) - echo "# BitcoinTalk topic: $TOPIC_ID" - echo "#" - - # Generate page URLs (20 posts per page) - for ((i=0; i<PAGES*20; i+=20)); do - echo "https://bitcointalk.org/index.php?topic=${TOPIC_ID}.${i}|btt-${TOPIC_ID}-p${i}.html|bitcointalk|page=$((i/20)),offset=$i" - done - ;; - - reddit) - # Handle r/subreddit or full URL - SUBREDDIT=$(echo "$TARGET" | sed 's|.*/r/||' | sed 's|/.*||') - echo "# Reddit: r/$SUBREDDIT" - echo "#" - - # Subreddit pages (top, new, hot) - for sort in "top" "new" "hot"; do - echo 
"https://old.reddit.com/r/${SUBREDDIT}/${sort}/.json?limit=100|reddit-${SUBREDDIT}-${sort}.json|reddit|sort=$sort" - done - - # If it's a specific thread - if [[ "$TARGET" =~ comments/([a-z0-9]+) ]]; then - THREAD_ID="${BASH_REMATCH[1]}" - echo "https://old.reddit.com/r/${SUBREDDIT}/comments/${THREAD_ID}.json|reddit-thread-${THREAD_ID}.json|reddit|thread=$THREAD_ID" - fi - ;; - - wayback|archive) - # Clean domain - DOMAIN=$(echo "$TARGET" | sed 's|https\?://||' | sed 's|/.*||') - echo "# Wayback Machine: $DOMAIN" - echo "#" - - # CDX API to get all snapshots - echo "https://web.archive.org/cdx/search/cdx?url=${DOMAIN}/*&output=json&limit=${LIMIT}|wayback-${DOMAIN}-cdx.json|wayback-index|domain=$DOMAIN" - - # Common important pages - for path in "" "index.html" "about" "roadmap" "team" "whitepaper" "faq"; do - echo "https://web.archive.org/web/2020/${DOMAIN}/${path}|wayback-${DOMAIN}-2020-${path:-index}.html|wayback|year=2020,path=$path" - echo "https://web.archive.org/web/2021/${DOMAIN}/${path}|wayback-${DOMAIN}-2021-${path:-index}.html|wayback|year=2021,path=$path" - echo "https://web.archive.org/web/2022/${DOMAIN}/${path}|wayback-${DOMAIN}-2022-${path:-index}.html|wayback|year=2022,path=$path" - done - ;; - - medium) - # Handle @author or publication - AUTHOR=$(echo "$TARGET" | sed 's|.*/||' | sed 's|^@||') - echo "# Medium: @$AUTHOR" - echo "#" - - # Medium RSS feed (easier to parse) - echo "https://medium.com/feed/@${AUTHOR}|medium-${AUTHOR}-feed.xml|medium-rss|author=$AUTHOR" - - # Profile page - echo "https://medium.com/@${AUTHOR}|medium-${AUTHOR}-profile.html|medium|author=$AUTHOR" - ;; - - twitter|x) - USERNAME=$(echo "$TARGET" | sed 's|.*/||' | sed 's|^@||') - echo "# Twitter/X: @$USERNAME" - echo "# Note: Twitter requires auth - use nitter or API" - echo "#" - - # Nitter instances (public, no auth) - echo "https://nitter.net/${USERNAME}|twitter-${USERNAME}.html|nitter|user=$USERNAME" - echo "https://nitter.net/${USERNAME}/with_replies|twitter-${USERNAME}-replies.html|nitter|user=$USERNAME,type=replies" - ;; - - *) - echo "# ERROR: Unknown source '$SOURCE'" >&2 - echo "# Supported: bitcointalk, reddit, wayback, medium, twitter" >&2 - exit 1 - ;; -esac diff --git a/claude/collect/skills/job-collector/process.sh b/claude/collect/skills/job-collector/process.sh deleted file mode 100644 index 88b30eb..0000000 --- a/claude/collect/skills/job-collector/process.sh +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env bash -# Process downloaded files into markdown -# Usage: ./process.sh <source> <downloads-dir> [--output=DIR] - -set -e - -SOURCE="$1" -DOWNLOADS="$2" -shift 2 || true - -OUTPUT="./processed" - -for arg in "$@"; do - case "$arg" in - --output=*) OUTPUT="${arg#*=}" ;; - esac -done - -mkdir -p "$OUTPUT/posts" - -echo "=== Processing $SOURCE files from $DOWNLOADS ===" - -case "$SOURCE" in - - bitcointalk|btt) - echo "Processing BitcoinTalk pages..." - POST_NUM=0 - - for file in "$DOWNLOADS"/btt-*.html; do - [ -f "$file" ] || continue - echo " Processing: $(basename "$file")" - - python3 << PYEOF -import re -import html -import os - -html_content = open('$file', 'r', encoding='utf-8', errors='ignore').read() - -# Extract thread title from first page -title_match = re.search(r'<title>([^<]+)', html_content) -title = title_match.group(1) if title_match else "Unknown Thread" -title = title.replace(' - Bitcoin Forum', '').strip() - -with open('$OUTPUT/.thread_title', 'w') as f: - f.write(title) - -# Pattern for posts -post_blocks = re.findall(r'
]*id="msg(\d+)"[^>]*>(.*?)
\s*(?:
)', html_content, re.DOTALL) - -for msg_id, content in post_blocks: - # Clean content - content = re.sub(r'', '\n', content) - content = re.sub(r'<[^>]+>', '', content) - content = html.unescape(content).strip() - - if content: - post_num = $POST_NUM + 1 - $POST_NUM = post_num - - with open(f'$OUTPUT/posts/POST-{post_num:04d}.md', 'w') as f: - f.write(f"# Post #{post_num}\\n\\n") - f.write(f"Message ID: {msg_id}\\n\\n") - f.write(f"---\\n\\n") - f.write(content) - f.write("\\n") - - print(f" POST-{post_num:04d}.md") - -print(f"TOTAL:{$POST_NUM}") -PYEOF - done - - # Generate index - TITLE=$(cat "$OUTPUT/.thread_title" 2>/dev/null || echo "BitcoinTalk Thread") - TOTAL=$(ls "$OUTPUT/posts/"POST-*.md 2>/dev/null | wc -l) - - cat > "$OUTPUT/INDEX.md" << EOF -# $TITLE - -Archived from BitcoinTalk - -| Posts | $(echo $TOTAL) | -|-------|------| - -## Posts - -EOF - for f in "$OUTPUT/posts/"POST-*.md; do - [ -f "$f" ] || continue - NUM=$(basename "$f" .md | sed 's/POST-0*//') - echo "- [Post #$NUM](posts/$(basename $f))" >> "$OUTPUT/INDEX.md" - done - ;; - - reddit) - echo "Processing Reddit JSON..." - - for file in "$DOWNLOADS"/reddit-*.json; do - [ -f "$file" ] || continue - echo " Processing: $(basename "$file")" - - python3 << PYEOF -import json -import os - -data = json.load(open('$file', 'r')) - -# Handle different Reddit JSON structures -posts = [] -if isinstance(data, list) and len(data) > 0: - if 'data' in data[0]: - # Thread format - posts = data[0]['data']['children'] - else: - posts = data -elif isinstance(data, dict) and 'data' in data: - posts = data['data']['children'] - -for i, post_wrapper in enumerate(posts): - post = post_wrapper.get('data', post_wrapper) - - title = post.get('title', post.get('body', '')[:50]) - author = post.get('author', 'unknown') - score = post.get('score', 0) - body = post.get('selftext', post.get('body', '')) - created = post.get('created_utc', 0) - - filename = f'$OUTPUT/posts/REDDIT-{i+1:04d}.md' - with open(filename, 'w') as f: - f.write(f"# {title}\\n\\n") - f.write(f"| Author | u/{author} |\\n") - f.write(f"|--------|----------|\\n") - f.write(f"| Score | {score} |\\n\\n") - f.write(f"---\\n\\n") - f.write(body or "(no content)") - f.write("\\n") - - print(f" REDDIT-{i+1:04d}.md - {title[:40]}...") -PYEOF - done - ;; - - wayback) - echo "Processing Wayback Machine files..." - - for file in "$DOWNLOADS"/wayback-*.html; do - [ -f "$file" ] || continue - BASENAME=$(basename "$file" .html) - echo " Processing: $BASENAME" - - # Extract text content - python3 << PYEOF -import re -import html - -content = open('$file', 'r', encoding='utf-8', errors='ignore').read() - -# Remove scripts and styles -content = re.sub(r']*>.*?', '', content, flags=re.DOTALL) -content = re.sub(r']*>.*?', '', content, flags=re.DOTALL) - -# Extract title -title_match = re.search(r'([^<]+)', content) -title = html.unescape(title_match.group(1)) if title_match else "$BASENAME" - -# Get body text -body_match = re.search(r']*>(.*?)', content, re.DOTALL) -if body_match: - body = body_match.group(1) - body = re.sub(r'<[^>]+>', ' ', body) - body = html.unescape(body) - body = re.sub(r'\s+', ' ', body).strip() -else: - body = "(could not extract body)" - -with open('$OUTPUT/posts/$BASENAME.md', 'w') as f: - f.write(f"# {title}\\n\\n") - f.write(f"Source: Wayback Machine\\n\\n") - f.write(f"---\\n\\n") - f.write(body[:5000]) # Limit length - f.write("\\n") - -print(f" $BASENAME.md") -PYEOF - done - ;; - - medium) - echo "Processing Medium files..." 
-
-    # Handle RSS feed
-    for file in "$DOWNLOADS"/medium-*-feed.xml; do
-        [ -f "$file" ] || continue
-        echo "  Processing RSS: $(basename "$file")"
-
-        python3 << PYEOF
-import xml.etree.ElementTree as ET
-import html
-import re
-
-tree = ET.parse('$file')
-root = tree.getroot()
-
-# Element truthiness is unreliable; test against None explicitly
-channel = root.find('channel')
-items = channel.findall('item') if channel is not None else root.findall('.//item')
-
-for i, item in enumerate(items):
-    title = item.findtext('title', 'Untitled')
-    author = item.findtext('{http://purl.org/dc/elements/1.1/}creator', 'Unknown')
-    date = item.findtext('pubDate', '')
-    content = item.findtext('{http://purl.org/rss/1.0/modules/content/}encoded', '')
-
-    # Clean content
-    content = re.sub(r'<[^>]+>', '', content)
-    content = html.unescape(content)
-
-    filename = f'$OUTPUT/posts/MEDIUM-{i+1:04d}.md'
-    with open(filename, 'w') as f:
-        f.write(f"# {title}\\n\\n")
-        f.write(f"| Author | {author} |\\n")
-        f.write(f"|--------|----------|\\n")
-        f.write(f"| Date | {date} |\\n\\n")
-        f.write(f"---\\n\\n")
-        f.write(content[:10000])
-        f.write("\\n")
-
-    print(f"  MEDIUM-{i+1:04d}.md - {title[:40]}...")
-PYEOF
-    done
-    ;;
-
-  *)
-    echo "ERROR: Unknown source '$SOURCE'"
-    echo "Supported: bitcointalk, reddit, wayback, medium"
-    exit 1
-    ;;
-esac
-
-echo ""
-echo "=== Processing Complete ==="
-echo "Output: $OUTPUT/"
diff --git a/claude/collect/skills/ledger-papers/SKILL.md b/claude/collect/skills/ledger-papers/SKILL.md
index 9150767..a9ff038 100644
--- a/claude/collect/skills/ledger-papers/SKILL.md
+++ b/claude/collect/skills/ledger-papers/SKILL.md
@@ -30,32 +30,31 @@ Comprehensive collection of distributed ledger, cryptographic protocol, and dece
 | oracles | 3 | Chainlink, Band Protocol |
 | bridges | 3 | Atomic swaps, XCLAIM, THORChain |
 
-## Usage
+## Collection with Borg
 
 ```bash
-# All papers (91+)
-./discover.sh --all > jobs.txt
+# Collect papers from academic sources
+borg collect website https://eprint.iacr.org --depth 2 --format stim -o iacr-papers.stim
 
-# By category
-./discover.sh --category=cryptography > jobs.txt
-./discover.sh --category=defi > jobs.txt
+# Collect from arXiv
+borg collect website https://arxiv.org/list/cs.CR/recent --depth 1
 
-# By topic
-./discover.sh --topic=bulletproofs > jobs.txt
-./discover.sh --topic=zk-snarks > jobs.txt
-
-# IACR search for more
-./discover.sh --search-iacr > search-jobs.txt
-
-# List categories
-./discover.sh --help
+# Package existing archive
+borg compile -f Borgfile -e "archive-password" -o ledger-papers.stim
 ```
 
-## Output Format
+## Registry
 
-```
-URL|FILENAME|TYPE|METADATA
-https://bitcoin.org/bitcoin.pdf|bitcoin.pdf|paper|category=genesis,title=Bitcoin...
+Papers are catalogued in `registry.json`:
+
+```json
+{
+  "id": "paper-id",
+  "title": "Paper Title",
+  "year": 2024,
+  "url": "https://example.com/paper.pdf",
+  "topics": ["topic1", "topic2"]
+}
 ```
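+
+The registry can drive collection directly. A minimal sketch, assuming the `categories`/`papers` layout the old `discover.sh` read; only the `borg collect website` form shown above is reused, and the depth flag and output paths are illustrative:
+
+```bash
+# Emit "id<TAB>url" for every registry entry, then archive each paper
+jq -r '.categories[].papers[]? | [.id, (.url // .urls[0] // "")] | @tsv' registry.json |
+while IFS=$'\t' read -r id url; do
+    [ -n "$url" ] && borg collect website "$url" --depth 1 --format stim -o "papers/${id}.stim"
+done
+```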
 
 ## CDN Hosting Structure
 
@@ -69,32 +68,14 @@ papers.lethean.io/
 ├── cryptonote/
 │   ├── cryptonote-v2.pdf
 │   └── cns/
-│       ├── cns001.txt
-│       └── ...
 ├── mrl/
-│   ├── MRL-0001.pdf
-│   └── ...
+│   └── MRL-0001.pdf
 ├── cryptography/
 │   ├── bulletproofs.pdf
-│   ├── clsag.pdf
-│   └── ...
+│   └── clsag.pdf
 └── INDEX.json
 ```
 
-## Adding Papers
-
-Edit `registry.json`:
-
-```json
-{
-  "id": "paper-id",
-  "title": "Paper Title",
-  "year": 2024,
-  "url": "https://example.com/paper.pdf",
-  "topics": ["topic1", "topic2"]
-}
-```
-
 ## License Note
 
-Papers collected for archival/educational purposes. Original copyrights remain with authors. CDN hosting as community service under CIC principles.
+Papers collected for archival/educational purposes. Original copyrights remain with authors.
diff --git a/claude/collect/skills/ledger-papers/discover.sh b/claude/collect/skills/ledger-papers/discover.sh
deleted file mode 100755
index 004d738..0000000
--- a/claude/collect/skills/ledger-papers/discover.sh
+++ /dev/null
@@ -1,132 +0,0 @@
-#!/usr/bin/env bash
-# Discover distributed ledger and protocol papers
-# Usage: ./discover.sh [--all] [--category=NAME] [--project=NAME] [--topic=NAME]
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-REGISTRY="$SCRIPT_DIR/registry.json"
-
-# Check for jq
-if ! command -v jq &> /dev/null; then
-    echo "Error: jq is required" >&2
-    exit 1
-fi
-
-CATEGORY=""
-PROJECT=""
-TOPIC=""
-ALL=0
-SEARCH_IACR=0
-
-# Parse args
-for arg in "$@"; do
-    case "$arg" in
-        --all) ALL=1 ;;
-        --category=*) CATEGORY="${arg#*=}" ;;
-        --project=*) PROJECT="${arg#*=}" ;;
-        --topic=*) TOPIC="${arg#*=}" ;;
-        --search-iacr) SEARCH_IACR=1 ;;
-        --help|-h)
-            echo "Usage: $0 [options]"
-            echo ""
-            echo "Options:"
-            echo "  --all              All known papers"
-            echo "  --category=NAME    Filter by category (mrl, iacr, projects, attacks)"
-            echo "  --project=NAME     Filter by project (monero, haven, masari, etc)"
-            echo "  --topic=NAME       Filter by topic (bulletproofs, ringct, etc)"
-            echo "  --search-iacr      Generate IACR search jobs"
-            echo ""
-            echo "Categories:"
-            jq -r '.categories | keys[]' "$REGISTRY"
-            exit 0
-            ;;
-    esac
-done
-
-echo "# Ledger Papers Archive - $(date +%Y-%m-%d)"
-echo "# Format: URL|FILENAME|TYPE|METADATA"
-echo "#"
-
-emit_paper() {
-    local url="$1"
-    local id="$2"
-    local category="$3"
-    local title="$4"
-
-    local filename="${id}.pdf"
-    local metadata="category=$category,title=$title"
-
-    echo "${url}|${filename}|paper|${metadata}"
-}
-
-# Process categories
-process_category() {
-    local cat_name="$1"
-
-    echo "# === $cat_name ==="
-
-    # Get papers in category
-    local papers
-    papers=$(jq -c ".categories[\"$cat_name\"].papers[]?" "$REGISTRY" 2>/dev/null)
-
-    echo "$papers" | while read -r paper; do
-        [ -z "$paper" ] && continue
-
-        local id title url urls
-        id=$(echo "$paper" | jq -r '.id')
-        title=$(echo "$paper" | jq -r '.title // "Unknown"')
-
-        # Check topic filter
-        if [ -n "$TOPIC" ]; then
-            if ! echo "$paper" | jq -e ".topics[]? | select(. == \"$TOPIC\")" > /dev/null 2>&1; then
-                continue
-            fi
-        fi
-
-        # Check project filter
-        if [ -n "$PROJECT" ]; then
-            local paper_project
-            paper_project=$(echo "$paper" | jq -r '.project // ""')
-            if [ "$paper_project" != "$PROJECT" ]; then
-                continue
-            fi
-        fi
-
-        # Get URL (single or first from array)
-        url=$(echo "$paper" | jq -r '.url // .urls[0] // ""')
-
-        if [ -n "$url" ]; then
-            emit_paper "$url" "$id" "$cat_name" "$title"
-        fi
-
-        # Also emit alternate URLs for wayback
-        urls=$(echo "$paper" | jq -r '.urls[]? // empty' 2>/dev/null)
-        echo "$urls" | while read -r alt_url; do
-            [ -z "$alt_url" ] && continue
-            [ "$alt_url" = "$url" ] && continue
-            echo "# alt: $alt_url"
-        done
-    done
-
-    echo "#"
-}
-
-# Main logic
-if [ "$ALL" = "1" ] || [ -z "$CATEGORY" ]; then
-    # All categories - dynamically from registry
-    jq -r '.categories | keys[]' "$REGISTRY" | while read -r cat; do
-        process_category "$cat"
-    done
-else
-    # Single category
-    process_category "$CATEGORY"
-fi
-
-# IACR search jobs
-if [ "$SEARCH_IACR" = "1" ]; then
-    echo "# === IACR Search Jobs ==="
-    jq -r '.search_patterns.iacr[]' "$REGISTRY" | while read -r term; do
-        encoded=$(echo "$term" | sed 's/ /+/g')
-        echo "https://eprint.iacr.org/search?q=${encoded}|iacr-search-${encoded}.html|search|source=iacr,term=$term"
-    done
-fi
diff --git a/claude/collect/skills/mining-pools/generate-jobs.sh b/claude/collect/skills/mining-pools/generate-jobs.sh
deleted file mode 100644
index c175aa1..0000000
--- a/claude/collect/skills/mining-pools/generate-jobs.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/usr/bin/env bash
-# Generate mining pool collection jobs
-# Usage: ./generate-jobs.sh <coin> [--url=URL] [--all]
-
-set -e
-
-COIN=""
-POOL_URL=""
-ALL_POOLS=0
-
-# Known pools registry
-declare -A POOLS_LETHEAN=(
-    ["herominers"]="https://lethean.herominers.com"
-    ["gntl"]="https://lthn.pool.gntl.uk"
-)
-
-declare -A POOLS_MONERO=(
-    ["supportxmr"]="https://supportxmr.com"
-    ["nanopool"]="https://xmr.nanopool.org"
-    ["hashvault"]="https://monero.hashvault.pro"
-)
-
-declare -A POOLS_WOWNERO=(
-    ["herominers"]="https://wownero.herominers.com"
-)
-
-# Parse args
-for arg in "$@"; do
-    case "$arg" in
-        --url=*) POOL_URL="${arg#*=}" ;;
-        --all) ALL_POOLS=1 ;;
-        --*) ;;
-        *) COIN="$arg" ;;
-    esac
-done
-
-emit_pool_jobs() {
-    local pool_name="$1"
-    local pool_url="$2"
-    local coin="$3"
-
-    local slug="${coin}-${pool_name}"
-
-    echo "# === ${pool_name} (${coin}) ==="
-
-    # Common nodejs-pool API endpoints
-    echo "${pool_url}/api/stats|pool-${slug}-stats.json|pool-api|coin=$coin,pool=$pool_name"
-    echo "${pool_url}/api/pool/blocks|pool-${slug}-blocks.json|pool-api|coin=$coin,pool=$pool_name"
-    echo "${pool_url}/api/pool/payments|pool-${slug}-payments.json|pool-api|coin=$coin,pool=$pool_name"
-    echo "${pool_url}/api/network/stats|pool-${slug}-network.json|pool-api|coin=$coin,pool=$pool_name"
-    echo "${pool_url}/api/config|pool-${slug}-config.json|pool-api|coin=$coin,pool=$pool_name"
-
-    # Web pages
-    echo "${pool_url}/|pool-${slug}-home.html|pool-web|coin=$coin,pool=$pool_name"
-    echo "${pool_url}/#/blocks|pool-${slug}-blocks-page.html|pool-web|coin=$coin,pool=$pool_name"
-
-    echo "#"
-}
-
-echo "# Mining Pool Jobs - $(date +%Y-%m-%d)"
-echo "# Format: URL|FILENAME|TYPE|METADATA"
-echo "#"
-
-if [ "$ALL_POOLS" = "1" ]; then
-    for pool in "${!POOLS_LETHEAN[@]}"; do
-        emit_pool_jobs "$pool" "${POOLS_LETHEAN[$pool]}" "lethean"
-    done
-    for pool in "${!POOLS_MONERO[@]}"; do
-        emit_pool_jobs "$pool" "${POOLS_MONERO[$pool]}" "monero"
-    done
-    for pool in "${!POOLS_WOWNERO[@]}"; do
-        emit_pool_jobs "$pool" "${POOLS_WOWNERO[$pool]}" "wownero"
-    done
-elif [ -n "$POOL_URL" ]; then
-    pool_name=$(echo "$POOL_URL" | sed 's|.*://||; s|/.*||; s|\..*||')
-    emit_pool_jobs "$pool_name" "$POOL_URL" "${COIN:-unknown}"
-elif [ -n "$COIN" ]; then
-    case "$COIN" in
-        lethean|lthn)
-            for pool in "${!POOLS_LETHEAN[@]}"; do
-                emit_pool_jobs "$pool" "${POOLS_LETHEAN[$pool]}" "lethean"
-            done
-            ;;
-        monero|xmr)
-            for pool in "${!POOLS_MONERO[@]}"; do
-                emit_pool_jobs "$pool" "${POOLS_MONERO[$pool]}" "monero"
"${POOLS_MONERO[$pool]}" "monero" - done - ;; - wownero|wow) - for pool in "${!POOLS_WOWNERO[@]}"; do - emit_pool_jobs "$pool" "${POOLS_WOWNERO[$pool]}" "wownero" - done - ;; - *) - echo "# Unknown coin: $COIN" >&2 - echo "# Use --url= to specify pool URL" >&2 - exit 1 - ;; - esac -else - echo "Usage: $0 [--url=URL] [--all]" >&2 - echo "" >&2 - echo "Known coins: lethean, monero, wownero" >&2 - exit 1 -fi diff --git a/claude/collect/skills/project-archaeology/SKILL.md b/claude/collect/skills/project-archaeology/SKILL.md index edc0d89..60a2254 100644 --- a/claude/collect/skills/project-archaeology/SKILL.md +++ b/claude/collect/skills/project-archaeology/SKILL.md @@ -11,54 +11,40 @@ When a CryptoNote project dies, its artifacts scatter: - Block explorers shut down - Discord servers empty out -This skill orchestrates a **full dig** on a dead project — running all collectors in sequence to preserve everything salvageable before it's gone forever. +This skill orchestrates a **full dig** using Borg to preserve everything salvageable. -## Usage +## Collection with Borg ```bash -# Full excavation of a project -./excavate.sh masari +# Clone all repos from a dying project +borg collect github repos masari-project --format stim -o masari-github.stim -# Quick scan (just check what's still accessible) -./excavate.sh masari --scan-only +# Archive the website via Wayback +borg collect website https://web.archive.org/web/*/getmasari.org --depth 3 -# Specific collectors only -./excavate.sh masari --only=github,bitcointalk - -# Resume interrupted dig -./excavate.sh masari --resume +# Package everything into encrypted archive +borg compile -f Borgfile -e "archive-password" -o masari-full-dig.stim ``` ## What Gets Collected -| Source | Collector Used | Priority | -|--------|----------------|----------| -| GitHub repos | `github-history` | P1 - often deleted first | -| GitHub releases | `wallet-releases` | P1 - binaries disappear | -| BitcoinTalk ANN | `bitcointalk` | P2 - usually persists | -| Website (Wayback) | `job-collector wayback` | P2 - snapshots exist | -| Block explorer | `block-explorer` | P3 - chain data | -| CoinMarketCap | `coinmarketcap` | P3 - historical prices | -| Whitepapers | `whitepaper-archive` | P1 - research value | -| Reddit | `job-collector reddit` | P4 - community context | -| Medium posts | `job-collector medium` | P4 - announcements | +| Source | Borg Command | Priority | +|--------|--------------|----------| +| GitHub repos | `borg collect github repos ` | P1 | +| GitHub releases | `borg collect github repo ` | P1 | +| Websites | `borg collect website ` | P2 | +| Wayback snapshots | `borg collect website web.archive.org/...` | P2 | ## Output Structure ``` digs/ └── / - ├── EXCAVATION.md # Dig log with timestamps ├── SALVAGE-REPORT.md # What's worth keeping ├── LESSONS.md # What killed it, what we learned - ├── github/ # All repo history - ├── releases/ # Wallet binaries, checksums - ├── bitcointalk/ # Thread archive - ├── website/ # Wayback snapshots - ├── explorer/ # Chain data samples - ├── market/ # Price history, volume - ├── papers/ # Whitepapers, docs - └── community/ # Reddit, Medium, etc + ├── github.stim # All repo history (encrypted) + ├── website.stim # Website snapshots (encrypted) + └── papers/ # Whitepapers, docs ``` ## Report Templates @@ -69,32 +55,15 @@ What code/ideas are worth extracting: - Wallet features - Mining algorithms - Community tools -- Documentation patterns ### LESSONS.md Post-mortem analysis: - Timeline of decline - Root causes (dev burnout, drama, 
 
 ---
 
diff --git a/claude/collect/skills/project-archaeology/excavate.sh b/claude/collect/skills/project-archaeology/excavate.sh
deleted file mode 100755
index b3ad3dc..0000000
--- a/claude/collect/skills/project-archaeology/excavate.sh
+++ /dev/null
@@ -1,311 +0,0 @@
-#!/bin/bash
-# Project Archaeology - Deep excavation of abandoned CryptoNote projects
-# Usage: ./excavate.sh <project> [options]
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-SKILLS_DIR="$(dirname "$SCRIPT_DIR")"
-REGISTRY="$SKILLS_DIR/cryptonote-discovery/registry.json"
-OUTPUT_DIR="$SCRIPT_DIR/digs"
-
-# Colors
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m'
-
-# Defaults
-SCAN_ONLY=false
-RESUME=false
-ONLY_COLLECTORS=""
-
-usage() {
-    echo "Usage: $0 <project> [options]"
-    echo ""
-    echo "Options:"
-    echo "  --scan-only      Check what's accessible without downloading"
-    echo "  --resume         Resume interrupted excavation"
-    echo "  --only=a,b,c     Run specific collectors only"
-    echo "  --help           Show this help"
-    echo ""
-    echo "Examples:"
-    echo "  $0 masari                    # Full excavation"
-    echo "  $0 masari --scan-only        # Quick accessibility check"
-    echo "  $0 masari --only=github,btt  # GitHub and BitcoinTalk only"
-    exit 1
-}
-
-log() {
-    echo -e "${BLUE}[$(date '+%H:%M:%S')]${NC} $1"
-}
-
-success() {
-    echo -e "${GREEN}[✓]${NC} $1"
-}
-
-warn() {
-    echo -e "${YELLOW}[!]${NC} $1"
-}
-
-error() {
-    echo -e "${RED}[✗]${NC} $1"
-}
-
-# Get project data from registry
-get_project() {
-    local name="$1"
-    jq -r --arg n "$name" '.projects[] | select(.name | ascii_downcase == ($n | ascii_downcase))' "$REGISTRY"
-}
-
-# Check if a collector should run
-should_run() {
-    local collector="$1"
-    if [ -z "$ONLY_COLLECTORS" ]; then
-        return 0
-    fi
-    echo "$ONLY_COLLECTORS" | grep -q "$collector"
-}
-
-# Scan a URL to check if accessible
-check_url() {
-    local url="$1"
-    local status=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$url" 2>/dev/null || echo "000")
-    if [ "$status" = "200" ] || [ "$status" = "301" ] || [ "$status" = "302" ]; then
-        return 0
-    fi
-    return 1
-}
-
-# Main
excavation function -excavate() { - local project_name="$1" - local project=$(get_project "$project_name") - - if [ -z "$project" ] || [ "$project" = "null" ]; then - error "Project '$project_name' not found in registry" - echo "Add it to: $REGISTRY" - exit 1 - fi - - # Extract project data - local name=$(echo "$project" | jq -r '.name') - local symbol=$(echo "$project" | jq -r '.symbol') - local status=$(echo "$project" | jq -r '.status') - local github_orgs=$(echo "$project" | jq -r '.github[]?' 2>/dev/null) - local btt_topic=$(echo "$project" | jq -r '.bitcointalk // empty') - local website=$(echo "$project" | jq -r '.website // empty') - local explorer=$(echo "$project" | jq -r '.explorer // empty') - local cmc=$(echo "$project" | jq -r '.cmc // empty') - - echo "" - echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}" - echo -e "${BLUE} PROJECT ARCHAEOLOGY: ${name} (${symbol})${NC}" - echo -e "${BLUE} Status: ${status}${NC}" - echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}" - echo "" - - # Create output directory - local dig_dir="$OUTPUT_DIR/$project_name" - mkdir -p "$dig_dir"/{github,releases,bitcointalk,website,explorer,market,papers,community} - - # Start excavation log - local log_file="$dig_dir/EXCAVATION.md" - echo "# Excavation Log: $name ($symbol)" > "$log_file" - echo "" >> "$log_file" - echo "**Started:** $(date)" >> "$log_file" - echo "**Status at dig time:** $status" >> "$log_file" - echo "" >> "$log_file" - echo "---" >> "$log_file" - echo "" >> "$log_file" - - # Phase 1: GitHub (highest priority - often deleted first) - if should_run "github"; then - echo "## GitHub Repositories" >> "$log_file" - echo "" >> "$log_file" - - for org in $github_orgs; do - log "Checking GitHub org: $org" - - if $SCAN_ONLY; then - if check_url "https://github.com/$org"; then - success "GitHub org accessible: $org" - echo "- [x] \`$org\` - accessible" >> "$log_file" - else - warn "GitHub org NOT accessible: $org" - echo "- [ ] \`$org\` - NOT accessible" >> "$log_file" - fi - else - log "Running github-history collector on $org..." - # Would call: $SKILLS_DIR/github-history/collect.sh "https://github.com/$org" --org - echo "- Collected: \`$org\`" >> "$log_file" - fi - done - echo "" >> "$log_file" - fi - - # Phase 2: BitcoinTalk - if should_run "btt" || should_run "bitcointalk"; then - echo "## BitcoinTalk Thread" >> "$log_file" - echo "" >> "$log_file" - - if [ -n "$btt_topic" ]; then - local btt_url="https://bitcointalk.org/index.php?topic=$btt_topic" - log "Checking BitcoinTalk topic: $btt_topic" - - if $SCAN_ONLY; then - if check_url "$btt_url"; then - success "BitcoinTalk thread accessible" - echo "- [x] Topic $btt_topic - accessible" >> "$log_file" - else - warn "BitcoinTalk thread NOT accessible" - echo "- [ ] Topic $btt_topic - NOT accessible" >> "$log_file" - fi - else - log "Running bitcointalk collector..." 
- # Would call: $SKILLS_DIR/bitcointalk/collect.sh "$btt_topic" - echo "- Collected: Topic $btt_topic" >> "$log_file" - fi - else - warn "No BitcoinTalk topic ID in registry" - echo "- [ ] No topic ID recorded" >> "$log_file" - fi - echo "" >> "$log_file" - fi - - # Phase 3: Website via Wayback - if should_run "wayback" || should_run "website"; then - echo "## Website (Wayback Machine)" >> "$log_file" - echo "" >> "$log_file" - - if [ -n "$website" ]; then - log "Checking Wayback Machine for: $website" - local wayback_api="https://archive.org/wayback/available?url=$website" - - if $SCAN_ONLY; then - local wayback_check=$(curl -s "$wayback_api" | jq -r '.archived_snapshots.closest.available // "false"') - if [ "$wayback_check" = "true" ]; then - success "Wayback snapshots available for $website" - echo "- [x] \`$website\` - snapshots available" >> "$log_file" - else - warn "No Wayback snapshots for $website" - echo "- [ ] \`$website\` - no snapshots" >> "$log_file" - fi - else - log "Running wayback collector..." - # Would call: $SKILLS_DIR/job-collector/generate-jobs.sh wayback "$website" - echo "- Collected: \`$website\`" >> "$log_file" - fi - else - warn "No website in registry" - echo "- [ ] No website recorded" >> "$log_file" - fi - echo "" >> "$log_file" - fi - - # Phase 4: Block Explorer - if should_run "explorer"; then - echo "## Block Explorer" >> "$log_file" - echo "" >> "$log_file" - - if [ -n "$explorer" ]; then - log "Checking block explorer: $explorer" - - if $SCAN_ONLY; then - if check_url "https://$explorer"; then - success "Block explorer online: $explorer" - echo "- [x] \`$explorer\` - online" >> "$log_file" - else - warn "Block explorer OFFLINE: $explorer" - echo "- [ ] \`$explorer\` - OFFLINE" >> "$log_file" - fi - else - log "Running block-explorer collector..." - echo "- Collected: \`$explorer\`" >> "$log_file" - fi - else - warn "No explorer in registry" - echo "- [ ] No explorer recorded" >> "$log_file" - fi - echo "" >> "$log_file" - fi - - # Phase 5: Market Data (CMC) - if should_run "cmc" || should_run "market"; then - echo "## Market Data" >> "$log_file" - echo "" >> "$log_file" - - if [ -n "$cmc" ]; then - log "Checking CoinMarketCap: $cmc" - - if $SCAN_ONLY; then - if check_url "https://coinmarketcap.com/currencies/$cmc/"; then - success "CMC page exists: $cmc" - echo "- [x] CMC: \`$cmc\` - exists" >> "$log_file" - else - warn "CMC page NOT found: $cmc" - echo "- [ ] CMC: \`$cmc\` - not found" >> "$log_file" - fi - else - log "Running coinmarketcap collector..." - echo "- Collected: \`$cmc\`" >> "$log_file" - fi - else - warn "No CMC slug in registry" - echo "- [ ] No CMC slug recorded" >> "$log_file" - fi - echo "" >> "$log_file" - fi - - # Finalize log - echo "---" >> "$log_file" - echo "" >> "$log_file" - echo "**Completed:** $(date)" >> "$log_file" - - if $SCAN_ONLY; then - echo "" - success "Scan complete. See: $log_file" - else - echo "" - success "Excavation complete. Output in: $dig_dir" - echo "" - log "Next steps:" - echo " 1. Review: $log_file" - echo " 2. Generate: $dig_dir/SALVAGE-REPORT.md" - echo " 3. 
Write: $dig_dir/LESSONS.md" - fi -} - -# Parse arguments -if [ $# -lt 1 ]; then - usage -fi - -PROJECT="$1" -shift - -while [ $# -gt 0 ]; do - case "$1" in - --scan-only) - SCAN_ONLY=true - ;; - --resume) - RESUME=true - ;; - --only=*) - ONLY_COLLECTORS="${1#*=}" - ;; - --help) - usage - ;; - *) - error "Unknown option: $1" - usage - ;; - esac - shift -done - -# Run excavation -excavate "$PROJECT" diff --git a/claude/collect/update-index.sh b/claude/collect/update-index.sh deleted file mode 100755 index ee25578..0000000 --- a/claude/collect/update-index.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash -# Hook: update-index.sh -# Called after collection completes to update indexes - -WHITEPAPERS_DIR="${1:-./whitepapers}" - -echo "[update-index] Updating whitepaper index..." - -# Count papers in each category -for category in cryptonote lethean research uncategorized; do - dir="$WHITEPAPERS_DIR/$category" - if [ -d "$dir" ]; then - count=$(find "$dir" -name "*.pdf" 2>/dev/null | wc -l | tr -d ' ') - echo " $category: $count papers" - fi -done - -# Update INDEX.md with collected papers -INDEX="$WHITEPAPERS_DIR/INDEX.md" -if [ -f "$INDEX" ]; then - # Add collected papers section if not exists - if ! grep -q "## Recently Collected" "$INDEX"; then - echo "" >> "$INDEX" - echo "## Recently Collected" >> "$INDEX" - echo "" >> "$INDEX" - echo "_Last updated: $(date +%Y-%m-%d)_" >> "$INDEX" - echo "" >> "$INDEX" - fi -fi - -# Process pending jobs -PENDING="$WHITEPAPERS_DIR/.pending-jobs.txt" -if [ -f "$PENDING" ]; then - count=$(wc -l < "$PENDING" | tr -d ' ') - echo "[update-index] $count papers queued for collection" -fi - -echo "[update-index] Done"