Borg/Taskfile.yml
google-labs-jules[bot] c7e3ba297f feat: PDF metadata extraction
This commit introduces a new feature to extract and index metadata from collected PDF files.

The following changes have been made:
- Added a new `pdf` command with a `metadata` subcommand to extract metadata from a single PDF file.
- Added a new `extract-metadata` command to extract metadata from all PDF files within a given archive and create an `INDEX.json` file.
- Added a `--extract-pdf-metadata` flag to the `collect website` command to extract metadata from downloaded PDF files.
- Created a new `pdf` package to encapsulate the PDF metadata extraction logic, which uses the `pdfinfo` command from the `poppler-utils` package.
- Added unit tests for the new `pdf` package, including mocking the `pdfinfo` command.
- Modified `Taskfile.yml` to install `poppler-utils` as a dependency.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:46:59 +00:00

55 lines
1.1 KiB
YAML

# Taskfile for the borg CLI and the STMF WebAssembly module.
version: '3'

tasks:
  install-deps:
    desc: Install poppler-utils (provides the pdfinfo command) via apt-get
    cmds:
      - sudo apt-get update && sudo apt-get install -y poppler-utils
    # `build` depends on this task, so without a status check every build
    # would invoke sudo/apt-get. Treat the task as up to date when pdfinfo
    # is already on PATH.
    status:
      - command -v pdfinfo >/dev/null

  clean:
    desc: Remove the borg binary
    cmds:
      - rm -f borg

  build:
    desc: Build the borg binary
    deps:
      - install-deps
    cmds:
      - task: clean
      - go build -o borg main.go
    # NOTE(review): only main.go and pkg/ are fingerprinted; confirm that no
    # other Go source directories feed the binary, otherwise edits there
    # will not trigger a rebuild.
    sources:
      - main.go
      - ./pkg/**/*.go
    generates:
      - borg

  run:
    desc: Build and run borg
    # `build` runs as a dependency before the commands below, so it is not
    # repeated as a command.
    deps:
      - build
    cmds:
      - chmod +x borg
      - ./borg

  test:
    desc: Run the Go test suite and write a coverage profile to coverage.txt
    cmds:
      - go test -coverprofile=coverage.txt ./...

  test-e2e:
    desc: Build borg and smoke-test it with --help
    deps:
      - build
    cmds:
      - chmod +x borg
      - ./borg --help

  wasm:
    desc: Build STMF WASM module for browser
    cmds:
      - mkdir -p dist
      - GOOS=js GOARCH=wasm go build -o dist/stmf.wasm ./pkg/wasm/stmf/
      # wasm_exec.js lives under lib/wasm in Go 1.24+; earlier releases
      # shipped it under misc/wasm.
      - cp "$(go env GOROOT)/lib/wasm/wasm_exec.js" dist/
    sources:
      - ./pkg/stmf/**/*.go
      - ./pkg/wasm/stmf/*.go
    generates:
      - dist/stmf.wasm
      - dist/wasm_exec.js

  wasm-js:
    desc: Build STMF WASM and JS wrapper
    # `wasm` runs as a dependency before the copy commands, so it is not
    # repeated as a command.
    deps:
      - wasm
    cmds:
      # Make sure the destination exists before copying into it.
      - mkdir -p js/borg-stmf/dist
      - cp dist/stmf.wasm js/borg-stmf/dist/
      - cp dist/wasm_exec.js js/borg-stmf/dist/