# Issue Deduplicator
#
# Runs when an issue is opened, or when the `codex-deduplicate` label is
# applied. The pipeline is split into four jobs:
#
#   1. gather-duplicates-all   - asks Codex to compare the issue against up to
#                                1000 of the most recently created issues in
#                                any state.
#   2. gather-duplicates-open  - fallback that only runs when pass 1 produced
#                                no matches; compares against open issues only.
#   3. select-final            - picks the result of whichever pass matched.
#   4. comment-on-issue        - posts the candidate list on the issue and
#                                removes the trigger label if one was used.
#
# Each "normalize" step converts the raw Codex message into three job outputs:
# `issues_json` (compact JSON array of issue-number strings, de-duplicated,
# without the current issue, at most five entries), `reason` (free text) and
# `has_matches` (`true`/`false`).
name: Issue Deduplicator

on:
  issues:
    types:
      - opened
      - labeled

jobs:
  gather-duplicates-all:
    name: Identify potential duplicates (all issues)
    # Prevent runs on forks (requires OpenAI API key, wastes Actions minutes)
    if: >-
      github.repository == 'openai/codex' &&
      (github.event.action == 'opened' ||
      (github.event.action == 'labeled' && github.event.label.name == 'codex-deduplicate'))
    runs-on: ubuntu-latest
    permissions:
      contents: read
    outputs:
      issues_json: ${{ steps.normalize-all.outputs.issues_json }}
      reason: ${{ steps.normalize-all.outputs.reason }}
      has_matches: ${{ steps.normalize-all.outputs.has_matches }}
    steps:
      - uses: actions/checkout@v6

      # Writes the two JSON files that the Codex prompt below refers to.
      # Issue bodies are truncated to 4000 characters.
      - name: Prepare Codex inputs
        env:
          GH_TOKEN: ${{ github.token }}
          REPO: ${{ github.repository }}
          ISSUE_NUMBER: ${{ github.event.issue.number }}
        run: |
          set -eo pipefail

          CURRENT_ISSUE_FILE=codex-current-issue.json
          EXISTING_ALL_FILE=codex-existing-issues-all.json

          gh issue list --repo "$REPO" \
            --json number,title,body,createdAt,updatedAt,state,labels \
            --limit 1000 \
            --state all \
            --search "sort:created-desc" \
            | jq '[.[] | {
                number,
                title,
                body: ((.body // "")[0:4000]),
                createdAt,
                updatedAt,
                state,
                labels: ((.labels // []) | map(.name))
              }]' \
            > "$EXISTING_ALL_FILE"

          gh issue view "$ISSUE_NUMBER" \
            --repo "$REPO" \
            --json number,title,body \
            | jq '{number, title, body: ((.body // "")[0:4000])}' \
            > "$CURRENT_ISSUE_FILE"

          echo "Prepared duplicate detection input files."
          echo "all_issue_count=$(jq 'length' "$EXISTING_ALL_FILE")"

      # Prompt instructions are intentionally inline in this workflow. The old
      # .github/prompts/issue-deduplicator.txt file is obsolete and removed.
      - id: codex-all
        name: Find duplicates (pass 1, all issues)
        uses: openai/codex-action@main
        with:
          openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
          allow-users: "*"
          prompt: |
            You are an assistant that triages new GitHub issues by identifying potential duplicates.

            You will receive the following JSON files located in the current working directory:
            - `codex-current-issue.json`: JSON object describing the newly created issue (fields: number, title, body).
            - `codex-existing-issues-all.json`: JSON array of recent issues with states, timestamps, and labels.

            Instructions:
            - Compare the current issue against the existing issues to find up to five that appear to describe the same underlying problem or request.
            - Prioritize concrete overlap in symptoms, reproduction details, error signatures, and user intent.
            - Prefer active unresolved issues when confidence is similar.
            - Closed issues can still be valid duplicates if they clearly match.
            - Return fewer matches rather than speculative ones.
            - If confidence is low, return an empty list.
            - Include at most five issue numbers.
            - After analysis, provide a short reason for your decision.
          output-schema: |
            {
              "type": "object",
              "properties": {
                "issues": {
                  "type": "array",
                  "items": {
                    "type": "string"
                  }
                },
                "reason": { "type": "string" }
              },
              "required": ["issues", "reason"],
              "additionalProperties": false
            }

      - id: normalize-all
        name: Normalize pass 1 output
        env:
          CODEX_OUTPUT: ${{ steps.codex-all.outputs.final-message }}
          CURRENT_ISSUE_NUMBER: ${{ github.event.issue.number }}
        run: |
          set -eo pipefail

          # Strip carriage returns before handing the message to jq.
          raw=${CODEX_OUTPUT//$'\r'/}
          parsed=false
          issues='[]'
          reason=''

          # Accept the message only if it is a JSON object with an `issues`
          # array; anything else is treated as "no matches".
          if [ -n "$raw" ] && printf '%s' "$raw" | jq -e 'type == "object" and (.issues | type == "array")' >/dev/null 2>&1; then
            parsed=true
            issues=$(printf '%s' "$raw" | jq -c '[.issues[] | tostring]')
            reason=$(printf '%s' "$raw" | jq -r '.reason // ""')
          else
            reason='Pass 1 output was empty or invalid JSON.'
          fi

          # Drop the current issue, de-duplicate while keeping first-seen
          # order, and cap the list at five entries.
          filtered=$(jq -cn --argjson issues "$issues" --arg current "$CURRENT_ISSUE_NUMBER" '[
            $issues[]
            | tostring
            | select(. != $current)
          ] | reduce .[] as $issue ([]; if index($issue) then . else . + [$issue] end) | .[:5]')

          has_matches=false
          if [ "$(jq 'length' <<< "$filtered")" -gt 0 ]; then
            has_matches=true
          fi

          echo "Pass 1 parsed: $parsed"
          echo "Pass 1 matches after filtering: $(jq 'length' <<< "$filtered")"
          echo "Pass 1 reason: $reason"

          # `reason` is model-generated free text and may span several lines,
          # so it is written with the multi-line output syntax. A random
          # delimiter keeps a line inside the text from terminating the value
          # early; the single-line outputs are written first for the same
          # reason.
          delimiter="ghadelimiter_$(cat /proc/sys/kernel/random/uuid)"
          {
            echo "issues_json=$filtered"
            echo "has_matches=$has_matches"
            echo "reason<<$delimiter"
            printf '%s\n' "$reason"
            echo "$delimiter"
          } >> "$GITHUB_OUTPUT"

  gather-duplicates-open:
    name: Identify potential duplicates (open issues fallback)
    # Pass 1 may drop sudo on the runner, so run the fallback in a fresh job.
    needs: gather-duplicates-all
    if: ${{ needs.gather-duplicates-all.result == 'success' && needs.gather-duplicates-all.outputs.has_matches != 'true' }}
    runs-on: ubuntu-latest
    permissions:
      contents: read
    outputs:
      issues_json: ${{ steps.normalize-open.outputs.issues_json }}
      reason: ${{ steps.normalize-open.outputs.reason }}
      has_matches: ${{ steps.normalize-open.outputs.has_matches }}
    steps:
      - uses: actions/checkout@v6

      # Same preparation as pass 1, restricted to open issues.
      - name: Prepare Codex inputs
        env:
          GH_TOKEN: ${{ github.token }}
          REPO: ${{ github.repository }}
          ISSUE_NUMBER: ${{ github.event.issue.number }}
        run: |
          set -eo pipefail

          CURRENT_ISSUE_FILE=codex-current-issue.json
          EXISTING_OPEN_FILE=codex-existing-issues-open.json

          gh issue list --repo "$REPO" \
            --json number,title,body,createdAt,updatedAt,state,labels \
            --limit 1000 \
            --state open \
            --search "sort:created-desc" \
            | jq '[.[] | {
                number,
                title,
                body: ((.body // "")[0:4000]),
                createdAt,
                updatedAt,
                state,
                labels: ((.labels // []) | map(.name))
              }]' \
            > "$EXISTING_OPEN_FILE"

          gh issue view "$ISSUE_NUMBER" \
            --repo "$REPO" \
            --json number,title,body \
            | jq '{number, title, body: ((.body // "")[0:4000])}' \
            > "$CURRENT_ISSUE_FILE"

          echo "Prepared fallback duplicate detection input files."
          echo "open_issue_count=$(jq 'length' "$EXISTING_OPEN_FILE")"

      - id: codex-open
        name: Find duplicates (pass 2, open issues)
        uses: openai/codex-action@main
        with:
          openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
          allow-users: "*"
          prompt: |
            You are an assistant that triages new GitHub issues by identifying potential duplicates.

            This is a fallback pass because a broad search did not find convincing matches.

            You will receive the following JSON files located in the current working directory:
            - `codex-current-issue.json`: JSON object describing the newly created issue (fields: number, title, body).
            - `codex-existing-issues-open.json`: JSON array of open issues only.

            Instructions:
            - Search only these active unresolved issues for duplicates of the current issue.
            - Prioritize concrete overlap in symptoms, reproduction details, error signatures, and user intent.
            - Prefer fewer, higher-confidence matches.
            - If confidence is low, return an empty list.
            - Include at most five issue numbers.
            - After analysis, provide a short reason for your decision.
          output-schema: |
            {
              "type": "object",
              "properties": {
                "issues": {
                  "type": "array",
                  "items": {
                    "type": "string"
                  }
                },
                "reason": { "type": "string" }
              },
              "required": ["issues", "reason"],
              "additionalProperties": false
            }

      - id: normalize-open
        name: Normalize pass 2 output
        env:
          CODEX_OUTPUT: ${{ steps.codex-open.outputs.final-message }}
          CURRENT_ISSUE_NUMBER: ${{ github.event.issue.number }}
        run: |
          set -eo pipefail

          # Strip carriage returns before handing the message to jq.
          raw=${CODEX_OUTPUT//$'\r'/}
          parsed=false
          issues='[]'
          reason=''

          # Accept the message only if it is a JSON object with an `issues`
          # array; anything else is treated as "no matches".
          if [ -n "$raw" ] && printf '%s' "$raw" | jq -e 'type == "object" and (.issues | type == "array")' >/dev/null 2>&1; then
            parsed=true
            issues=$(printf '%s' "$raw" | jq -c '[.issues[] | tostring]')
            reason=$(printf '%s' "$raw" | jq -r '.reason // ""')
          else
            reason='Pass 2 output was empty or invalid JSON.'
          fi

          # Drop the current issue, de-duplicate while keeping first-seen
          # order, and cap the list at five entries.
          filtered=$(jq -cn --argjson issues "$issues" --arg current "$CURRENT_ISSUE_NUMBER" '[
            $issues[]
            | tostring
            | select(. != $current)
          ] | reduce .[] as $issue ([]; if index($issue) then . else . + [$issue] end) | .[:5]')

          has_matches=false
          if [ "$(jq 'length' <<< "$filtered")" -gt 0 ]; then
            has_matches=true
          fi

          echo "Pass 2 parsed: $parsed"
          echo "Pass 2 matches after filtering: $(jq 'length' <<< "$filtered")"
          echo "Pass 2 reason: $reason"

          # `reason` is model-generated free text and may span several lines,
          # so it is written with the multi-line output syntax. A random
          # delimiter keeps a line inside the text from terminating the value
          # early; the single-line outputs are written first for the same
          # reason.
          delimiter="ghadelimiter_$(cat /proc/sys/kernel/random/uuid)"
          {
            echo "issues_json=$filtered"
            echo "has_matches=$has_matches"
            echo "reason<<$delimiter"
            printf '%s\n' "$reason"
            echo "$delimiter"
          } >> "$GITHUB_OUTPUT"

  select-final:
    name: Select final duplicate set
    needs:
      - gather-duplicates-all
      - gather-duplicates-open
    # always() is required so this job still runs when the fallback job was
    # skipped because pass 1 already found matches.
    if: ${{ always() && needs.gather-duplicates-all.result == 'success' && (needs.gather-duplicates-open.result == 'success' || needs.gather-duplicates-open.result == 'skipped') }}
    runs-on: ubuntu-latest
    permissions:
      contents: read
    outputs:
      codex_output: ${{ steps.select-final.outputs.codex_output }}
    steps:
      - id: select-final
        name: Select final duplicate set
        env:
          PASS1_ISSUES: ${{ needs.gather-duplicates-all.outputs.issues_json }}
          PASS1_REASON: ${{ needs.gather-duplicates-all.outputs.reason }}
          PASS2_ISSUES: ${{ needs.gather-duplicates-open.outputs.issues_json }}
          PASS2_REASON: ${{ needs.gather-duplicates-open.outputs.reason }}
          PASS1_HAS_MATCHES: ${{ needs.gather-duplicates-all.outputs.has_matches }}
          PASS2_HAS_MATCHES: ${{ needs.gather-duplicates-open.outputs.has_matches }}
        run: |
          set -eo pipefail

          selected_issues='[]'
          selected_reason='No plausible duplicates found.'
          selected_pass='none'

          if [ "$PASS1_HAS_MATCHES" = "true" ]; then
            selected_issues=${PASS1_ISSUES:-'[]'}
            selected_reason=${PASS1_REASON:-'Pass 1 found duplicates.'}
            selected_pass='all'
          fi

          # Checked second, so a pass 2 result overrides pass 1 if both are set.
          if [ "$PASS2_HAS_MATCHES" = "true" ]; then
            selected_issues=${PASS2_ISSUES:-'[]'}
            selected_reason=${PASS2_REASON:-'Pass 2 found duplicates.'}
            selected_pass='open-fallback'
          fi

          final_json=$(jq -cn \
            --argjson issues "$selected_issues" \
            --arg reason "$selected_reason" \
            --arg pass "$selected_pass" \
            '{issues: $issues, reason: $reason, pass: $pass}')

          echo "Final pass used: $selected_pass"
          echo "Final duplicate count: $(jq '.issues | length' <<< "$final_json")"
          echo "Final reason: $(jq -r '.reason' <<< "$final_json")"

          # Random delimiter: the JSON embeds the model-generated reason text.
          delimiter="ghadelimiter_$(cat /proc/sys/kernel/random/uuid)"
          {
            echo "codex_output<<$delimiter"
            printf '%s\n' "$final_json"
            echo "$delimiter"
          } >> "$GITHUB_OUTPUT"

  comment-on-issue:
    name: Comment with potential duplicates
    needs: select-final
    if: ${{ always() && needs.select-final.result == 'success' }}
    runs-on: ubuntu-latest
    permissions:
      contents: read
      issues: write
    steps:
      - name: Comment on issue
        uses: actions/github-script@v8
        env:
          CODEX_OUTPUT: ${{ needs.select-final.outputs.codex_output }}
        with:
          github-token: ${{ github.token }}
          script: |
            const raw = process.env.CODEX_OUTPUT ?? '';
            let parsed;
            try {
              parsed = JSON.parse(raw);
            } catch (error) {
              core.info(`Codex output was not valid JSON. Raw output: ${raw}`);
              core.info(`Parse error: ${error.message}`);
              return;
            }

            const issues = Array.isArray(parsed?.issues) ? parsed.issues : [];
            const currentIssueNumber = String(context.payload.issue.number);
            const passUsed = typeof parsed?.pass === 'string' ? parsed.pass : 'unknown';
            const reason = typeof parsed?.reason === 'string' ? parsed.reason : '';

            console.log(`Current issue number: ${currentIssueNumber}`);
            console.log(`Pass used: ${passUsed}`);
            if (reason) {
              console.log(`Reason: ${reason}`);
            }
            console.log(issues);

            // Re-apply the de-duplication, self-reference filter and cap of
            // five before posting.
            const filteredIssues = [...new Set(issues.map((value) => String(value)))].filter((value) => value !== currentIssueNumber).slice(0, 5);

            if (filteredIssues.length === 0) {
              core.info('Codex reported no potential duplicates.');
              return;
            }

            const lines = [
              'Potential duplicates detected. Please review them and close your issue if it is a duplicate.',
              '',
              ...filteredIssues.map((value) => `- #${String(value)}`),
              '',
              '*Powered by [Codex Action](https://github.com/openai/codex-action)*'];

            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.payload.issue.number,
              body: lines.join("\n"),
            });

      # Best effort: runs even if the comment step failed, and a failure to
      # remove the label does not fail the job.
      - name: Remove codex-deduplicate label
        if: ${{ always() && github.event.action == 'labeled' && github.event.label.name == 'codex-deduplicate' }}
        env:
          GH_TOKEN: ${{ github.token }}
          GH_REPO: ${{ github.repository }}
          ISSUE_NUMBER: ${{ github.event.issue.number }}
        run: |
          gh issue edit "$ISSUE_NUMBER" --remove-label codex-deduplicate || true
          echo "Attempted to remove label: codex-deduplicate"