fix: issue_claim race — verify assignee after PATCH to prevent duplicate work (#830)

Forgejo's assignees PATCH is last-write-wins, so two dev agents polling concurrently could both observe .assignee == null at the pre-check, both PATCH, and the loser would silently "succeed" and proceed to implement the same issue — colliding at the PR/branch stage. Re-read the assignee after the PATCH and bail out if it isn't self. Label writes are moved AFTER this verification so a losing claim leaves no stray in-progress label to roll back. Adds tests/lib-issue-claim.bats covering the three paths: - happy path (single agent, re-read confirms self) - lost race (re-read shows another agent — returns 1, no labels added) - pre-check skip (initial GET already shows another agent) Prerequisite for the LLAMA_BOTS parametric refactor that will run N dev containers against the same project. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 08:35:18 +00:00 · 2026-04-16 08:35:18 +00:00 · 620515634a
commit 620515634a
parent 2a7ae0b7ea
2 changed files with 198 additions and 0 deletions
--- a/lib/issue-lifecycle.sh
+++ b/lib/issue-lifecycle.sh
@ -132,6 +132,21 @@ issue_claim() {
    "${FORGE_API}/issues/${issue}" \
    -d "{\"assignees\":[\"${me}\"]}" >/dev/null 2>&1 || return 1
  # Verify the PATCH stuck.  Forgejo's assignees PATCH is last-write-wins, so
  # under concurrent claims from multiple dev agents two invocations can both
  # see .assignee == null at the pre-check, both PATCH, and the loser's write
  # gets silently overwritten (issue #830).  Re-reading the assignee closes
  # that TOCTOU window: only the actual winner observes its own login.
  # Labels are intentionally applied AFTER this check so the losing claim
  # leaves no stray "in-progress" label to roll back.
  local actual
  actual=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API}/issues/${issue}" | jq -r '.assignee.login // ""') || return 1
  if [ "$actual" != "$me" ]; then
    _ilc_log "issue #${issue} claim lost to ${actual:-<none>} — skipping"
    return 1
  fi
  local ip_id bl_id
  ip_id=$(_ilc_in_progress_id)
  bl_id=$(_ilc_backlog_id)
--- a/tests/lib-issue-claim.bats
+++ b/tests/lib-issue-claim.bats
@ -0,0 +1,183 @@
 #!/usr/bin/env bats
 # =============================================================================
 # tests/lib-issue-claim.bats — Regression guard for the issue_claim TOCTOU
 # fix landed in #830.
 #
 # Before the fix, two dev agents polling concurrently could both observe
 # `.assignee == null`, both PATCH the assignee, and Forgejo's last-write-wins
 # semantics would leave the loser believing it had claimed successfully.
 # Two agents would then implement the same issue and collide at the PR/branch
 # stage.
 #
 # The fix re-reads the assignee after the PATCH and aborts when it doesn't
 # match self, with label writes moved AFTER the verification so a losing
 # claim leaves no stray `in-progress` label.
 #
 # These tests stub `curl` with a bash function so each call tree can be
 # driven through a specific response sequence (pre-check, PATCH, re-read)
 # without a live Forgejo. The stub records every HTTP call to
 # `$CALLS_LOG` for assertions.
 # =============================================================================
 setup() {
  ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
  export FACTORY_ROOT="$ROOT"
  export FORGE_TOKEN="dummy-token"
  export FORGE_URL="https://forge.example.test"
  export FORGE_API="${FORGE_URL}/api/v1"
  export CALLS_LOG="${BATS_TEST_TMPDIR}/curl-calls.log"
  : > "$CALLS_LOG"
  export ISSUE_GET_COUNT_FILE="${BATS_TEST_TMPDIR}/issue-get-count"
  echo 0 > "$ISSUE_GET_COUNT_FILE"
  # Scenario knobs — overridden per @test.
  export MOCK_ME="bot"
  export MOCK_INITIAL_ASSIGNEE=""
  export MOCK_RECHECK_ASSIGNEE="bot"
  # Stand-in for lib/env.sh's forge_api (we don't source env.sh — too
  # much unrelated setup). Shape mirrors the real helper closely enough
  # that _ilc_ensure_label_id() works.
  forge_api() {
    local method="$1" path="$2"
    shift 2
    curl -sf -X "$method" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}${path}" "$@"
  }
  # curl shim — parses method + URL out of the argv and dispatches
  # canned responses per endpoint. Every call gets logged as
  # `METHOD URL` (one line) to $CALLS_LOG for later grep-based asserts.
  curl() {
    local method="GET" url="" arg
    while [ $# -gt 0 ]; do
      arg="$1"
      case "$arg" in
        -X) method="$2"; shift 2 ;;
        -H|-d|--data-binary|-o) shift 2 ;;
        -sf|-s|-f|--silent|--fail) shift ;;
        *) url="$arg"; shift ;;
      esac
    done
    printf '%s %s\n' "$method" "$url" >> "$CALLS_LOG"
    case "$method $url" in
      "GET ${FORGE_URL}/api/v1/user")
        printf '{"login":"%s"}' "$MOCK_ME"
        ;;
      "GET ${FORGE_API}/issues/"*)
        # Distinguish pre-check (first GET) from re-read (subsequent GETs)
        # via a counter file that persists across curl invocations in the
        # same test.
        local n
        n=$(cat "$ISSUE_GET_COUNT_FILE")
        n=$((n + 1))
        echo "$n" > "$ISSUE_GET_COUNT_FILE"
        local who
        if [ "$n" -eq 1 ]; then
          who="$MOCK_INITIAL_ASSIGNEE"
        else
          who="$MOCK_RECHECK_ASSIGNEE"
        fi
        if [ -z "$who" ]; then
          printf '{"assignee":null}'
        else
          printf '{"assignee":{"login":"%s"}}' "$who"
        fi
        ;;
      "PATCH ${FORGE_API}/issues/"*)
        : # accept any PATCH; body is ignored by the mock
        ;;
      "GET ${FORGE_API}/labels")
        printf '[]'
        ;;
      "POST ${FORGE_API}/labels")
        printf '{"id":99}'
        ;;
      "POST ${FORGE_API}/issues/"*"/labels")
        :
        ;;
      "DELETE ${FORGE_API}/issues/"*"/labels/"*)
        :
        ;;
      *)
        return 1
        ;;
    esac
    return 0
  }
  # shellcheck source=../lib/issue-lifecycle.sh
  source "${ROOT}/lib/issue-lifecycle.sh"
 }
 # ── helpers ──────────────────────────────────────────────────────────────────
 # count_calls METHOD URL — count matching lines in $CALLS_LOG.
 count_calls() {
  local method="$1" url="$2"
  grep -cF "${method} ${url}" "$CALLS_LOG" 2>/dev/null || echo 0
 }
 # ── happy path ───────────────────────────────────────────────────────────────
@test "issue_claim returns 0 when re-read confirms self (no regression, single agent)" {
  export MOCK_ME="bot"
  export MOCK_INITIAL_ASSIGNEE=""
  export MOCK_RECHECK_ASSIGNEE="bot"
  run issue_claim 42
  [ "$status" -eq 0 ]
  # Exactly two GETs to /issues/42 — pre-check and post-PATCH re-read.
  [ "$(count_calls GET "${FORGE_API}/issues/42")" -eq 2 ]
  # Assignee PATCH fired.
  [ "$(count_calls PATCH "${FORGE_API}/issues/42")" -eq 1 ]
  # in-progress label added (POST /issues/42/labels).
  [ "$(count_calls POST "${FORGE_API}/issues/42/labels")" -eq 1 ]
 }
 # ── lost race ────────────────────────────────────────────────────────────────
@test "issue_claim returns 1 and leaves no stray in-progress when re-read shows another agent" {
  export MOCK_ME="bot"
  export MOCK_INITIAL_ASSIGNEE=""
  export MOCK_RECHECK_ASSIGNEE="rival"
  run issue_claim 42
  [ "$status" -eq 1 ]
  [[ "$output" == *"claim lost to rival"* ]]
  # Re-read happened (two GETs) — this is the new verification step.
  [ "$(count_calls GET "${FORGE_API}/issues/42")" -eq 2 ]
  # PATCH happened (losers still PATCH before verifying).
  [ "$(count_calls PATCH "${FORGE_API}/issues/42")" -eq 1 ]
  # CRITICAL: no in-progress label operations on a lost claim.
  # (No need to roll back what was never written.)
  [ "$(count_calls POST "${FORGE_API}/issues/42/labels")" -eq 0 ]
  [ "$(count_calls GET "${FORGE_API}/labels")" -eq 0 ]
 }
 # ── pre-check skip ──────────────────────────────────────────────────────────
@test "issue_claim skips early (no PATCH) when pre-check shows another assignee" {
  export MOCK_ME="bot"
  export MOCK_INITIAL_ASSIGNEE="rival"
  export MOCK_RECHECK_ASSIGNEE="rival"
  run issue_claim 42
  [ "$status" -eq 1 ]
  [[ "$output" == *"already assigned to rival"* ]]
  # Only the pre-check GET — no PATCH, no re-read, no labels.
  [ "$(count_calls GET "${FORGE_API}/issues/42")" -eq 1 ]
  [ "$(count_calls PATCH "${FORGE_API}/issues/42")" -eq 0 ]
  [ "$(count_calls POST "${FORGE_API}/issues/42/labels")" -eq 0 ]
 }