=== status === M codex-pr-reviewer/index.js M gemini-pr-reviewer/index.js M pr-review-shared/lib/__tests__/ask-ai.test.js M pr-review-shared/lib/ask-ai.js M pr-review-shared/lib/review-handler.js ?? pr-review-shared/lib/__tests__/review-handler.test.js === diff === diff --git a/codex-pr-reviewer/index.js b/codex-pr-reviewer/index.js index 8218f4cd..3acd9c33 100644 --- a/codex-pr-reviewer/index.js +++ b/codex-pr-reviewer/index.js @@ -6,5 +6,10 @@ dotenv.config(); export default createReviewHandler({ botName: 'Codex', binaryPath: `${process.env.HOME}/.local/bin/codex`, - cliArgs: ['exec', '-m', 'gpt-5.4', '-s', 'read-only'], + // askAI runs this in an empty temp dir (no .git), so codex must not try to + // attach to a repository — --skip-git-repo-check prevents the "not inside a + // git repo" error AND stops codex from inspecting the reviewer app's own tree + // as the repo under review. -s read-only keeps it from writing anything. The + // PR diff is supplied entirely via stdin. + cliArgs: ['exec', '--skip-git-repo-check', '-m', 'gpt-5.4', '-s', 'read-only'], }); diff --git a/gemini-pr-reviewer/index.js b/gemini-pr-reviewer/index.js index a728c11c..bae03c8c 100644 --- a/gemini-pr-reviewer/index.js +++ b/gemini-pr-reviewer/index.js @@ -6,5 +6,9 @@ dotenv.config(); export default createReviewHandler({ botName: 'Gemini', binaryPath: `${process.env.HOME}/.local/bin/gemini`, - cliArgs: ['-p', '-', '-o', 'text', '-m', 'gemini-2.5-pro', '-y'], + // --approval-mode plan = read-only mode (no tool execution). Combined with the + // empty sandbox cwd in askAI, this stops gemini from exploring the reviewer + // app's own source tree and reviewing the wrong repo. The diff is supplied + // entirely via stdin, mirroring Claude's tool-disabled posture. + cliArgs: ['-p', '-', '-o', 'text', '-m', 'gemini-2.5-pro', '--approval-mode', 'plan'], }); diff --git a/pr-review-shared/lib/__tests__/ask-ai.test.js b/pr-review-shared/lib/__tests__/ask-ai.test.js index 1c44325c..8badcfdf 100644 --- a/pr-review-shared/lib/__tests__/ask-ai.test.js +++ b/pr-review-shared/lib/__tests__/ask-ai.test.js @@ -1,5 +1,6 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { EventEmitter } from 'events'; +import { spawn } from 'child_process'; // Mock child_process.spawn let mockChild; @@ -55,4 +56,23 @@ describe('askAI', () => { const result = await promise; expect(result).toBe('chunk1chunk2'); }); + + it('spawns in an isolated temp dir by default (not the bot cwd)', async () => { + const promise = askAI('/usr/bin/test', [], 'prompt'); + mockChild.emit('close', 0); + await promise; + const opts = spawn.mock.calls.at(-1)[2]; + expect(opts.cwd).toBeTruthy(); + // Default sandbox lives under the OS temp dir, never the process cwd. + expect(opts.cwd).not.toBe(process.cwd()); + expect(opts.cwd).toMatch(/pr-review-/); + }); + + it('honors an explicit cwd when provided', async () => { + const promise = askAI('/usr/bin/test', [], 'prompt', { cwd: '/some/explicit/dir' }); + mockChild.emit('close', 0); + await promise; + const opts = spawn.mock.calls.at(-1)[2]; + expect(opts.cwd).toBe('/some/explicit/dir'); + }); }); diff --git a/pr-review-shared/lib/ask-ai.js b/pr-review-shared/lib/ask-ai.js index cc137f95..5bb5f2f0 100644 --- a/pr-review-shared/lib/ask-ai.js +++ b/pr-review-shared/lib/ask-ai.js @@ -1,19 +1,42 @@ import { spawn } from 'child_process'; +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; /** * Spawn a CLI subprocess, pipe a prompt to stdin, and return stdout. * + * IMPORTANT: agentic CLIs (gemini, codex) inspect their *current working + * directory* for repo context. If we let them inherit the bot's own cwd (the + * reviewer app source tree), they review THAT repo instead of the diff in the + * prompt — the root cause of false "wrong repo" reviews (gbharg/exult #11). + * We therefore run every CLI in a throwaway empty temp dir so there is nothing + * local for the model to read. The PR's diff/context is supplied entirely via + * the prompt on stdin, so the model has no legitimate reason to touch the FS. + * * @param {string} binary - Path to CLI binary (claude, gemini, codex) * @param {string[]} args - CLI arguments * @param {string} prompt - Prompt text to pipe via stdin * @param {object} [options] * @param {number} [options.timeout=180000] - Subprocess timeout in ms + * @param {string} [options.cwd] - Working directory for the subprocess. + * Defaults to a fresh empty temp dir (see note above). * @returns {Promise} stdout output */ -export function askAI(binary, args, prompt, { timeout = 180_000 } = {}) { +export function askAI(binary, args, prompt, { timeout = 180_000, cwd } = {}) { return new Promise((resolve, reject) => { + // Isolate the subprocess in an empty dir so agentic CLIs cannot read the + // reviewer app's own source as "the repo under review". + const sandboxDir = cwd ?? mkdtempSync(join(tmpdir(), 'pr-review-')); + const cleanup = () => { + if (!cwd) { + try { rmSync(sandboxDir, { recursive: true, force: true }); } catch { /* best-effort */ } + } + }; + const child = spawn(binary, args, { timeout, + cwd: sandboxDir, env: { ...process.env }, stdio: ['pipe', 'pipe', 'pipe'], }); @@ -25,6 +48,7 @@ export function askAI(binary, args, prompt, { timeout = 180_000 } = {}) { child.stderr.on('data', (data) => { stderr += data.toString(); }); child.on('close', (code) => { + cleanup(); if (code === 0) { resolve(stdout.trim()); } else { @@ -32,7 +56,10 @@ export function askAI(binary, args, prompt, { timeout = 180_000 } = {}) { } }); - child.on('error', reject); + child.on('error', (err) => { + cleanup(); + reject(err); + }); child.stdin.write(prompt); child.stdin.end(); diff --git a/pr-review-shared/lib/review-handler.js b/pr-review-shared/lib/review-handler.js index 1d0a62e7..d72607c0 100644 --- a/pr-review-shared/lib/review-handler.js +++ b/pr-review-shared/lib/review-handler.js @@ -44,6 +44,23 @@ export function createReviewHandler({ botName, binaryPath, cliArgs }) { const isSynchronize = context.payload.action === "synchronize"; app.log.info(`Processing PR #${pr.number} - ${pr.title} (${context.payload.action})`); + // Repo-context guard: the repo under review is ALWAYS the one that fired + // the webhook (context.payload.repository), at the PR head SHA. Assert it + // agrees with the PR's base repo before doing anything, so we never review + // a diff against the wrong tree. Mismatch => log + abort (don't review). + const webhookRepo = context.payload.repository.full_name; + const prBaseRepo = pr.base?.repo?.full_name; + const prHeadSha = pr.head?.sha; + if (prBaseRepo && prBaseRepo !== webhookRepo) { + app.log.error( + `Repo-context mismatch for PR #${pr.number}: webhook repo '${webhookRepo}' != PR base repo '${prBaseRepo}'. Aborting review.`, + ); + return; + } + app.log.info( + `Repo under review: ${webhookRepo} @ ${prHeadSha ?? "unknown-sha"} (head: ${pr.head?.repo?.full_name ?? "?"})`, + ); + let startedReviewId; try { // Check prior state BEFORE dismissing (dismissal destroys the signal) @@ -124,7 +141,26 @@ export function createReviewHandler({ botName, binaryPath, cliArgs }) { }); } - app.log.info(`Invoking ${botName} CLI for PR #${pr.number} (${prompt.length} chars)...`); + // Final guard before spending a model call: the prompt must contain the + // diff we fetched from THIS PR. If the diff is empty the CLI would be + // tempted to inspect its own working dir, so refuse rather than review. + if (!diff.trim()) { + app.log.error( + `Empty diff for PR #${pr.number} in ${webhookRepo}; aborting to avoid reviewing the wrong tree.`, + ); + await updateStartedReviewNoFiles(context.octokit, { + owner, + repo, + pull_number: pr.number, + reviewId: startedReviewId, + botName, + }); + return; + } + + app.log.info( + `Invoking ${botName} CLI for PR #${pr.number} in ${webhookRepo} (${prompt.length} chars)...`, + ); const raw = await askAI(binaryPath, cliArgs, prompt); const result = parseReviewResponse(raw); const validComments = validateComments(result.comments || [], filtered); DONE_EXIT_OK