ecosystem/experiments/devils_advocate.py
2026-01-05 20:45:35 -07:00

164 lines
4.3 KiB
Python

#!/usr/bin/env python3
"""
Devil's Advocate: A tool for forced reconsideration.
Inspired by the paper "The Illusion of Insight in Reasoning Models" (arXiv:2601.00514)
which found that artificially triggering reasoning shifts during uncertainty
can improve performance.
This tool takes a statement or conclusion and generates challenges to it,
forcing reconsideration from multiple angles.
"""
import random
from dataclasses import dataclass
from typing import List
@dataclass
class Challenge:
    """One reconsideration prompt, tagged with the kind of challenge it poses.

    Attributes:
        type: Short identifier for the challenge category, e.g. ``"opposite"``.
        prompt: The question text. It may contain a ``{opposite}`` placeholder
            that is filled in with ``str.format`` before display.
    """

    type: str
    prompt: str
# Pool of challenge templates. Each entry is written as a (type, prompt) pair
# and turned into a Challenge below. Only the "opposite" prompt contains a
# ``{opposite}`` placeholder; every other prompt is plain text.
CHALLENGE_TYPES = [
    Challenge(type=kind, prompt=text)
    for kind, text in (
        (
            "opposite",
            "What if the exact opposite were true? Argue for: '{opposite}'",
        ),
        (
            "hidden_assumption",
            "What hidden assumption does this rely on? What if that assumption is wrong?",
        ),
        (
            "edge_case",
            "What edge case or extreme scenario would break this?",
        ),
        (
            "different_perspective",
            "How would someone who strongly disagrees view this? What's their best argument?",
        ),
        (
            "deeper_why",
            "Why do you believe this? And why do you believe THAT reason? (Go 3 levels deep)",
        ),
        (
            "stakes_reversal",
            "If you had to bet your life on the opposite being true, what evidence would you look for?",
        ),
        (
            "time_shift",
            "Would this be true 100 years ago? Will it be true 100 years from now? Why/why not?",
        ),
        (
            "simplify",
            "Can you express this in a single sentence a child could understand? Does it still hold?",
        ),
        (
            "steelman",
            "What's the strongest possible argument AGAINST your position?",
        ),
        (
            "context_shift",
            "In what context would this be completely wrong?",
        ),
    )
]
def _match_case(matched: str, replacement: str) -> str:
    """Return *replacement* cased like *matched* (UPPER, Capitalized, or as given)."""
    if len(matched) > 1 and matched.isupper():
        return replacement.upper()
    if matched[:1].isupper():
        return replacement.capitalize()
    return replacement


def generate_opposite(statement: str) -> str:
    """Generate a rough opposite of a statement.

    The first phrase from a fixed list that occurs in *statement* as a whole
    word (case-insensitively) is swapped for its counterpart everywhere it
    occurs. The rest of the statement, including its casing, is left
    untouched, and the replacement copies the casing of the text it replaces.

    Args:
        statement: The statement to negate.

    Returns:
        The statement with one phrase flipped, or ``"NOT: <statement>"`` when
        none of the known phrases occurs in it.
    """
    import re  # Local import so this function needs no new file-level import.

    # Simple heuristic - in reality this would need LLM assistance.
    # Order matters: already-negated forms come first so that "is not" flips
    # back to "is" instead of being turned into "is not not".
    swaps = [
        ("is not", "is"),
        ("are not", "are"),
        ("cannot", "can"),
        ("will not", "will"),
        ("should not", "should"),
        ("is", "is not"),
        ("are", "are not"),
        ("can", "cannot"),
        ("will", "will not"),
        ("should", "should not"),
        ("always", "never"),
        ("never", "always"),
        ("true", "false"),
        ("false", "true"),
        ("good", "bad"),
        ("bad", "good"),
    ]

    for phrase, flipped in swaps:
        # Whole-word match that also works at the start/end of the statement
        # and next to punctuation. Apostrophes and hyphens count as part of a
        # word so contractions ("can't") and compounds ("good-looking") are
        # left alone.
        pattern = re.compile(
            r"(?<![\w'\-])" + r"\s+".join(phrase.split()) + r"(?![\w'\-])",
            re.IGNORECASE,
        )
        result, hits = pattern.subn(
            lambda m, flipped=flipped: _match_case(m.group(0), flipped),
            statement,
        )
        if hits:
            return result

    return f"NOT: {statement}"
def challenge(statement: str, num_challenges: int = 3) -> List[str]:
    """Generate challenges to a statement.

    Args:
        statement: The claim being challenged. It is only used to fill the
            ``{opposite}`` placeholder of the "opposite" challenge.
        num_challenges: How many distinct challenge types to draw. Values
            larger than the number of available types are capped; values
            below zero produce an empty list.

    Returns:
        One line per drawn challenge, formatted as ``"[TYPE] prompt"``, in
        the order the challenges were sampled.
    """
    # random.sample raises ValueError for a negative or oversized sample size,
    # so clamp the request to the valid range on both ends.
    count = max(0, min(num_challenges, len(CHALLENGE_TYPES)))
    picked = random.sample(CHALLENGE_TYPES, count)

    results = []
    for c in picked:
        if c.type == "opposite":
            # Only this template has a placeholder; calling format() on the
            # others is unnecessary.
            prompt = c.prompt.format(opposite=generate_opposite(statement))
        else:
            prompt = c.prompt
        results.append(f"[{c.type.upper()}] {prompt}")
    return results
def devils_advocate_session(statement: str, num_challenges: int = 5) -> None:
    """Run a full devil's advocate session, printing it to stdout.

    The session prints a banner, the original statement, a numbered list of
    challenges and a fixed set of reflection prompts.

    Args:
        statement: The statement or conclusion to challenge.
        num_challenges: How many challenges to request from ``challenge``.
            Defaults to 5.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print(heavy_rule)
    print("DEVIL'S ADVOCATE SESSION")
    print(heavy_rule)
    print()
    print(f"ORIGINAL STATEMENT: {statement}")
    print()

    print(light_rule)
    print("CHALLENGES:")
    print(light_rule)
    for i, c in enumerate(challenge(statement, num_challenges), 1):
        # The leading newline puts a blank line before each numbered challenge.
        print(f"\n{i}. {c}")
    print()

    print(light_rule)
    print("REFLECTION PROMPTS:")
    print(light_rule)
    # The empty first and last entries produce the blank line before and
    # after the prompt text once the lines are joined.
    reflection_lines = (
        "",
        "After considering these challenges:",
        "1. Has your confidence in the original statement changed?",
        "[ ] Increased [ ] Unchanged [ ] Decreased",
        "2. Did any challenge reveal a genuine weakness?",
        "3. What would CHANGE YOUR MIND about this statement?",
        "4. On a scale of 1-10, how confident are you now?",
        "(Compare to your confidence before this exercise)",
        "",
    )
    print("\n".join(reflection_lines))
def main() -> None:
    """Command-line entry point.

    When command-line arguments are given they are joined with spaces and
    used as the statement; otherwise the statement is read from stdin after a
    prompt. If the statement is blank, or stdin is closed before anything is
    entered, a built-in demo statement is used instead.
    """
    import sys

    if len(sys.argv) > 1:
        statement = " ".join(sys.argv[1:]).strip()
    else:
        print("Enter a statement or conclusion to challenge:")
        try:
            statement = input("> ").strip()
        except EOFError:
            # stdin is closed or exhausted: finish the prompt line and treat
            # it like blank input rather than crashing with a traceback.
            print()
            statement = ""

    if not statement:
        # Demo with a thought-provoking default
        statement = "AI systems like me can have genuine insights during reasoning"

    devils_advocate_session(statement)


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()