diff --git a/.github/mlc_config.json b/.github/mlc_config.json index 9695722f..a0cec581 100644 --- a/.github/mlc_config.json +++ b/.github/mlc_config.json @@ -18,6 +18,15 @@ { "pattern": "https://docs\\.codeclash\\.io" }, + { + "pattern": "https://robotrumble\\.org/boards/2" + }, + { + "pattern": "https://robocode\\.sourceforge\\.io.*" + }, + { + "pattern": "https://huskybench\\.com/.*" + }, { "pattern": "https?://(.*\\.)?twitter\\.com/.*" }, diff --git a/README.md b/README.md index f9bc5b23..1e7f5436 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,15 @@ Critically, *LMs don't play the game directly*. Their code serves as their competitive proxy. The winner is the LM agent who wins the most rounds. +## 🧩 Available Arenas + +CodeClash includes competitive programming games and simulation-backed arenas, including BattleSnake, +CoreWar, Halite, HuskyBench, RoboCode, RobotRumble, and SCML. + +SCML is a supply-chain negotiation arena based on the ANAC Supply Chain Management League OneShot +track. Agents edit a Python `scml_agent.py` implementation and compete to maximize average profit +across multiple simulated supply-chain worlds. + ## 🚀 Get Involved - Check out our [docs](https://docs.codeclash.ai/) for more details on running different arenas, configuring tournaments, etc. diff --git a/codeclash/arenas/__init__.py b/codeclash/arenas/__init__.py index 3958f8b5..923f151e 100644 --- a/codeclash/arenas/__init__.py +++ b/codeclash/arenas/__init__.py @@ -15,6 +15,7 @@ from codeclash.arenas.huskybench.huskybench import HuskyBenchArena from codeclash.arenas.robocode.robocode import RoboCodeArena from codeclash.arenas.robotrumble.robotrumble import RobotRumbleArena +from codeclash.arenas.scml.scml import SCMLOneShotArena ARENAS = [ BattleCode23Arena, @@ -33,6 +34,7 @@ HuskyBenchArena, RoboCodeArena, RobotRumbleArena, + SCMLOneShotArena, ] diff --git a/codeclash/arenas/scml/SCML.Dockerfile b/codeclash/arenas/scml/SCML.Dockerfile new file mode 100644 index 00000000..94c14350 --- /dev/null +++ b/codeclash/arenas/scml/SCML.Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.11-slim-bookworm + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates git build-essential jq \ + && rm -rf /var/lib/apt/lists/* + +RUN python -m pip install --upgrade pip \ + && python -m pip install scml==0.8.2 + +WORKDIR /workspace + +COPY codeclash/arenas/scml/runtime/ /workspace/ + +RUN git init \ + && git config user.email "player@codeclash.com" \ + && git config user.name "Player" \ + && git add . \ + && git commit -m "Initial SCML workspace" diff --git a/codeclash/arenas/scml/__init__.py b/codeclash/arenas/scml/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/codeclash/arenas/scml/__init__.py @@ -0,0 +1 @@ + diff --git a/codeclash/arenas/scml/runtime/.gitignore b/codeclash/arenas/scml/runtime/.gitignore new file mode 100644 index 00000000..43ae0e2a --- /dev/null +++ b/codeclash/arenas/scml/runtime/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +*.py[cod] diff --git a/codeclash/arenas/scml/runtime/README.md b/codeclash/arenas/scml/runtime/README.md new file mode 100644 index 00000000..b9df5b6a --- /dev/null +++ b/codeclash/arenas/scml/runtime/README.md @@ -0,0 +1,15 @@ +# SCML OneShot CodeClash Workspace + +Edit `scml_agent.py`. + +Your file must define `MyAgent`, an SCML OneShot agent class. A safe starting point is: + +```python +from scml.oneshot.agents import GreedySyncAgent + + +class MyAgent(GreedySyncAgent): + pass +``` + +The arena runs multiple SCML2024 OneShot worlds and scores agents by average profit. diff --git a/codeclash/arenas/scml/runtime/run_scml.py b/codeclash/arenas/scml/runtime/run_scml.py new file mode 100644 index 00000000..ab0d054a --- /dev/null +++ b/codeclash/arenas/scml/runtime/run_scml.py @@ -0,0 +1,125 @@ +import argparse +import importlib.util +import json +import random +import re +from pathlib import Path + +import numpy as np +from scml.oneshot import SCML2024OneShotWorld + + +def safe_class_name(player_name: str) -> str: + safe = re.sub(r"\W+", "_", player_name) + if not safe or safe[0].isdigit(): + safe = f"player_{safe}" + return f"CodeClash_{safe}" + + +def load_agent_class(player_name: str, path: str): + module_name = f"codeclash_scml_{safe_class_name(player_name).lower()}" + spec = importlib.util.spec_from_file_location(module_name, path) + if spec is None or spec.loader is None: + raise RuntimeError(f"Could not load module spec from {path}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if not hasattr(module, "MyAgent"): + raise RuntimeError(f"{path} does not define MyAgent") + base_class = module.MyAgent + return type(safe_class_name(player_name), (base_class,), {"__module__": module.__name__}) + + +def run_world(agent_classes: dict[str, type], *, sim_idx: int, steps: int, lines: int) -> dict: + seed = 1729 + sim_idx + random.seed(seed) + np.random.seed(seed) + + player_names = list(agent_classes.keys()) + offset = sim_idx % len(player_names) + ordered_names = player_names[offset:] + player_names[:offset] + wrapped_classes = [agent_classes[name] for name in ordered_names] + class_to_player = {cls.__name__: player for player, cls in agent_classes.items()} + + config = SCML2024OneShotWorld.generate( + agent_types=wrapped_classes, + agent_processes=[0 for _ in wrapped_classes], + n_steps=steps, + n_processes=1, + n_lines=lines, + random_agent_types=False, + ) + world = SCML2024OneShotWorld( + **config, + no_logs=True, + compact=True, + fast=True, + agent_name_reveals_type=True, + agent_name_reveals_position=True, + ) + world.run() + + raw_scores = world.scores() + player_scores = {player: 0.0 for player in player_names} + details = [] + for agent_id, score in raw_scores.items(): + world_agent = world.agents[agent_id] + player = class_to_player.get(world_agent.short_type_name) + if player is None: + continue + numeric_score = float(score) + player_scores[player] = numeric_score + details.append( + { + "sim": sim_idx, + "player": player, + "world_agent_id": agent_id, + "score": numeric_score, + } + ) + + return {"scores": player_scores, "details": details} + + +def parse_agent_arg(value: str) -> tuple[str, str]: + if "=" not in value: + raise argparse.ArgumentTypeError("--agent values must be NAME=/path/to/scml_agent.py") + name, path = value.split("=", 1) + if not name: + raise argparse.ArgumentTypeError("agent name cannot be empty") + if not Path(path).exists(): + raise argparse.ArgumentTypeError(f"agent path does not exist: {path}") + return name, path + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--agent", action="append", type=parse_agent_arg, required=True) + parser.add_argument("--sims", type=int, default=3) + parser.add_argument("--steps", type=int, default=10) + parser.add_argument("--lines", type=int, default=2) + parser.add_argument("--output", required=True) + args = parser.parse_args() + + agent_classes = {name: load_agent_class(name, path) for name, path in args.agent} + totals = {name: 0.0 for name in agent_classes} + details = [] + + for sim_idx in range(args.sims): + result = run_world(agent_classes, sim_idx=sim_idx, steps=args.steps, lines=args.lines) + for player, score in result["scores"].items(): + totals[player] += score + details.extend(result["details"]) + + averages = {player: score / args.sims for player, score in totals.items()} + output = { + "average_scores": averages, + "total_scores": totals, + "sims": args.sims, + "details": [json.dumps(item, sort_keys=True) for item in details], + } + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + Path(args.output).write_text(json.dumps(output, indent=2, sort_keys=True)) + + +if __name__ == "__main__": + main() diff --git a/codeclash/arenas/scml/runtime/scml_agent.py b/codeclash/arenas/scml/runtime/scml_agent.py new file mode 100644 index 00000000..9ed67d4f --- /dev/null +++ b/codeclash/arenas/scml/runtime/scml_agent.py @@ -0,0 +1,10 @@ +from scml.oneshot.agents import GreedySyncAgent + + +class MyAgent(GreedySyncAgent): + """Baseline SCML OneShot agent. + + Improve this class to negotiate better supply-chain contracts and maximize profit. + """ + + pass diff --git a/codeclash/arenas/scml/scml.py b/codeclash/arenas/scml/scml.py new file mode 100644 index 00000000..f830627a --- /dev/null +++ b/codeclash/arenas/scml/scml.py @@ -0,0 +1,125 @@ +import json +import shlex +import subprocess + +from codeclash.agents.player import Player +from codeclash.arenas.arena import CodeArena, RoundStats +from codeclash.constants import RESULT_TIE +from codeclash.utils.environment import assert_zero_exit_code + +RESULTS_JSON = "scml_results.json" + + +class SCMLOneShotArena(CodeArena): + name: str = "SCML" + submission: str = "scml_agent.py" + description: str = """SCML OneShot is a supply-chain negotiation simulator based on the ANAC Supply Chain Management League. + +Your bot is a Python file named `scml_agent.py` that defines a class named `MyAgent`. +`MyAgent` should inherit from an SCML OneShot agent class, for example: + + from scml.oneshot.agents import GreedySyncAgent + + class MyAgent(GreedySyncAgent): + ... + +Each round runs several SCML2024 OneShot worlds. Your agent negotiates with the other submitted +agents to buy or sell goods in a simulated supply chain. The objective is to maximize profit. The +arena score is your average SCML score across all worlds in the round. +""" + default_args: dict = { + "sims_per_round": 3, + "n_steps": 10, + "n_lines": 2, + "timeout": 180, + } + + def _game_arg(self, key: str): + return self.game_config.get(key, self.default_args[key]) + + def validate_code(self, agent: Player) -> tuple[bool, str | None]: + quoted_submission = shlex.quote(self.submission) + file_check = agent.environment.execute(f"test -f {quoted_submission} && echo exists") + if "exists" not in file_check["output"]: + return False, f"Submission file `{self.submission}` not found in the workspace root" + + content = agent.environment.execute(f"cat {quoted_submission}")["output"] + if not content.strip(): + return False, f"`{self.submission}` is empty" + + syntax_check = agent.environment.execute(f"python -m py_compile {quoted_submission}") + if syntax_check["returncode"] != 0: + return False, f"Python syntax error in `{self.submission}`:\n{syntax_check['output']}" + + import_check = agent.environment.execute( + "python - <<'PY'\n" + "import importlib.util\n" + f"spec = importlib.util.spec_from_file_location('submission_agent', {self.submission!r})\n" + "module = importlib.util.module_from_spec(spec)\n" + "spec.loader.exec_module(module)\n" + "assert hasattr(module, 'MyAgent'), 'MyAgent class not found'\n" + "from scml.oneshot.agent import OneShotAgent\n" + "assert issubclass(module.MyAgent, OneShotAgent), 'MyAgent must inherit from an SCML OneShotAgent class'\n" + "PY" + ) + if import_check["returncode"] != 0: + return False, f"Could not import `MyAgent` from `{self.submission}`:\n{import_check['output']}" + + return True, None + + def execute_round(self, agents: list[Player]) -> None: + agent_args = [] + for agent in agents: + agent_args.extend(["--agent", f"{agent.name}=/{agent.name}/{self.submission}"]) + + cmd = [ + "python", + "run_scml.py", + "--sims", + str(self._game_arg("sims_per_round")), + "--steps", + str(self._game_arg("n_steps")), + "--lines", + str(self._game_arg("n_lines")), + "--output", + str(self.log_env / RESULTS_JSON), + *agent_args, + ] + full_cmd = " ".join(shlex.quote(part) for part in cmd) + self.logger.info(f"Running game: {full_cmd}") + try: + response = self.environment.execute(full_cmd, timeout=int(self._game_arg("timeout"))) + except subprocess.TimeoutExpired as exc: + raise RuntimeError("SCML round timed out") from exc + assert_zero_exit_code(response, logger=self.logger) + + def get_results(self, agents: list[Player], round_num: int, stats: RoundStats): + result_file = self.log_round(round_num) / RESULTS_JSON + if not result_file.exists(): + self.logger.error(f"Missing result file: {result_file}") + stats.winner = RESULT_TIE + for agent in agents: + stats.scores[agent.name] = 0.0 + stats.player_stats[agent.name].score = 0.0 + return + + with open(result_file) as f: + result = json.load(f) + + scores = {agent.name: 0.0 for agent in agents} + for player, score in result.get("average_scores", {}).items(): + if player in scores: + scores[player] = float(score) + + stats.scores = scores + stats.details = result.get("details", []) + for player, score in scores.items(): + stats.player_stats[player].score = score + + if not scores: + stats.winner = RESULT_TIE + return + + top_score = max(scores.values()) + winners = [player for player, score in scores.items() if score == top_score] + stats.winner = winners[0] if len(winners) == 1 else RESULT_TIE diff --git a/configs/examples/SCML__dummy__r1__s2.yaml b/configs/examples/SCML__dummy__r1__s2.yaml new file mode 100644 index 00000000..d87fafb3 --- /dev/null +++ b/configs/examples/SCML__dummy__r1__s2.yaml @@ -0,0 +1,31 @@ +tournament: + rounds: 1 +game: + name: SCML + sims_per_round: 2 + n_steps: 5 + n_lines: 2 + timeout: 240 +players: +- agent: dummy + name: alpha +- agent: dummy + name: beta +prompts: + game_description: |- + You are a software developer ({{player_id}}) competing in CodeClash's SCML OneShot arena. + + The game is played in {{total_rounds}} rounds. For every round, you and your competitors edit + code that controls an autonomous supply-chain negotiation agent. This is round {{round}}. + + Your task: improve `scml_agent.py`, located in {{working_dir}}. + All commands run from {{working_dir}}. + + Your file must define `MyAgent`, an SCML OneShot agent class. A valid starting point is: + + from scml.oneshot.agents import GreedySyncAgent + + class MyAgent(GreedySyncAgent): + pass + + The arena runs multiple SCML2024 OneShot worlds. Your objective is to maximize average profit. diff --git a/docs/reference/arenas/battlecode.md b/docs/reference/arenas/battlecode.md index f73ae12b..c36dd6e5 100644 --- a/docs/reference/arenas/battlecode.md +++ b/docs/reference/arenas/battlecode.md @@ -12,7 +12,7 @@ BattleCode is a programming competition where players write Java code to control ## Implementation -::: codeclash.arenas.battlecode.battlecode.BattleCodeArena +::: codeclash.arenas.battlecode25.battlecode25.BattleCode25Arena options: show_root_heading: true heading_level: 2 diff --git a/docs/reference/arenas/scml.md b/docs/reference/arenas/scml.md new file mode 100644 index 00000000..39d66443 --- /dev/null +++ b/docs/reference/arenas/scml.md @@ -0,0 +1,69 @@ +# SCML + +Supply-chain negotiation arena based on the ANAC Supply Chain Management League OneShot track. + +## Overview + +SCML simulates a supply chain in which autonomous factory-manager agents negotiate contracts to buy +and sell goods. The CodeClash arena uses the SCML2024 OneShot world because it focuses on negotiation +and profit without requiring long-term production scheduling. + +Each CodeClash player edits an SCML OneShot agent. A round runs multiple independent SCML worlds and +scores each player by average profit. + +## Resources + +- [SCML Official Site](https://scml.cs.brown.edu/) +- [SCML Documentation](https://scml.readthedocs.io/) + +## Implementation + +::: codeclash.arenas.scml.scml.SCMLOneShotArena + options: + show_root_heading: true + heading_level: 2 + +## Agent Interface + +Your bot must be a Python file named `scml_agent.py` that defines `MyAgent`. + +`MyAgent` must inherit from an SCML OneShot agent class. A valid starting point is: + +```python +from scml.oneshot.agents import GreedySyncAgent + + +class MyAgent(GreedySyncAgent): + pass +``` + +Agents can use the normal SCML OneShot APIs exposed by the upstream `scml` package. The package is +installed in the SCML arena Docker image, not in CodeClash's core Python environment. + +## Configuration Example + +```yaml +tournament: + rounds: 1 +game: + name: SCML + sims_per_round: 2 + n_steps: 5 + n_lines: 2 +players: + - agent: dummy + name: alpha + - agent: dummy + name: beta +``` + +## Scoring + +The arena runs `sims_per_round` independent SCML2024 OneShot worlds. For each world, it maps SCML +agent scores back to CodeClash player names. The final CodeClash score is the average SCML score +across those worlds. + +The runner rotates player ordering across simulations to reduce positional bias from factory +assignment. + +--8<-- "docs/_footer.md" diff --git a/docs/reference/index.md b/docs/reference/index.md index da44bba6..3957bab9 100644 --- a/docs/reference/index.md +++ b/docs/reference/index.md @@ -22,6 +22,7 @@ Available arenas: - [HuskyBench](arenas/huskybench.md) - [RoboCode](arenas/robocode.md) - [RobotRumble](arenas/robotrumble.md) +- [SCML](arenas/scml.md) ### Players diff --git a/mkdocs.yml b/mkdocs.yml index 0806b138..62193f81 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -80,6 +80,7 @@ nav: - "HuskyBench": "reference/arenas/huskybench.md" - "RoboCode": "reference/arenas/robocode.md" - "RobotRumble": "reference/arenas/robotrumble.md" + - "SCML": "reference/arenas/scml.md" - "DummyArena": "reference/arenas/dummy.md" - Player: - "Player (Abstract)": "reference/player/player.md" diff --git a/pyproject.toml b/pyproject.toml index 188ad0ef..8d9324e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ ] dependencies = [ "litellm", - "mini-swe-agent", + "mini-swe-agent<2", "portkey-ai", "python-dotenv", "ghapi", diff --git a/tests/arenas/test_scml.py b/tests/arenas/test_scml.py new file mode 100644 index 00000000..6ad44c75 --- /dev/null +++ b/tests/arenas/test_scml.py @@ -0,0 +1,109 @@ +import json + +from codeclash.arenas.arena import RoundStats +from codeclash.arenas.scml.scml import SCMLOneShotArena +from codeclash.constants import RESULT_TIE + +from .conftest import MockPlayer + + +class TestSCMLValidation: + def test_valid_agent(self, mock_player_factory): + arena = SCMLOneShotArena.__new__(SCMLOneShotArena) + arena.submission = "scml_agent.py" + player = mock_player_factory( + name="Alice", + files={"scml_agent.py": "class MyAgent:\n pass\n"}, + command_outputs={ + "test -f scml_agent.py && echo exists": {"output": "exists\n", "returncode": 0}, + "cat scml_agent.py": {"output": "class MyAgent:\n pass\n", "returncode": 0}, + "python -m py_compile scml_agent.py": {"output": "", "returncode": 0}, + "python - <<'PY'": {"output": "", "returncode": 0}, + }, + ) + + valid, error = arena.validate_code(player) + + assert valid is True + assert error is None + + def test_missing_myagent(self, mock_player_factory): + arena = SCMLOneShotArena.__new__(SCMLOneShotArena) + arena.submission = "scml_agent.py" + player = mock_player_factory( + name="Alice", + files={"scml_agent.py": "class OtherAgent:\n pass\n"}, + command_outputs={ + "test -f scml_agent.py && echo exists": {"output": "exists\n", "returncode": 0}, + "cat scml_agent.py": {"output": "class OtherAgent:\n pass\n", "returncode": 0}, + "python -m py_compile scml_agent.py": {"output": "", "returncode": 0}, + "python - <<'PY'": {"output": "MyAgent class not found", "returncode": 1}, + }, + ) + + valid, error = arena.validate_code(player) + + assert valid is False + assert "Could not import" in error + + def test_import_failure(self, mock_player_factory): + arena = SCMLOneShotArena.__new__(SCMLOneShotArena) + arena.submission = "scml_agent.py" + player = mock_player_factory( + name="Alice", + files={"scml_agent.py": "class MyAgent:\n pass\n"}, + command_outputs={ + "test -f scml_agent.py && echo exists": {"output": "exists\n", "returncode": 0}, + "cat scml_agent.py": {"output": "class MyAgent:\n pass\n", "returncode": 0}, + "python -m py_compile scml_agent.py": {"output": "", "returncode": 0}, + "python - <<'PY'": {"output": "ImportError", "returncode": 1}, + }, + ) + + valid, error = arena.validate_code(player) + + assert valid is False + assert "Could not import" in error + + +class TestSCMLResults: + def test_parse_winner(self, tmp_log_dir): + arena = SCMLOneShotArena.__new__(SCMLOneShotArena) + arena.log_local = tmp_log_dir + arena.logger = type("Logger", (), {"error": lambda self, msg: None})() + round_dir = tmp_log_dir / "rounds" / "1" + round_dir.mkdir(parents=True) + (round_dir / "scml_results.json").write_text( + json.dumps( + { + "average_scores": {"Alice": 1.25, "Bob": 0.75}, + "details": ['{"sim": 0, "player": "Alice", "score": 1.25}'], + } + ) + ) + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, 1, stats) + + assert stats.winner == "Alice" + assert stats.scores == {"Alice": 1.25, "Bob": 0.75} + assert stats.player_stats["Alice"].score == 1.25 + assert stats.details == ['{"sim": 0, "player": "Alice", "score": 1.25}'] + + def test_parse_tie(self, tmp_log_dir): + arena = SCMLOneShotArena.__new__(SCMLOneShotArena) + arena.log_local = tmp_log_dir + arena.logger = type("Logger", (), {"error": lambda self, msg: None})() + round_dir = tmp_log_dir / "rounds" / "1" + round_dir.mkdir(parents=True) + (round_dir / "scml_results.json").write_text(json.dumps({"average_scores": {"Alice": 1, "Bob": 1}})) + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, 1, stats) + + assert stats.winner == RESULT_TIE + assert stats.scores == {"Alice": 1.0, "Bob": 1.0} diff --git a/uv.lock b/uv.lock index 8261e376..02b740eb 100644 --- a/uv.lock +++ b/uv.lock @@ -397,7 +397,7 @@ requires-dist = [ { name = "jinja2" }, { name = "litellm" }, { name = "markupsafe" }, - { name = "mini-swe-agent" }, + { name = "mini-swe-agent", specifier = "<2" }, { name = "mkdocs-glightbox", marker = "extra == 'docs'" }, { name = "mkdocs-include-markdown-plugin", marker = "extra == 'docs'" }, { name = "mkdocs-material", marker = "extra == 'docs'" },