From 837e3141db85bb7df176e8f51e698c9e637f271e Mon Sep 17 00:00:00 2001 From: muhtasham Date: Tue, 5 May 2026 10:34:32 +0200 Subject: [PATCH] Add CybORG arena --- .github/mlc_config.json | 6 + README.md | 6 +- codeclash/arenas/__init__.py | 2 + codeclash/arenas/cyborg/CybORG.Dockerfile | 26 +++ codeclash/arenas/cyborg/__init__.py | 3 + codeclash/arenas/cyborg/cyborg.py | 128 +++++++++++++ codeclash/arenas/cyborg/runtime/.gitignore | 2 + codeclash/arenas/cyborg/runtime/README.md | 15 ++ .../arenas/cyborg/runtime/cyborg_agent.py | 10 ++ codeclash/arenas/cyborg/runtime/run_cyborg.py | 155 ++++++++++++++++ configs/examples/CybORG__dummy__r1__s2.yaml | 33 ++++ docs/index.md | 2 +- docs/reference/arenas/cyborg.md | 74 ++++++++ docs/reference/index.md | 1 + mkdocs.yml | 1 + tests/arenas/test_cyborg.py | 169 ++++++++++++++++++ 16 files changed, 631 insertions(+), 2 deletions(-) create mode 100644 codeclash/arenas/cyborg/CybORG.Dockerfile create mode 100644 codeclash/arenas/cyborg/__init__.py create mode 100644 codeclash/arenas/cyborg/cyborg.py create mode 100644 codeclash/arenas/cyborg/runtime/.gitignore create mode 100644 codeclash/arenas/cyborg/runtime/README.md create mode 100644 codeclash/arenas/cyborg/runtime/cyborg_agent.py create mode 100644 codeclash/arenas/cyborg/runtime/run_cyborg.py create mode 100644 configs/examples/CybORG__dummy__r1__s2.yaml create mode 100644 docs/reference/arenas/cyborg.md create mode 100644 tests/arenas/test_cyborg.py diff --git a/.github/mlc_config.json b/.github/mlc_config.json index a0cec581..673871bc 100644 --- a/.github/mlc_config.json +++ b/.github/mlc_config.json @@ -27,6 +27,9 @@ { "pattern": "https://huskybench\\.com/.*" }, + { + "pattern": "https://corewar\\.co\\.uk" + }, { "pattern": "https?://(.*\\.)?twitter\\.com/.*" }, @@ -35,6 +38,9 @@ }, { "pattern": "https://www\\.contributor-covenant\\.org/version/2/1/code_of_conduct\\.html" + }, + { + "pattern": "https://join\\.slack\\.com/t/swe-bench/shared_invite/.*" } ] } diff --git 
a/README.md b/README.md index 1e7f5436..6dda5e55 100644 --- a/README.md +++ b/README.md @@ -98,12 +98,16 @@ The winner is the LM agent who wins the most rounds. ## 🧩 Available Arenas CodeClash includes competitive programming games and simulation-backed arenas, including BattleSnake, -CoreWar, Halite, HuskyBench, RoboCode, RobotRumble, and SCML. +CoreWar, CybORG, Halite, HuskyBench, RoboCode, RobotRumble, and SCML. SCML is a supply-chain negotiation arena based on the ANAC Supply Chain Management League OneShot track. Agents edit a Python `scml_agent.py` implementation and compete to maximize average profit across multiple simulated supply-chain worlds. +CybORG is a simulated cyber-defense arena based on the CAGE Challenge 3 DroneSwarm scenario. Agents +edit a Python `cyborg_agent.py` implementation and compete to maximize blue-team reward across +simulated episodes. + ## 🚀 Get Involved - Check out our [docs](https://docs.codeclash.ai/) for more details on running different arenas, configuring tournaments, etc. 
diff --git a/codeclash/arenas/__init__.py b/codeclash/arenas/__init__.py index 923f151e..700f87d3 100644 --- a/codeclash/arenas/__init__.py +++ b/codeclash/arenas/__init__.py @@ -6,6 +6,7 @@ from codeclash.arenas.bridge.bridge import BridgeArena from codeclash.arenas.chess.chess import ChessArena from codeclash.arenas.corewar.corewar import CoreWarArena +from codeclash.arenas.cyborg.cyborg import CybORGArena from codeclash.arenas.dummy.dummy import DummyArena from codeclash.arenas.figgie.figgie import FiggieArena from codeclash.arenas.gomoku.gomoku import GomokuArena @@ -25,6 +26,7 @@ BridgeArena, ChessArena, CoreWarArena, + CybORGArena, DummyArena, FiggieArena, GomokuArena, diff --git a/codeclash/arenas/cyborg/CybORG.Dockerfile b/codeclash/arenas/cyborg/CybORG.Dockerfile new file mode 100644 index 00000000..e79a81ab --- /dev/null +++ b/codeclash/arenas/cyborg/CybORG.Dockerfile @@ -0,0 +1,26 @@ +FROM python:3.11-slim-bookworm + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates git build-essential jq \ + && rm -rf /var/lib/apt/lists/* + +RUN python -m pip install --upgrade pip \ + && git clone https://github.com/cage-challenge/CybORG.git /opt/CybORG \ + && cd /opt/CybORG \ + && git checkout a2d03f99e587af153ae0ac50fb94ba6272e4fff2 \ + && python -m pip install "numpy<1.24" -e /opt/CybORG + +WORKDIR /workspace + +COPY codeclash/arenas/cyborg/runtime/ /workspace/ + +RUN git init \ + && git config user.email "player@codeclash.com" \ + && git config user.name "Player" \ + && git add . 
"""CodeClash arena that scores CybORG blue-team agents on simulated DroneSwarm episodes."""

import json
import shlex
import subprocess

from codeclash.agents.player import Player
from codeclash.arenas.arena import CodeArena, RoundStats
from codeclash.constants import RESULT_TIE
from codeclash.utils.environment import assert_zero_exit_code

# Name of the JSON file the in-container runner writes and `get_results` reads back.
RESULTS_JSON = "cyborg_results.json"


class CybORGArena(CodeArena):
    """Arena that ranks players by the average score of their `MyAgent` class.

    Each round runs `run_cyborg.py` once inside the game environment with one
    `--agent NAME=PATH` pair per player, then reads the `average_scores`
    mapping from the JSON file that script produced.
    """

    name: str = "CybORG"
    submission: str = "cyborg_agent.py"
    description: str = """CybORG is a simulated cyber-defense arena based on the CAGE Challenge 3 DroneSwarm scenario.

Your bot is a Python file named `cyborg_agent.py` that defines a class named `MyAgent`.
`MyAgent` should inherit from a CybORG BaseAgent-compatible class, for example:

    from CybORG.Agents import RandomAgent

    class MyAgent(RandomAgent):
        ...

Each round evaluates every submitted agent independently on the same seeded DroneSwarm episodes.
Your agent controls the blue-team drone agents through CybORG's simulated PettingZoo interface.
The objective is to maximize average episode reward. This arena uses CybORG simulation only and does
 not run real exploit tools or interact with external networks.
    """
    # Fallback values for keys that may be overridden under `game.args` in the config.
    default_args: dict = {
        "steps_per_episode": 30,
        "num_drones": 18,
        "timeout": 240,
    }

    def _configured_args(self) -> dict:
        """Return the `args` mapping of the game config, or an empty dict when unset.

        A config that spells `args:` with no value yields `None`; treating that
        like a missing key avoids an `AttributeError` on the later `.get`.
        """
        return self.game_config.get("args") or {}

    def _game_arg(self, key: str):
        """Return the configured value for `key`, falling back to `default_args[key]`.

        Raises:
            KeyError: if `key` has no entry in `default_args`.
        """
        return self._configured_args().get(key, self.default_args[key])

    def _episodes_per_round(self) -> int:
        """Return `args.episodes_per_round` if set, otherwise `sims_per_round`."""
        return int(self._configured_args().get("episodes_per_round", self.game_config["sims_per_round"]))

    def validate_code(self, agent: Player) -> tuple[bool, str | None]:
        """Check that the player's submission exists, compiles and exposes `MyAgent`.

        Args:
            agent: Player whose environment holds the submission file.

        Returns:
            `(True, None)` when every check passes, otherwise `(False, reason)`.
        """
        quoted_submission = shlex.quote(self.submission)
        file_check = agent.environment.execute(f"test -f {quoted_submission} && echo exists")
        if "exists" not in file_check["output"]:
            return False, f"Submission file `{self.submission}` not found in the workspace root"

        content = agent.environment.execute(f"cat {quoted_submission}")["output"]
        if not content.strip():
            return False, f"`{self.submission}` is empty"

        syntax_check = agent.environment.execute(f"python -m py_compile {quoted_submission}")
        if syntax_check["returncode"] != 0:
            return False, f"Python syntax error in `{self.submission}`:\n{syntax_check['output']}"

        # Import the file inside the player's environment and verify the class contract there.
        import_check = agent.environment.execute(
            "python - <<'PY'\n"
            "import importlib.util\n"
            f"spec = importlib.util.spec_from_file_location('submission_agent', {self.submission!r})\n"
            "module = importlib.util.module_from_spec(spec)\n"
            "spec.loader.exec_module(module)\n"
            "assert hasattr(module, 'MyAgent'), 'MyAgent class not found'\n"
            "from CybORG.Agents import BaseAgent\n"
            "assert issubclass(module.MyAgent, BaseAgent), 'MyAgent must inherit from a CybORG BaseAgent class'\n"
            "PY"
        )
        if import_check["returncode"] != 0:
            return False, f"Could not import `MyAgent` from `{self.submission}`:\n{import_check['output']}"

        return True, None

    def execute_round(self, agents: list[Player]) -> None:
        """Run `run_cyborg.py` for all players and write results under `self.log_env`.

        Raises:
            RuntimeError: if the command exceeds the configured `timeout`.
        """
        agent_args = []
        for agent in agents:
            agent_args.extend(["--agent", f"{agent.name}=/{agent.name}/{self.submission}"])

        cmd = [
            "python",
            "run_cyborg.py",
            "--episodes",
            str(self._episodes_per_round()),
            "--steps",
            str(self._game_arg("steps_per_episode")),
            "--drones",
            str(self._game_arg("num_drones")),
            "--output",
            str(self.log_env / RESULTS_JSON),
            *agent_args,
        ]
        # Quote every part: player names end up inside the shell command line.
        full_cmd = " ".join(shlex.quote(part) for part in cmd)
        self.logger.info(f"Running game: {full_cmd}")
        try:
            response = self.environment.execute(full_cmd, timeout=int(self._game_arg("timeout")))
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError("CybORG round timed out") from exc
        assert_zero_exit_code(response, logger=self.logger)

    def _record_tie(self, agents: list[Player], stats: RoundStats) -> None:
        """Mark the round as a tie and give every player a score of 0.0."""
        stats.winner = RESULT_TIE
        for agent in agents:
            stats.scores[agent.name] = 0.0
            stats.player_stats[agent.name].score = 0.0

    def get_results(self, agents: list[Player], round_num: int, stats: RoundStats):
        """Fill `stats` with per-player scores and the winner of round `round_num`.

        A results file that is missing, unreadable, not valid JSON, or not a JSON
        object is logged and recorded as a tie with zero scores. Players absent
        from `average_scores` score 0.0; a shared top score is a tie.
        """
        result_file = self.log_round(round_num) / RESULTS_JSON
        if not result_file.exists():
            self.logger.error(f"Missing result file: {result_file}")
            self._record_tie(agents, stats)
            return

        try:
            with open(result_file, encoding="utf-8") as f:
                result = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            self.logger.error(f"Unreadable result file {result_file}: {exc}")
            self._record_tie(agents, stats)
            return

        if not isinstance(result, dict):
            self.logger.error(f"Unexpected result format in {result_file}: {type(result).__name__}")
            self._record_tie(agents, stats)
            return

        scores = {agent.name: 0.0 for agent in agents}
        for player, score in result.get("average_scores", {}).items():
            # Ignore entries for names that are not part of this round.
            if player in scores:
                scores[player] = float(score)

        stats.scores = scores
        stats.details = result.get("details", [])
        for player, score in scores.items():
            stats.player_stats[player].score = score

        if not scores:
            stats.winner = RESULT_TIE
            return

        top_score = max(scores.values())
        winners = [player for player, score in scores.items() if score == top_score]
        stats.winner = winners[0] if len(winners) == 1 else RESULT_TIE
b/codeclash/arenas/cyborg/runtime/README.md @@ -0,0 +1,15 @@ +# CybORG CodeClash Workspace + +Edit `cyborg_agent.py`. + +Your file must define `MyAgent`, a CybORG `BaseAgent` subclass. A safe starting point is: + +```python +from CybORG.Agents import RandomAgent + + +class MyAgent(RandomAgent): + pass +``` + +The arena runs simulated CAGE Challenge 3 DroneSwarm episodes and scores agents by average reward. diff --git a/codeclash/arenas/cyborg/runtime/cyborg_agent.py b/codeclash/arenas/cyborg/runtime/cyborg_agent.py new file mode 100644 index 00000000..c3bcff82 --- /dev/null +++ b/codeclash/arenas/cyborg/runtime/cyborg_agent.py @@ -0,0 +1,10 @@ +from CybORG.Agents import RandomAgent + + +class MyAgent(RandomAgent): + """Baseline CybORG blue-team agent. + + Improve this class to choose better defensive actions in the simulated DroneSwarm scenario. + """ + + pass diff --git a/codeclash/arenas/cyborg/runtime/run_cyborg.py b/codeclash/arenas/cyborg/runtime/run_cyborg.py new file mode 100644 index 00000000..4b3288aa --- /dev/null +++ b/codeclash/arenas/cyborg/runtime/run_cyborg.py @@ -0,0 +1,155 @@ +import argparse +import importlib.util +import json +import random +import re +import traceback +from pathlib import Path +from statistics import mean + +import numpy as np +from CybORG import CybORG +from CybORG.Agents import BaseAgent +from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper +from CybORG.Simulator.Scenarios import DroneSwarmScenarioGenerator + +CRASH_SCORE = -1_000_000.0 + + +def safe_module_name(player_name: str) -> str: + safe = re.sub(r"\W+", "_", player_name) + if not safe or safe[0].isdigit(): + safe = f"player_{safe}" + return f"codeclash_cyborg_{safe.lower()}" + + +def load_agent_class(player_name: str, path: str): + spec = importlib.util.spec_from_file_location(safe_module_name(player_name), path) + if spec is None or spec.loader is None: + raise RuntimeError(f"Could not load module spec from {path}") + module = 
importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if not hasattr(module, "MyAgent"): + raise RuntimeError(f"{path} does not define MyAgent") + agent_class = module.MyAgent + if not issubclass(agent_class, BaseAgent): + raise RuntimeError(f"{path} MyAgent must inherit from CybORG BaseAgent") + return agent_class + + +def make_agent(agent_class: type, agent_name: str): + try: + return agent_class(name=agent_name) + except TypeError: + try: + return agent_class(agent_name) + except TypeError: + return agent_class() + + +def evaluate_player( + player_name: str, + agent_class: type, + *, + episode_idx: int, + steps: int, + drones: int, +) -> dict: + seed = 4100 + episode_idx + random.seed(seed) + np.random.seed(seed) + + try: + scenario = DroneSwarmScenarioGenerator(num_drones=drones) + env = PettingZooParallelWrapper(CybORG(scenario, "sim")) + observations = env.reset() + action_spaces = env.action_spaces + agents = {agent_name: make_agent(agent_class, agent_name) for agent_name in env.possible_agents} + + for agent_name, agent in agents.items(): + if hasattr(agent, "set_initial_values"): + agent.set_initial_values(action_spaces[agent_name], observations[agent_name]) + + step_rewards = [] + for _ in range(steps): + actions = { + agent_name: agents[agent_name].get_action(observations[agent_name], action_spaces[agent_name]) + for agent_name in env.agents + } + observations, rewards, done, _info = env.step(actions) + step_rewards.append(mean(rewards.values())) + if all(done.values()): + break + + for agent in agents.values(): + if hasattr(agent, "end_episode"): + agent.end_episode() + + return { + "player": player_name, + "episode": episode_idx, + "score": float(sum(step_rewards)), + "steps_completed": len(step_rewards), + "status": "ok", + } + except Exception as exc: + return { + "player": player_name, + "episode": episode_idx, + "score": CRASH_SCORE, + "steps_completed": 0, + "status": "error", + "error": f"{type(exc).__name__}: {exc}", + 
"traceback": traceback.format_exc(limit=5), + } + + +def parse_agent_arg(value: str) -> tuple[str, str]: + if "=" not in value: + raise argparse.ArgumentTypeError("--agent values must be NAME=/path/to/cyborg_agent.py") + name, path = value.split("=", 1) + if not name: + raise argparse.ArgumentTypeError("agent name cannot be empty") + if not Path(path).exists(): + raise argparse.ArgumentTypeError(f"agent path does not exist: {path}") + return name, path + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--agent", action="append", type=parse_agent_arg, required=True) + parser.add_argument("--episodes", type=int, default=3) + parser.add_argument("--steps", type=int, default=30) + parser.add_argument("--drones", type=int, default=18) + parser.add_argument("--output", required=True) + args = parser.parse_args() + + agent_classes = {name: load_agent_class(name, path) for name, path in args.agent} + totals = {name: 0.0 for name in agent_classes} + details = [] + + for episode_idx in range(args.episodes): + for player_name, agent_class in agent_classes.items(): + result = evaluate_player( + player_name, + agent_class, + episode_idx=episode_idx, + steps=args.steps, + drones=args.drones, + ) + totals[player_name] += result["score"] + details.append(result) + + averages = {player: score / args.episodes for player, score in totals.items()} + output = { + "average_scores": averages, + "total_scores": totals, + "episodes": args.episodes, + "details": [json.dumps(item, sort_keys=True) for item in details], + } + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + Path(args.output).write_text(json.dumps(output, indent=2, sort_keys=True)) + + +if __name__ == "__main__": + main() diff --git a/configs/examples/CybORG__dummy__r1__s2.yaml b/configs/examples/CybORG__dummy__r1__s2.yaml new file mode 100644 index 00000000..c0e9d54a --- /dev/null +++ b/configs/examples/CybORG__dummy__r1__s2.yaml @@ -0,0 +1,33 @@ +tournament: + rounds: 1 +game: + 
name: CybORG + sims_per_round: 2 + args: + steps_per_episode: 5 + num_drones: 8 + timeout: 240 +players: +- agent: dummy + name: alpha +- agent: dummy + name: beta +prompts: + game_description: |- + You are a software developer ({{player_id}}) competing in CodeClash's CybORG arena. + + The game is played in {{total_rounds}} rounds. For every round, you and your competitors edit + code that controls a blue-team cyber-defense agent in a simulated CAGE Challenge environment. + This is round {{round}}. + + Your task: improve `cyborg_agent.py`, located in {{working_dir}}. + All commands run from {{working_dir}}. + + Your file must define `MyAgent`, a CybORG BaseAgent subclass. A valid starting point is: + + from CybORG.Agents import RandomAgent + + class MyAgent(RandomAgent): + pass + + The arena runs simulated DroneSwarm episodes. Your objective is to maximize average reward. diff --git a/docs/index.md b/docs/index.md index 881fe2dc..8778c90a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,7 +9,7 @@ Welcome to **CodeClash**, a framework for evaluating Large Language Models (LLMs ## Quick links
- +