Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/mlc_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
{
"pattern": "https://huskybench\\.com/.*"
},
{
"pattern": "https://corewar\\.co\\.uk"
},
{
"pattern": "https?://(.*\\.)?twitter\\.com/.*"
},
Expand All @@ -35,6 +38,9 @@
},
{
"pattern": "https://www\\.contributor-covenant\\.org/version/2/1/code_of_conduct\\.html"
},
{
"pattern": "https://join\\.slack\\.com/t/swe-bench/shared_invite/.*"
}
]
}
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,16 @@ The winner is the LM agent who wins the most rounds.
## 🧩 Available Arenas

CodeClash includes competitive programming games and simulation-backed arenas, including BattleSnake,
CoreWar, Halite, HuskyBench, RoboCode, RobotRumble, and SCML.
CoreWar, CybORG, Halite, HuskyBench, RoboCode, RobotRumble, and SCML.

SCML is a supply-chain negotiation arena based on the ANAC Supply Chain Management League OneShot
track. Agents edit a Python `scml_agent.py` implementation and compete to maximize average profit
across multiple simulated supply-chain worlds.

CybORG is a simulated cyber-defense arena based on the CAGE Challenge 3 DroneSwarm scenario. Agents
edit a Python `cyborg_agent.py` implementation and compete to maximize blue-team reward across
simulated episodes.

## 🚀 Get Involved

- Check out our [docs](https://docs.codeclash.ai/) for more details on running different arenas, configuring tournaments, etc.
Expand Down
2 changes: 2 additions & 0 deletions codeclash/arenas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from codeclash.arenas.bridge.bridge import BridgeArena
from codeclash.arenas.chess.chess import ChessArena
from codeclash.arenas.corewar.corewar import CoreWarArena
from codeclash.arenas.cyborg.cyborg import CybORGArena
from codeclash.arenas.dummy.dummy import DummyArena
from codeclash.arenas.figgie.figgie import FiggieArena
from codeclash.arenas.gomoku.gomoku import GomokuArena
Expand All @@ -25,6 +26,7 @@
BridgeArena,
ChessArena,
CoreWarArena,
CybORGArena,
DummyArena,
FiggieArena,
GomokuArena,
Expand Down
26 changes: 26 additions & 0 deletions codeclash/arenas/cyborg/CybORG.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Image for the CybORG arena: Python 3.11 with CybORG installed and the starter workspace committed to git.
FROM python:3.11-slim-bookworm

# Suppress interactive apt prompts, skip writing .pyc files, and disable pip's download cache.
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1

# System packages needed to clone and build CybORG; the apt lists are removed afterwards.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
ca-certificates git build-essential jq \
&& rm -rf /var/lib/apt/lists/*

# Install CybORG in editable mode from a pinned commit.
# NOTE(review): the "numpy<1.24" pin is presumably required by this CybORG revision - confirm before relaxing it.
RUN python -m pip install --upgrade pip \
&& git clone https://github.com/cage-challenge/CybORG.git /opt/CybORG \
&& cd /opt/CybORG \
&& git checkout a2d03f99e587af153ae0ac50fb94ba6272e4fff2 \
&& python -m pip install "numpy<1.24" -e /opt/CybORG

WORKDIR /workspace

# Starter files (runner script, baseline agent, README) become the workspace root.
COPY codeclash/arenas/cyborg/runtime/ /workspace/

# Commit the starter workspace as the initial git revision.
RUN git init \
&& git config user.email "player@codeclash.com" \
&& git config user.name "Player" \
&& git add . \
&& git commit -m "Initial CybORG workspace"
3 changes: 3 additions & 0 deletions codeclash/arenas/cyborg/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from codeclash.arenas.cyborg.cyborg import CybORGArena

__all__ = ["CybORGArena"]
128 changes: 128 additions & 0 deletions codeclash/arenas/cyborg/cyborg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import json
import shlex
import subprocess

from codeclash.agents.player import Player
from codeclash.arenas.arena import CodeArena, RoundStats
from codeclash.constants import RESULT_TIE
from codeclash.utils.environment import assert_zero_exit_code

# File name of the per-round results JSON: passed to run_cyborg.py via --output and read back in get_results.
RESULTS_JSON = "cyborg_results.json"


class CybORGArena(CodeArena):
    """Arena that scores `cyborg_agent.py` submissions on simulated CybORG DroneSwarm episodes.

    A round invokes `run_cyborg.py` once for all players with an `--output` path
    for a JSON results file. `get_results` then reads the per-player average
    scores from that file and declares the single highest scorer the winner;
    any tie for first place yields `RESULT_TIE`.
    """

    name: str = "CybORG"
    submission: str = "cyborg_agent.py"
    description: str = """CybORG is a simulated cyber-defense arena based on the CAGE Challenge 3 DroneSwarm scenario.

Your bot is a Python file named `cyborg_agent.py` that defines a class named `MyAgent`.
`MyAgent` should inherit from a CybORG BaseAgent-compatible class, for example:

    from CybORG.Agents import RandomAgent

    class MyAgent(RandomAgent):
        ...

Each round evaluates every submitted agent independently on the same seeded DroneSwarm episodes.
Your agent controls the blue-team drone agents through CybORG's simulated PettingZoo interface.
The objective is to maximize average episode reward. This arena uses CybORG simulation only and does
not run real exploit tools or interact with external networks.
"""
    # Fallback values used by `_game_arg` when the game config does not override them.
    default_args: dict = {
        "steps_per_episode": 30,
        "num_drones": 18,
        "timeout": 240,
    }

    def _configured_args(self) -> dict:
        """Return the `args` section of the game config, or `{}` when it is missing or null."""
        # `or {}` also covers an explicit null (e.g. an empty `args:` key in YAML),
        # which a plain `.get("args", {})` would pass through as None.
        return self.game_config.get("args") or {}

    def _game_arg(self, key: str):
        """Return the configured value for `key`, falling back to `default_args[key]`.

        Raises:
            KeyError: If `key` has no entry in `default_args`.
        """
        fallback = self.default_args[key]
        return self._configured_args().get(key, fallback)

    def _episodes_per_round(self) -> int:
        """Return the number of episodes to run per round.

        `args.episodes_per_round` takes precedence; otherwise the top-level
        `sims_per_round` config value is used.
        """
        args = self._configured_args()
        if "episodes_per_round" in args:
            return int(args["episodes_per_round"])
        return int(self.game_config["sims_per_round"])

    def validate_code(self, agent: Player) -> tuple[bool, str | None]:
        """Check that the player's submission exists, is non-empty, compiles, and exposes `MyAgent`.

        All checks run as shell commands inside the player's environment.

        Returns:
            `(True, None)` when every check passes, otherwise `(False, reason)`.
        """
        quoted_submission = shlex.quote(self.submission)
        file_check = agent.environment.execute(f"test -f {quoted_submission} && echo exists")
        if "exists" not in file_check["output"]:
            return False, f"Submission file `{self.submission}` not found in the workspace root"

        content = agent.environment.execute(f"cat {quoted_submission}")["output"]
        if not content.strip():
            return False, f"`{self.submission}` is empty"

        syntax_check = agent.environment.execute(f"python -m py_compile {quoted_submission}")
        if syntax_check["returncode"] != 0:
            return False, f"Python syntax error in `{self.submission}`:\n{syntax_check['output']}"

        # Import the submission in a throwaway interpreter and verify the MyAgent contract there,
        # so a broken submission cannot affect this process.
        import_check = agent.environment.execute(
            "python - <<'PY'\n"
            "import importlib.util\n"
            f"spec = importlib.util.spec_from_file_location('submission_agent', {self.submission!r})\n"
            "module = importlib.util.module_from_spec(spec)\n"
            "spec.loader.exec_module(module)\n"
            "assert hasattr(module, 'MyAgent'), 'MyAgent class not found'\n"
            "from CybORG.Agents import BaseAgent\n"
            "assert issubclass(module.MyAgent, BaseAgent), 'MyAgent must inherit from a CybORG BaseAgent class'\n"
            "PY"
        )
        if import_check["returncode"] != 0:
            return False, f"Could not import `MyAgent` from `{self.submission}`:\n{import_check['output']}"

        return True, None

    def execute_round(self, agents: list[Player]) -> None:
        """Run `run_cyborg.py` for all players and require a zero exit code.

        Raises:
            RuntimeError: If the command exceeds the configured `timeout`.
        """
        agent_args = []
        for agent in agents:
            # Each player's submission is addressed as /<player name>/<submission file>.
            agent_args.extend(["--agent", f"{agent.name}=/{agent.name}/{self.submission}"])

        cmd = [
            "python",
            "run_cyborg.py",
            "--episodes",
            str(self._episodes_per_round()),
            "--steps",
            str(self._game_arg("steps_per_episode")),
            "--drones",
            str(self._game_arg("num_drones")),
            "--output",
            str(self.log_env / RESULTS_JSON),
            *agent_args,
        ]
        full_cmd = " ".join(shlex.quote(part) for part in cmd)
        self.logger.info(f"Running game: {full_cmd}")
        try:
            response = self.environment.execute(full_cmd, timeout=int(self._game_arg("timeout")))
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError("CybORG round timed out") from exc
        assert_zero_exit_code(response, logger=self.logger)

    def _load_results(self, result_file) -> dict | None:
        """Return the parsed results object, or `None` if the file is missing, unreadable, or malformed."""
        if not result_file.exists():
            self.logger.error(f"Missing result file: {result_file}")
            return None
        try:
            with open(result_file) as f:
                result = json.load(f)
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
            self.logger.error(f"Unreadable result file {result_file}: {exc}")
            return None
        if not isinstance(result, dict):
            self.logger.error(f"Unexpected content in result file: {result_file}")
            return None
        return result

    def get_results(self, agents: list[Player], round_num: int, stats: RoundStats):
        """Populate `stats` with this round's scores, details, and winner.

        When no usable results file exists, every player is scored 0.0 and the
        round is a tie. Otherwise each player gets their entry from
        `average_scores` (0.0 if absent), and the winner is the unique top
        scorer or `RESULT_TIE` when the top score is shared.
        """
        result = self._load_results(self.log_round(round_num) / RESULTS_JSON)
        if result is None:
            stats.winner = RESULT_TIE
            for agent in agents:
                stats.scores[agent.name] = 0.0
                stats.player_stats[agent.name].score = 0.0
            return

        scores = {agent.name: 0.0 for agent in agents}
        # Ignore entries for names that are not part of this round.
        for player, score in (result.get("average_scores") or {}).items():
            if player in scores:
                scores[player] = float(score)

        stats.scores = scores
        stats.details = result.get("details", [])
        for player, score in scores.items():
            stats.player_stats[player].score = score

        if not scores:
            stats.winner = RESULT_TIE
            return

        top_score = max(scores.values())
        winners = [player for player, score in scores.items() if score == top_score]
        stats.winner = winners[0] if len(winners) == 1 else RESULT_TIE
2 changes: 2 additions & 0 deletions codeclash/arenas/cyborg/runtime/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
__pycache__/
*.py[cod]
15 changes: 15 additions & 0 deletions codeclash/arenas/cyborg/runtime/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# CybORG CodeClash Workspace

Edit `cyborg_agent.py`.

Your file must define `MyAgent`, a CybORG `BaseAgent` subclass. A safe starting point is:

```python
from CybORG.Agents import RandomAgent


class MyAgent(RandomAgent):
pass
```

The arena runs simulated CAGE Challenge 3 DroneSwarm episodes and scores agents by average reward.
10 changes: 10 additions & 0 deletions codeclash/arenas/cyborg/runtime/cyborg_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from CybORG.Agents import RandomAgent


class MyAgent(RandomAgent):
    """Starter blue-team agent for the CybORG arena.

    As shipped it adds nothing on top of `RandomAgent`. Extend this class to
    choose better defensive actions in the simulated DroneSwarm scenario.
    """
155 changes: 155 additions & 0 deletions codeclash/arenas/cyborg/runtime/run_cyborg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import argparse
import importlib.util
import json
import random
import re
import traceback
from pathlib import Path
from statistics import mean

import numpy as np
from CybORG import CybORG
from CybORG.Agents import BaseAgent
from CybORG.Agents.Wrappers.PettingZooParallelWrapper import PettingZooParallelWrapper
from CybORG.Simulator.Scenarios import DroneSwarmScenarioGenerator

# Score recorded for an episode in which evaluate_player caught an exception.
CRASH_SCORE = -1_000_000.0


def safe_module_name(player_name: str) -> str:
    """Build a lowercase, identifier-safe module name for a player's submission.

    Each run of non-word characters is collapsed into a single underscore, and
    a ``player_`` prefix is inserted when the sanitized name is empty or begins
    with a digit. The result always starts with ``codeclash_cyborg_``.
    """
    sanitized = re.sub(r"\W+", "_", player_name)
    needs_prefix = sanitized == "" or sanitized[0].isdigit()
    stem = f"player_{sanitized}" if needs_prefix else sanitized
    return "codeclash_cyborg_" + stem.lower()


def load_agent_class(player_name: str, path: str):
    """Import the submission file at `path` and return its `MyAgent` class.

    The file is executed as a module named by `safe_module_name(player_name)`;
    any exception raised by the submission's top-level code propagates.

    Raises:
        RuntimeError: If no module spec can be built for `path`, the module
            does not define `MyAgent`, or `MyAgent` is not a class derived
            from CybORG's `BaseAgent`.
    """
    spec = importlib.util.spec_from_file_location(safe_module_name(player_name), path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"Could not load module spec from {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    if not hasattr(module, "MyAgent"):
        raise RuntimeError(f"{path} does not define MyAgent")
    agent_class = module.MyAgent
    # issubclass() raises TypeError when its first argument is not a class
    # (e.g. `MyAgent = some_function`), so rule that out first and report it
    # with the same RuntimeError as any other contract violation.
    if not (isinstance(agent_class, type) and issubclass(agent_class, BaseAgent)):
        raise RuntimeError(f"{path} MyAgent must inherit from CybORG BaseAgent")
    return agent_class


def make_agent(agent_class: type, agent_name: str):
    """Instantiate `agent_class`, trying progressively simpler constructor signatures.

    The attempts are, in order: `agent_class(name=agent_name)`,
    `agent_class(agent_name)`, and finally `agent_class()`. A `TypeError` from
    one attempt moves on to the next; a `TypeError` from the last attempt
    propagates to the caller.
    """
    constructors = (
        lambda: agent_class(name=agent_name),
        lambda: agent_class(agent_name),
    )
    for construct in constructors:
        try:
            return construct()
        except TypeError:
            continue
    return agent_class()


def evaluate_player(
    player_name: str,
    agent_class: type,
    *,
    episode_idx: int,
    steps: int,
    drones: int,
) -> dict:
    """Run one DroneSwarm episode with `agent_class` controlling every agent and score it.

    One `agent_class` instance is created per entry in `env.possible_agents`.
    The episode score is the sum over steps of the mean per-agent reward for
    that step.

    Args:
        player_name: Name recorded in the returned result.
        agent_class: Class instantiated via `make_agent` for each agent.
        episode_idx: Episode number; also determines the seed (`4100 + episode_idx`).
        steps: Maximum number of environment steps to run.
        drones: Value passed as `num_drones` to the scenario generator.

    Returns:
        A dict with `player`, `episode`, `score`, `steps_completed` and
        `status`. If anything raises, `status` is ``"error"``, `score` is
        `CRASH_SCORE`, and `error` / `traceback` describe the failure.
    """
    seed = 4100 + episode_idx
    random.seed(seed)
    np.random.seed(seed)
    # NOTE(review): only the global `random` and NumPy generators are seeded here. If CybORG
    # or the agents draw from their own generators, episodes may not be reproducible across
    # players - TODO confirm against the pinned CybORG version.

    try:
        scenario = DroneSwarmScenarioGenerator(num_drones=drones)
        env = PettingZooParallelWrapper(CybORG(scenario, "sim"))
        observations = env.reset()
        action_spaces = env.action_spaces
        agents = {agent_name: make_agent(agent_class, agent_name) for agent_name in env.possible_agents}

        for agent_name, agent in agents.items():
            if hasattr(agent, "set_initial_values"):
                agent.set_initial_values(action_spaces[agent_name], observations[agent_name])

        step_rewards = []
        steps_completed = 0
        for _ in range(steps):
            # Only agents still listed in env.agents act on this step.
            actions = {
                agent_name: agents[agent_name].get_action(observations[agent_name], action_spaces[agent_name])
                for agent_name in env.agents
            }
            observations, rewards, done, _info = env.step(actions)
            steps_completed += 1
            # statistics.mean raises StatisticsError on empty input; a step that yields no
            # rewards contributes nothing instead of turning the episode into a crash.
            if rewards:
                step_rewards.append(mean(rewards.values()))
            if all(done.values()):
                break

        for agent in agents.values():
            if hasattr(agent, "end_episode"):
                agent.end_episode()

        return {
            "player": player_name,
            "episode": episode_idx,
            "score": float(sum(step_rewards)),
            "steps_completed": steps_completed,
            "status": "ok",
        }
    except Exception as exc:
        # Deliberately broad: a failing submission must not abort the other players' evaluation.
        return {
            "player": player_name,
            "episode": episode_idx,
            "score": CRASH_SCORE,
            "steps_completed": 0,
            "status": "error",
            "error": f"{type(exc).__name__}: {exc}",
            "traceback": traceback.format_exc(limit=5),
        }


def parse_agent_arg(value: str) -> tuple[str, str]:
    """Parse one `--agent NAME=/path/to/cyborg_agent.py` value into `(name, path)`.

    The value is split on the first ``=`` only, so the path itself may contain ``=``.

    Raises:
        argparse.ArgumentTypeError: If the ``=`` separator is missing, the name
            is empty, or the path does not point at an existing regular file.
    """
    name, separator, path = value.partition("=")
    if not separator:
        raise argparse.ArgumentTypeError("--agent values must be NAME=/path/to/cyborg_agent.py")
    if not name:
        raise argparse.ArgumentTypeError("agent name cannot be empty")
    agent_path = Path(path)
    if not agent_path.exists():
        raise argparse.ArgumentTypeError(f"agent path does not exist: {path}")
    # A directory exists but cannot be loaded as a module; reject it here with a clear message.
    if not agent_path.is_file():
        raise argparse.ArgumentTypeError(f"agent path is not a file: {path}")
    return name, path


def main() -> None:
    """Evaluate every `--agent` on the same episode indices and write a JSON summary to `--output`.

    The output object contains `average_scores` and `total_scores` keyed by
    player name, the `episodes` count, and `details`: one JSON-encoded string
    per (episode, player) result from `evaluate_player`.

    Exits through `parser.error` when `--episodes` is below 1 or an agent name
    is given more than once.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--agent", action="append", type=parse_agent_arg, required=True)
    parser.add_argument("--episodes", type=int, default=3)
    parser.add_argument("--steps", type=int, default=30)
    parser.add_argument("--drones", type=int, default=18)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    # The averages below divide by the episode count, so zero or negative values are rejected up front.
    if args.episodes < 1:
        parser.error("--episodes must be at least 1")

    # Agents are collected into a dict keyed by name; a repeated name would silently drop a player.
    names = [name for name, _path in args.agent]
    duplicates = sorted({name for name in names if names.count(name) > 1})
    if duplicates:
        parser.error(f"duplicate agent names: {', '.join(duplicates)}")

    agent_classes = {name: load_agent_class(name, path) for name, path in args.agent}
    totals = {name: 0.0 for name in agent_classes}
    details = []

    for episode_idx in range(args.episodes):
        for player_name, agent_class in agent_classes.items():
            result = evaluate_player(
                player_name,
                agent_class,
                episode_idx=episode_idx,
                steps=args.steps,
                drones=args.drones,
            )
            totals[player_name] += result["score"]
            details.append(result)

    averages = {player: score / args.episodes for player, score in totals.items()}
    output = {
        "average_scores": averages,
        "total_scores": totals,
        "episodes": args.episodes,
        "details": [json.dumps(item, sort_keys=True) for item in details],
    }
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(output, indent=2, sort_keys=True))


if __name__ == "__main__":
    main()
Loading
Loading