From 2caa7839afb224f38a4444bb659c17a08613ecf5 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Wed, 29 Apr 2026 02:16:43 +0000 Subject: [PATCH 01/13] Support official transformers 5 without patches --- dev/run_qwen3_5_localbackend_yes_no_maybe.py | 175 -------- dev/yes-no-maybe-local-backend.py | 110 +++++ ...yes-no-maybe.py => yes-no-maybe-tinker.py} | 0 pyproject.toml | 32 +- requirements/backend.vcs.txt | 2 +- src/art/__init__.py | 12 - src/art/preprocessing/tokenize.py | 60 ++- src/art/transformers/__init__.py | 0 src/art/transformers/patches.py | 37 -- src/art/unsloth/train.py | 2 +- src/art/vllm/patches.py | 87 +--- tests/unit/test_preprocessing_tokenize.py | 15 +- tests/unit/test_vllm_patches_contract.py | 27 -- uv.lock | 386 +++++------------- 14 files changed, 260 insertions(+), 685 deletions(-) delete mode 100644 dev/run_qwen3_5_localbackend_yes_no_maybe.py create mode 100644 dev/yes-no-maybe-local-backend.py rename dev/{yes-no-maybe.py => yes-no-maybe-tinker.py} (100%) delete mode 100644 src/art/transformers/__init__.py delete mode 100644 src/art/transformers/patches.py diff --git a/dev/run_qwen3_5_localbackend_yes_no_maybe.py b/dev/run_qwen3_5_localbackend_yes_no_maybe.py deleted file mode 100644 index ad79d213f..000000000 --- a/dev/run_qwen3_5_localbackend_yes_no_maybe.py +++ /dev/null @@ -1,175 +0,0 @@ -"""Launch a multi-step Qwen3.5 LocalBackend yes-no-maybe run on SkyPilot.""" - -import argparse -import os -import textwrap - -from dotenv import load_dotenv -import sky -from sky import ClusterStatus - -load_dotenv() - -DEFAULT_IMAGE_ID = "docker:nvidia/cuda:12.8.1-devel-ubuntu22.04" - - -def _format_env_bool(value: bool) -> str: - return "true" if value else "false" - - -def _format_int_list(values: list[int]) -> str: - return ",".join(str(value) for value in values) - - -parser = argparse.ArgumentParser( - description="Launch a Qwen3.5 LocalBackend yes-no-maybe convergence run." 
-) -parser.add_argument("--fast", action="store_true") -parser.add_argument("--base-model", type=str, default="Qwen/Qwen3.5-4B") -parser.add_argument("--accelerator", type=str, default="H200:1") -parser.add_argument( - "--cluster-name", type=str, default="art-qwen35-localbackend-yes-no-maybe" -) -parser.add_argument("--image-id", type=str, default=DEFAULT_IMAGE_ID) -parser.add_argument("--project", type=str, default="qwen35-localbackend-yes-no-maybe") -parser.add_argument("--gpu-memory-utilization", type=float, default=0.35) -parser.add_argument("--max-model-len", type=int, default=1024) -parser.add_argument("--max-seq-length", type=int, default=1024) -parser.add_argument("--max-num-seqs", type=int, default=8) -parser.add_argument("--num-steps", type=int, default=10) -parser.add_argument("--rollouts-per-prompt", type=int, default=8) -parser.add_argument("--eval-prompts", type=int, default=24) -parser.add_argument("--eval-every-n-steps", type=int, default=1) -parser.add_argument("--max-tokens", type=int, default=5) -parser.add_argument("--learning-rate", type=float, default=5e-5) -parser.add_argument( - "--load-in-4bit", action=argparse.BooleanOptionalAction, default=False -) -parser.add_argument( - "--load-in-16bit", action=argparse.BooleanOptionalAction, default=True -) -parser.add_argument( - "--enable-thinking", action=argparse.BooleanOptionalAction, default=False -) -parser.add_argument( - "--rollout-weights-mode", - choices=("lora", "merged"), - default=None, -) -parser.add_argument("--trainer-gpu-ids", type=int, nargs="+") -parser.add_argument("--inference-gpu-ids", type=int, nargs="+") -args = parser.parse_args() - -assert (args.trainer_gpu_ids is None) == (args.inference_gpu_ids is None), ( - "--trainer-gpu-ids and --inference-gpu-ids must both be set or both unset" -) - -cluster_name = args.cluster_name -cluster_prefix = os.environ.get("CLUSTER_PREFIX") -if cluster_prefix: - cluster_name = f"{cluster_prefix}-{cluster_name}" - -setup_script = 
textwrap.dedent("""\ - echo 'Setting up environment...' - apt-get update - apt-get install -y python3 python3-pip python-is-python3 git curl ninja-build - curl -LsSf https://astral.sh/uv/install.sh | sh - source $HOME/.local/bin/env -""") - -env = [ - f"PROJECT={args.project}", - "MODEL_NAME=qwen35-localbackend-ynm-$(date +%Y%m%d-%H%M%S)", - f"BASE_MODEL={args.base_model}", - f"GPU_MEMORY_UTILIZATION={args.gpu_memory_utilization}", - f"MAX_MODEL_LEN={args.max_model_len}", - f"MAX_SEQ_LENGTH={args.max_seq_length}", - f"MAX_NUM_SEQS={args.max_num_seqs}", - "ENFORCE_EAGER=true", - f"LOAD_IN_4BIT={_format_env_bool(args.load_in_4bit)}", - f"LOAD_IN_16BIT={_format_env_bool(args.load_in_16bit)}", - f"ENABLE_THINKING={_format_env_bool(args.enable_thinking)}", - f"NUM_STEPS={args.num_steps}", - f"ROLLOUTS_PER_PROMPT={args.rollouts_per_prompt}", - f"EVAL_PROMPTS={args.eval_prompts}", - f"EVAL_EVERY_N_STEPS={args.eval_every_n_steps}", - f"MAX_TOKENS={args.max_tokens}", - f"LEARNING_RATE={args.learning_rate}", -] -if args.trainer_gpu_ids is not None: - env.extend( - [ - f"TRAINER_GPU_IDS={_format_int_list(args.trainer_gpu_ids)}", - f"INFERENCE_GPU_IDS={_format_int_list(args.inference_gpu_ids)}", - ] - ) -if args.rollout_weights_mode is not None: - env.append(f"ROLLOUT_WEIGHTS_MODE={args.rollout_weights_mode}") -env_block = " \\\n ".join(env) - -run_script = textwrap.dedent( - f"""\ - source $HOME/.local/bin/env - cd ~/sky_workdir - ~/.local/bin/uv sync --extra backend - {env_block} \ - ~/.local/bin/uv run dev/yes-no-maybe-metrics.py -""" -) - -task = sky.Task( - name="qwen3.5-localbackend-yes-no-maybe", - setup=setup_script, - run=run_script, - workdir=".", -) -task.set_resources( - sky.Resources( - accelerators=args.accelerator, - cloud=sky.clouds.Kubernetes(), - image_id=args.image_id, - ) -) -task.set_file_mounts({"~/sky_workdir/.env": ".env"}) - -print(f"Launching on cluster: {cluster_name}") -print(f" base_model: {args.base_model}") -print(f" project: {args.project}") 
-print(f" accelerator: {args.accelerator}") -print(f" image_id: {args.image_id}") -print(f" gpu_memory_utilization: {args.gpu_memory_utilization}") -print(f" max_model_len: {args.max_model_len}") -print(f" max_seq_length: {args.max_seq_length}") -print(f" max_num_seqs: {args.max_num_seqs}") -print(f" num_steps: {args.num_steps}") -print(f" rollouts_per_prompt: {args.rollouts_per_prompt}") -print(f" eval_prompts: {args.eval_prompts}") -print(f" eval_every_n_steps: {args.eval_every_n_steps}") -print(f" max_tokens: {args.max_tokens}") -print(f" learning_rate: {args.learning_rate}") -print(f" load_in_4bit: {args.load_in_4bit}") -print(f" load_in_16bit: {args.load_in_16bit}") -print(f" enable_thinking: {args.enable_thinking}") -print(f" rollout_weights_mode: {args.rollout_weights_mode}") -print(f" trainer_gpu_ids: {args.trainer_gpu_ids}") -print(f" inference_gpu_ids: {args.inference_gpu_ids}") - -cluster_status = sky.stream_and_get(sky.status(cluster_names=[cluster_name])) -if cluster_status and cluster_status[0]["status"] == ClusterStatus.UP: - print(f"Cluster {cluster_name} is UP. Canceling any active jobs...") - sky.stream_and_get(sky.cancel(cluster_name, all=True)) - -job_id, _ = sky.stream_and_get( - sky.launch( - task, - cluster_name=cluster_name, - retry_until_up=True, - idle_minutes_to_autostop=60, - down=True, - fast=args.fast, - ) -) - -print(f"Job submitted (ID: {job_id}). 
Streaming logs...") -exit_code = sky.tail_logs(cluster_name=cluster_name, job_id=job_id, follow=True) -print(f"Job {job_id} finished with exit code {exit_code}.") diff --git a/dev/yes-no-maybe-local-backend.py b/dev/yes-no-maybe-local-backend.py new file mode 100644 index 000000000..c9c1d41e7 --- /dev/null +++ b/dev/yes-no-maybe-local-backend.py @@ -0,0 +1,110 @@ +import asyncio +from itertools import permutations +import os +import random +import uuid + +from dotenv import load_dotenv +import openai + +try: + import unsloth # noqa: F401 +except ImportError: + pass + +import art +from art.local import LocalBackend + + +async def rollout( + client: openai.AsyncOpenAI, model: art.TrainableModel, prompt: str +) -> art.Trajectory: + messages: art.Messages = [ + { + "role": "user", + "content": prompt, + } + ] + chat_completion = await client.chat.completions.create( + messages=messages, model=model.get_inference_name(), max_tokens=100, timeout=100 + ) + choice = chat_completion.choices[0] + content = choice.message.content + assert isinstance(content, str) + if content == "yes": + reward = 0.5 + elif content == "no": + reward = 0.75 + elif content == "maybe": + reward = 1.0 + else: + reward = random.random() + return art.Trajectory(messages_and_choices=[*messages, choice], reward=reward) + + +def with_quotes(w: str) -> str: + return f"'{w}'" + + +async def main(): + load_dotenv() + + backend = LocalBackend() + base_model = os.environ.get("BASE_MODEL", "Qwen/Qwen3.6-27B") + model = art.TrainableModel( + name=os.environ.get("MODEL_NAME", f"yes-no-maybe-local-{uuid.uuid4().hex[:8]}"), + project="yes-no-maybe", + base_model=base_model, + _internal_config=art.dev.InternalModelConfig( + engine_args=art.dev.EngineArgs(enforce_eager=True), + ), + ) + + try: + await model.register(backend) + + prompts = [ + f"{prefix} with {', '.join([with_quotes(w) if use_quotes else w for w in words]) if len(words) == 3 else f'{words[0]}' + (f' or {words[1]}' if len(words) > 1 else '')}" + 
for prefix in ["respond", "just respond"] + for use_quotes in [True, False] + for words in ( + list(p) for n in [3, 2] for p in permutations(["yes", "no", "maybe"], n) + ) + ] + + openai_client = model.openai_client() + max_steps = int(os.environ.get("NUM_STEPS", "20")) + groups_per_step = int(os.environ.get("GROUPS_PER_STEP", "8")) + rollouts_per_group = int(os.environ.get("ROLLOUTS_PER_GROUP", "4")) + start_step = await model.get_step() + for _ in range(start_step, start_step + max_steps): + step_prompts = random.sample( + prompts, + k=min(groups_per_step, len(prompts)), + ) + train_groups = await art.gather_trajectory_groups( + ( + art.TrajectoryGroup( + rollout(openai_client, model, prompt) + for _ in range(rollouts_per_group) + ) + for prompt in step_prompts + ) + ) + result = await backend.train( + model, + train_groups, + learning_rate=1e-4, + ) + await model.log( + train_groups, + metrics=result.metrics, + step=result.step, + split="train", + ) + finally: + await backend.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/dev/yes-no-maybe.py b/dev/yes-no-maybe-tinker.py similarity index 100% rename from dev/yes-no-maybe.py rename to dev/yes-no-maybe-tinker.py diff --git a/pyproject.toml b/pyproject.toml index 3fd90ff4a..a0eba8a9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,28 +19,28 @@ dependencies = [ plotting = ["matplotlib>=3.10.1", "seaborn>=0.13.2"] backend = [ - "peft>=0.14.0", + "peft>=0.18.0", "hf-xet>=1.1.0", - "bitsandbytes>=0.45.2", - "unsloth==2026.3.3", - "unsloth-zoo==2026.3.1", + "bitsandbytes>=0.45.5", + "unsloth==2026.4.8", + "unsloth-zoo==2026.4.9", "torch==2.10.0", - "torchao==0.16.0", - "accelerate==1.7.0", + "torchao==0.17.0", + "accelerate==1.13.0", "awscli>=1.38.1", "setuptools>=78.1.0", "wandb==0.25.0", - "transformers==5.2.0", + "transformers==5.6.2", "duckdb>=1.0.0", "pyarrow>=15.0.0", - "trl==0.20.0", + "trl==1.3.0", "nbclient>=0.10.1", "pytest>=8.4.1", "nbmake>=1.5.5", - "gql<4", + "gql<5", 
"nvidia-cudnn-frontend<1.21 ; sys_platform == 'linux'", "nvidia-resiliency-ext<0.5 ; sys_platform == 'linux'", - "vllm @ https://github.com/vivekkalyan/vllm/releases/download/v0.17.0-art1/vllm-0.17.0%2Bart1-cp38-abi3-manylinux_2_31_x86_64.whl ; sys_platform == 'linux'", + "vllm==0.19.1 ; sys_platform == 'linux'", ] megatron = [ "numpy<2", @@ -74,7 +74,7 @@ tinker = [ "pydantic>=2.12.5", "tinker>=0.8.1", "torch==2.10.0", - "transformers==5.2.0", + "transformers==5.6.2", "uvicorn>=0.35.0", "datrie>=0.8.3", ] @@ -82,9 +82,6 @@ tinker = [ [project.scripts] art = "art.cli:app" -[project.entry-points."vllm.general_plugins"] -art = "art.vllm.patches:patch_transformers_v5_compat" - [build-system] requires = ["hatchling"] build-backend = "hatchling.build" @@ -135,10 +132,13 @@ markers = [ [tool.uv] required-version = ">=0.11.7" override-dependencies = [ - "flashinfer-python==0.6.1", + "datasets==4.8.5", + "flashinfer-python==0.6.6", "numpy<2", "nvidia-resiliency-ext<0.5", - "quack-kernels==0.2.5", + "quack-kernels>=0.2.7", + "transformers==5.6.2", + "trl==1.3.0", "transformer-engine==2.11.0", ] exclude-dependencies = ["pynvml", "emerging-optimizers"] diff --git a/requirements/backend.vcs.txt b/requirements/backend.vcs.txt index 13539e64a..6fd0a7507 100644 --- a/requirements/backend.vcs.txt +++ b/requirements/backend.vcs.txt @@ -8,4 +8,4 @@ torchtune @ git+https://github.com/pytorch/torchtune.git@2344509cf83bd886538fe3e8263e5145d1afb5c2 # Unsloth Zoo pinned to known-good commit - unsloth-zoo @ git+https://github.com/bradhilton/unsloth-zoo@323cf5e + unsloth-zoo @ git+https://github.com/bradhilton/unsloth-zoo@f536ee6f554c11dea3ae142e05ab487c206c5c70 diff --git a/src/art/__init__.py b/src/art/__init__.py index 8e494e6c4..e8e73743b 100644 --- a/src/art/__init__.py +++ b/src/art/__init__.py @@ -54,18 +54,6 @@ def __init__(self, **kwargs): if os.environ.get("IMPORT_UNSLOTH", "0") == "1": import unsloth # noqa: F401 -try: - import transformers - - try: - from .transformers.patches 
import patch_preprocess_mask_arguments - - patch_preprocess_mask_arguments() - except Exception: - pass -except ImportError: - pass - from . import dev from .auto_trajectory import auto_trajectory, capture_auto_trajectory diff --git a/src/art/preprocessing/tokenize.py b/src/art/preprocessing/tokenize.py index 7fb31aa50..c2b13364b 100644 --- a/src/art/preprocessing/tokenize.py +++ b/src/art/preprocessing/tokenize.py @@ -11,7 +11,7 @@ from PIL import Image import torch from transformers.image_processing_utils import BaseImageProcessor -from transformers.tokenization_utils_base import BatchEncoding, PreTrainedTokenizerBase +from transformers.tokenization_utils_base import PreTrainedTokenizerBase from ..trajectories import History, Trajectory, TrajectoryGroup, get_messages @@ -132,23 +132,6 @@ class SFTBatch: num_trainable_tokens: int -def _apply_chat_template_token_ids( - tokenizer: PreTrainedTokenizerBase, - messages: list[dict[str, Any]], - **kwargs: Any, -) -> list[int]: - output = tokenizer.apply_chat_template(messages, **kwargs) - if isinstance(output, BatchEncoding): - output = output["input_ids"] - if isinstance(output, torch.Tensor): - output = output.tolist() - assert isinstance(output, list) - if output and isinstance(output[0], list): - assert len(output) == 1 - output = output[0] - return cast(list[int], output) - - def tokenize_trajectory_groups( tokenizer: "PreTrainedTokenizerBase", trajectory_groups: list[TrajectoryGroup], @@ -274,11 +257,14 @@ def tokenize_trajectory( tokenize=False, ), ) - original_token_ids = _apply_chat_template_token_ids( - tokenizer, - messages, - tools=tools, - continue_final_message=True, + original_token_ids = cast( + list[int], + tokenizer.apply_chat_template( + messages, + tools=tools, + continue_final_message=True, + return_dict=False, + ), ) sentinel_token_id = max(set(range(tokenizer.vocab_size)) - set(original_token_ids)) sentinel_token = tokenizer.decode(sentinel_token_id) @@ -305,11 +291,14 @@ def tokenize_trajectory( ) 
else: token_template_messages.append(cast(dict[str, Any], message)) - token_ids = _apply_chat_template_token_ids( - tokenizer, - token_template_messages, - tools=tools, - continue_final_message=True, + token_ids = cast( + list[int], + tokenizer.apply_chat_template( + token_template_messages, + tools=tools, + continue_final_message=True, + return_dict=False, + ), ) assistant_mask: list[int] = [0] * len(token_ids) logprobs = [float("nan")] * len(token_ids) @@ -498,12 +487,15 @@ def tokenize_sft_batch( tools = trajectory.tools # Single-step tokenization: apply_chat_template with tokenize=True - input_ids = _apply_chat_template_token_ids( - tokenizer, - cast(Any, messages), - tools=cast(Any, tools), - tokenize=True, - add_generation_prompt=False, + input_ids = cast( + list[int], + tokenizer.apply_chat_template( + cast(Any, messages), + tools=cast(Any, tools), + tokenize=True, + add_generation_prompt=False, + return_dict=False, + ), ) attention_mask = [1] * len(input_ids) diff --git a/src/art/transformers/__init__.py b/src/art/transformers/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/art/transformers/patches.py b/src/art/transformers/patches.py deleted file mode 100644 index 97e09f6c8..000000000 --- a/src/art/transformers/patches.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import TYPE_CHECKING, Optional, Union - -import torch -from transformers import masking_utils -from transformers.cache_utils import Cache -from transformers.configuration_utils import PretrainedConfig - -if TYPE_CHECKING: - from torch.nn.attention.flex_attention import BlockMask - -_preprocess_mask_arguments = masking_utils._preprocess_mask_arguments - - -def _patched_preprocess_mask_arguments( - config: PretrainedConfig, - input_embeds: torch.Tensor, - attention_mask: Optional[Union[torch.Tensor, "BlockMask"]], - cache_position: torch.Tensor, - past_key_values: Optional[Cache], - position_ids: Optional[torch.Tensor], - layer_idx: Optional[int], -) -> tuple[bool, 
Optional[Union[torch.Tensor, "BlockMask"]], int, int]: - if position_ids is not None and len(position_ids.shape) == 3: - position_ids = position_ids[0] - return _preprocess_mask_arguments( - config, - input_embeds, - attention_mask, - cache_position, - past_key_values, - position_ids, - layer_idx, - ) - - -def patch_preprocess_mask_arguments() -> None: - masking_utils._preprocess_mask_arguments = _patched_preprocess_mask_arguments # ty:ignore[invalid-assignment] diff --git a/src/art/unsloth/train.py b/src/art/unsloth/train.py index cea30e3a5..b23b409ec 100644 --- a/src/art/unsloth/train.py +++ b/src/art/unsloth/train.py @@ -350,7 +350,7 @@ async def train( ) -> None: _compute_loss = trainer.compute_loss _log = trainer.log - trainer.compute_loss = get_compute_loss_fn(trainer) + trainer.compute_loss = get_compute_loss_fn(trainer) # ty:ignore[invalid-assignment] trainer.log = get_log_fn(trainer, results_queue) # ty:ignore[invalid-assignment] # Ensure we have a metrics container in the expected format try: diff --git a/src/art/vllm/patches.py b/src/art/vllm/patches.py index 28c4b1fd7..a450f134b 100644 --- a/src/art/vllm/patches.py +++ b/src/art/vllm/patches.py @@ -1,91 +1,6 @@ """Monkey patches and modifications for vLLM.""" -from typing import TYPE_CHECKING, Any - -if TYPE_CHECKING: - from torch import Tensor - - -def patch_transformers_v5_compat() -> None: - _patch_rope_validation_ignore_keys() - _patch_qwen3_vl_moe_tie_word_embeddings() - _patch_qwen3_5_lora() - - -def _patch_rope_validation_ignore_keys() -> None: - from transformers.configuration_utils import PretrainedConfig - - original = PretrainedConfig.convert_rope_params_to_dict - - # Return if already patched - if getattr(original, "__art_patched__", False): - return - - def patched(self: Any, ignore_keys_at_rope_validation: Any = None, **kwargs: Any): - if ignore_keys_at_rope_validation is not None: - ignore_keys_at_rope_validation = set(ignore_keys_at_rope_validation) - return original( - self, - 
ignore_keys_at_rope_validation=ignore_keys_at_rope_validation, - **kwargs, - ) - - patched.__art_patched__ = True # type: ignore[attr-defined] - PretrainedConfig.convert_rope_params_to_dict = patched # type: ignore[method-assign] - - -def _patch_qwen3_vl_moe_tie_word_embeddings() -> None: - from transformers import Qwen3VLMoeTextConfig - - setattr(Qwen3VLMoeTextConfig, "tie_word_embeddings", False) - - -def _patch_qwen3_5_lora() -> None: - from vllm.lora.layers.column_parallel_linear import ( - MergedColumnParallelLinearWithLoRA, - MergedColumnParallelLinearWithShardedLoRA, - ) - from vllm.lora.layers.utils import _not_fully_sharded_can_replace - from vllm.model_executor.models.qwen3_5 import ( - Qwen3_5ForCausalLMBase, - Qwen3_5ForConditionalGeneration, - ) - - projections = ["in_proj_q", "in_proj_k", "in_proj_v", "in_proj_z"] - Qwen3_5ForCausalLMBase.packed_modules_mapping["in_proj_qkvz"] = projections - Qwen3_5ForConditionalGeneration.packed_modules_mapping["in_proj_qkvz"] = projections - - @classmethod - @_not_fully_sharded_can_replace - def can_replace_layer( - cls, - source_layer: Any, - lora_config: Any, - packed_modules_list: list[str], - model_config: Any = None, - ) -> bool: - from vllm.model_executor.layers.linear import MergedColumnParallelLinear - - return type(source_layer) is MergedColumnParallelLinear and len( - packed_modules_list - ) == len(source_layer.output_sizes) - - MergedColumnParallelLinearWithLoRA.can_replace_layer = can_replace_layer - - def slice_lora_a( - self: Any, - lora_a: "list[Tensor | None]", - ) -> "list[Tensor | None]": - output_shard_size = self.lora_a_stacked[0].shape[2] - output_start_idx = self.tp_rank * output_shard_size - return [ - a[output_start_idx : output_start_idx + output_shard_size, :] - if a is not None - else None - for a in lora_a - ] - - MergedColumnParallelLinearWithShardedLoRA.slice_lora_a = slice_lora_a # ty:ignore[invalid-assignment] +from typing import Any def subclass_chat_completion_request() -> None: 
diff --git a/tests/unit/test_preprocessing_tokenize.py b/tests/unit/test_preprocessing_tokenize.py index 644df7d65..70b882d9f 100644 --- a/tests/unit/test_preprocessing_tokenize.py +++ b/tests/unit/test_preprocessing_tokenize.py @@ -4,7 +4,6 @@ from openai.types.chat.chat_completion import Choice import pytest -from transformers.tokenization_utils_base import BatchEncoding import art from art.preprocessing.tokenize import tokenize_sft_batch, tokenize_trajectory @@ -36,14 +35,8 @@ def apply_chat_template( if not tokenize: return rendered token_ids = self.encode(rendered, add_special_tokens=False) - if return_dict is False: - return token_ids - return BatchEncoding( - { - "input_ids": token_ids, - "attention_mask": [1] * len(token_ids), - } - ) + assert return_dict is False + return token_ids def encode(self, text: str, add_special_tokens: bool = False) -> list[int]: del add_special_tokens @@ -94,7 +87,7 @@ def apply_chat_template( ) -def test_tokenize_trajectory_accepts_batchencoding_chat_template_output() -> None: +def test_tokenize_trajectory_requests_list_chat_template_output() -> None: tokenizer = _FakeTokenizer() messages = cast( MessagesAndChoices, @@ -124,7 +117,7 @@ def test_tokenize_trajectory_accepts_batchencoding_chat_template_output() -> Non assert assistant_ids == tokenizer.encode("OK", add_special_tokens=False) -def test_tokenize_sft_batch_accepts_batchencoding_chat_template_output( +def test_tokenize_sft_batch_requests_list_chat_template_output( monkeypatch: pytest.MonkeyPatch, ) -> None: tokenizer = _FakeTokenizer() diff --git a/tests/unit/test_vllm_patches_contract.py b/tests/unit/test_vllm_patches_contract.py index b8f93c399..ca60ffca9 100644 --- a/tests/unit/test_vllm_patches_contract.py +++ b/tests/unit/test_vllm_patches_contract.py @@ -1,7 +1,6 @@ """Unit tests for ART's vLLM patch contract.""" import importlib -from typing import Any, cast import pytest @@ -10,7 +9,6 @@ from art.vllm.patches import ( patch_tool_parser_manager, - 
patch_transformers_v5_compat, subclass_chat_completion_request, ) @@ -61,28 +59,3 @@ def extract_tool_calls_streaming(*_args, **_kwargs): assert isinstance(result, DeltaMessage) finally: setattr(ToolParserManager, "get_tool_parser", original_get_tool_parser) - - -def test_patch_transformers_v5_compat_normalizes_rope_ignore_keys() -> None: - from transformers.configuration_utils import PretrainedConfig - - patch_transformers_v5_compat() - - class DummyRopeConfig: - default_theta = 10000.0 - rope_parameters = None - - def standardize_rope_params(self) -> None: - pass - - def validate_rope(self, ignore_keys=None) -> None: - self.ignore_keys = ignore_keys - - dummy = DummyRopeConfig() - PretrainedConfig.convert_rope_params_to_dict( - cast(Any, dummy), - ignore_keys_at_rope_validation=cast(Any, ["mrope_section"]), - partial_rotary_factor=0.25, - ) - - assert dummy.ignore_keys == {"mrope_section", "partial_rotary_factor"} diff --git a/uv.lock b/uv.lock index 2a63f92d0..2a7538edb 100644 --- a/uv.lock +++ b/uv.lock @@ -22,11 +22,14 @@ resolution-markers = [ [manifest] overrides = [ - { name = "flashinfer-python", specifier = "==0.6.1" }, + { name = "datasets", specifier = "==4.8.5" }, + { name = "flashinfer-python", specifier = "==0.6.6" }, { name = "numpy", specifier = "<2" }, { name = "nvidia-resiliency-ext", specifier = "<0.5" }, - { name = "quack-kernels", specifier = "==0.2.5" }, + { name = "quack-kernels", specifier = ">=0.2.7" }, { name = "transformer-engine", specifier = "==2.11.0" }, + { name = "transformers", specifier = "==5.6.2" }, + { name = "trl", specifier = "==1.3.0" }, ] excludes = [ "emerging-optimizers", @@ -70,7 +73,7 @@ wheels = [ [[package]] name = "accelerate" -version = "1.7.0" +version = "1.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -81,9 +84,9 @@ dependencies = [ { name = "safetensors" }, { name = "torch" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/97/33/47bbd507e3a851d33d19ce7b2141c5ea3689bfae91ba168044d7db24b0e9/accelerate-1.7.0.tar.gz", hash = "sha256:e8a2a5503d6237b9eee73cc8d36cf543f9c2d8dd2c6713450b322f5e6d53a610", size = 376026, upload-time = "2025-05-15T10:00:52.117Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/14/787e5498cd062640f0f3d92ef4ae4063174f76f9afd29d13fc52a319daae/accelerate-1.13.0.tar.gz", hash = "sha256:d631b4e0f5b3de4aff2d7e9e6857d164810dfc3237d54d017f075122d057b236", size = 402835, upload-time = "2026-03-04T19:34:12.359Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/bb/be8146c196ad6e4dec78385d91e92591f8a433576c4e04c342a636fcd811/accelerate-1.7.0-py3-none-any.whl", hash = "sha256:cf57165cca28769c6cf2650812371c81b18e05743dfa3c748524b1bb4f2b272f", size = 362095, upload-time = "2025-05-15T10:00:49.914Z" }, + { url = "https://files.pythonhosted.org/packages/7e/46/02ac5e262d4af18054b3e922b2baedbb2a03289ee792162de60a865defc5/accelerate-1.13.0-py3-none-any.whl", hash = "sha256:cf1a3efb96c18f7b152eb0fa7490f3710b19c3f395699358f08decca2b8b62e0", size = 383744, upload-time = "2026-03-04T19:34:10.313Z" }, ] [[package]] @@ -1373,7 +1376,7 @@ wheels = [ [[package]] name = "compressed-tensors" -version = "0.13.0" +version = "0.15.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "loguru", marker = "sys_platform == 'linux'" }, @@ -1381,9 +1384,9 @@ dependencies = [ { name = "torch", marker = "sys_platform == 'linux'" }, { name = "transformers", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/65/88dd1c58fb9d0ded51b5c86471b937a1525f91fad2211a6f051dc1ea822d/compressed_tensors-0.13.0.tar.gz", hash = "sha256:23893824d3498ea3f1a829f14a8fa85f9a5e76a34c711a038b8d7c619ca9a67c", size = 200995, upload-time = "2025-12-16T16:03:55.397Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/41/1b/c3c4a98ec5f2727656336f07a0c35862195c310d8eb0b2fa5b4be6848680/compressed_tensors-0.15.0.1.tar.gz", hash = "sha256:a8e93054e8a5ec49c980b09ed36c4c1249b4a8ee167920a8e461c4da26e78d99", size = 229412, upload-time = "2026-04-10T14:23:54.708Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/b5/61ac2563c62490922b603c09113a083fd74af3630ec3931e769484d6dcb5/compressed_tensors-0.13.0-py3-none-any.whl", hash = "sha256:3518799c9baf034eb642efb551db6b0537b8713d45a64fe4def26f7f8d6cabec", size = 192620, upload-time = "2025-12-16T16:03:53.041Z" }, + { url = "https://files.pythonhosted.org/packages/a8/52/93833dc1610e017ac5b7dcd59b8304d8ef67d1114c2d124e728a2cbbea12/compressed_tensors-0.15.0.1-py3-none-any.whl", hash = "sha256:e1b1f322e82e475715e242bad46925a304ea8e5c98b5055a15b8eb22fb6bfea9", size = 194260, upload-time = "2026-04-10T14:23:53.098Z" }, ] [[package]] @@ -1668,25 +1671,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/06/fc198cc9bc0170fcc07344c04af5de3a70a67b30aa040120f06415e76c65/cudo_compute-0.3.6-py3-none-any.whl", hash = "sha256:1b72a8f09333106fe9c350d0b35171dce2b339752036f64c38096f4e20d6b5d1", size = 380302, upload-time = "2025-01-08T16:50:45.282Z" }, ] -[[package]] -name = "cupy-cuda12x" -version = "14.0.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cuda-pathfinder", marker = "sys_platform == 'linux'" }, - { name = "numpy", marker = "sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/11/6d089629f44591864bc8a11fa64c9d4fcd1afb4a7217954c806fb47c4fe5/cupy_cuda12x-14.0.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:31e6a33579a06fde3ff238b8b6b72446384d17554b2a3b14f818c9ee44b0c2e6", size = 146237981, upload-time = "2026-02-20T10:22:29.065Z" }, - { url = 
"https://files.pythonhosted.org/packages/37/f0/0f1d79c0c7fccbc2ed0c0ff3be1b0562be60b764c729ca8ded1bd6d953aa/cupy_cuda12x-14.0.1-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:bfbde2e9f7946021b49414f9c800991163f2a56a1318f3d7d69cbb06001a1585", size = 135080693, upload-time = "2026-02-20T10:22:35.843Z" }, - { url = "https://files.pythonhosted.org/packages/38/ca/b93ef9fca1471a65f136a73e10819634c0b83427362fc08fc9f29f935bf0/cupy_cuda12x-14.0.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:f244bc14fad6f1ef0c74abd98afa4b82d2534aecdba911197810ec0047f0d1f3", size = 145578614, upload-time = "2026-02-20T10:22:49.108Z" }, - { url = "https://files.pythonhosted.org/packages/5a/a6/944406223a190815d9df156a1d66f3b0352bd8827dc4a8c752196d616dbc/cupy_cuda12x-14.0.1-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:9f0c81c3509f77be3ae8444759d5b314201b2dfcbbf2ae0d0b5fb7a61f20893c", size = 134613763, upload-time = "2026-02-20T10:22:56.792Z" }, - { url = "https://files.pythonhosted.org/packages/99/67/f967c5aff77bd6ae6765faf20580db80bb8a7e2574e999166de1d4e50146/cupy_cuda12x-14.0.1-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:9d9b1bdcf9fa777593017867e8733192c071b94639a1b3e8b2ee99eb3f3ea760", size = 145128055, upload-time = "2026-02-20T10:23:08.765Z" }, - { url = "https://files.pythonhosted.org/packages/80/53/037c931731151c504cfc00069eb295c903927c92145115623f13bd2ea076/cupy_cuda12x-14.0.1-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:21fcb4e917e43237edcc5e3a1a1241e2a2946ba9e577ce36fd580bd9856f91e8", size = 134227269, upload-time = "2026-02-20T10:23:16.147Z" }, - { url = "https://files.pythonhosted.org/packages/5d/cb/ba61bcd602856aeabf362280cb3c17ed5fe03ae23e84578eb99f5245546c/cupy_cuda12x-14.0.1-cp314-cp314-manylinux2014_aarch64.whl", hash = "sha256:3be87da86d808d9fec23b0a1df001f15f8f145698bc4bebc6d6938fa7e11519f", size = 144976386, upload-time = "2026-02-20T10:23:29.877Z" }, - { url = 
"https://files.pythonhosted.org/packages/ba/73/34e5f334f6b1e5c5dff80af8109979fb0e8461b27e4454517e0e47486455/cupy_cuda12x-14.0.1-cp314-cp314-manylinux2014_x86_64.whl", hash = "sha256:fa356384760e01498d010af2d96de536ef3dad19db1d3a1ad0764e4323fb919f", size = 133521354, upload-time = "2026-02-20T10:23:37.063Z" }, -] - [[package]] name = "cut-cross-entropy" version = "25.1.1" @@ -1725,7 +1709,7 @@ wheels = [ [[package]] name = "datasets" -version = "4.3.0" +version = "4.8.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dill" }, @@ -1743,9 +1727,9 @@ dependencies = [ { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2a/47/325206ac160f7699ed9f1798afa8f8f8d5189b03bf3815654859ac1d5cba/datasets-4.3.0.tar.gz", hash = "sha256:bc9118ed9afd92346c5be7ed3aaa00177eb907c25467f9d072a0d22777efbd2b", size = 582801, upload-time = "2025-10-23T16:31:51.547Z" } +sdist = { url = "https://files.pythonhosted.org/packages/66/34/14cd8e76f907f7d4dca2334cfeec9f81d30fd15c25a015f99aaea694eaed/datasets-4.8.5.tar.gz", hash = "sha256:0f0c1c3d56ffff2c93b2f4c63c95bac94f3d7e8621aea2a2a576275233bba772", size = 605649, upload-time = "2026-04-27T15:43:57.384Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ca/51/409a8184ed35453d9cbb3d6b20d524b1115c2c2d117b85d5e9b06cd70b45/datasets-4.3.0-py3-none-any.whl", hash = "sha256:0ea157e72138b3ca6c7d2415f19a164ecf7d4c4fa72da2a570da286882e96903", size = 506846, upload-time = "2025-10-23T16:31:49.965Z" }, + { url = "https://files.pythonhosted.org/packages/65/99/00f3196036501b53032c4b1ab8337a0b978dee832ed276dae3815df4e8b5/datasets-4.8.5-py3-none-any.whl", hash = "sha256:5079900781719c0e063a8efdd2cd95a31ad0c63209178669cd23cf1b926149ff", size = 528973, upload-time = "2026-04-27T15:43:53.702Z" }, ] [[package]] @@ -2367,9 +2351,17 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/60/ee/a3cba17965482b35c4990af90bad108e82c32edcb59911c37f318b5f4198/flash_linear_attention-0.4.2-py3-none-any.whl", hash = "sha256:c08be006ce4dbe1be81f54938ee8e6fc7968cfba397c8d06c7669e97b8c44c0d", size = 284661, upload-time = "2026-03-12T14:45:44.905Z" }, ] +[[package]] +name = "flashinfer-cubin" +version = "0.6.6" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/e8/826f9452bc5f76b94d7eb025f03dcaf1b51b9ed7790386c0285191e69be4/flashinfer_cubin-0.6.6-py3-none-any.whl", hash = "sha256:36508dfc792eb5ecfb15d2c140a7702812e1fa1ab0fb03929b2ed55e3e8191f3", size = 267661457, upload-time = "2026-03-11T01:36:36.538Z" }, +] + [[package]] name = "flashinfer-python" -version = "0.6.1" +version = "0.6.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "apache-tvm-ffi" }, @@ -2386,9 +2378,9 @@ dependencies = [ { name = "torch" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/68/81/5a84e14df7358d2c2903b18c6f2779bd4b4a6739076d01a847d4c18fb102/flashinfer_python-0.6.1.tar.gz", hash = "sha256:8dc2fc5dc187fc70151d5f39ef560fde8a38117a4f6cf40dce0ddb09cbd4f0bf", size = 5141191, upload-time = "2026-01-14T05:40:27.825Z" } +sdist = { url = "https://files.pythonhosted.org/packages/03/70/c5a235297351021f5d3d3233523a85f5a6468495587489ad2f257e8eafe2/flashinfer_python-0.6.6.tar.gz", hash = "sha256:0730ba7c7aad332961933bcebc5119762797161ede57d955f6fd199818ed1d92", size = 5344156, upload-time = "2026-03-11T01:36:21.434Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/d5/bca632bb5781689415186421bbee2ad39ae8a39b0996d579c76901e5c66f/flashinfer_python-0.6.1-py3-none-any.whl", hash = "sha256:610dd4ac15e7a0874b79e7577d027cb35133e8dc31dc3137c2f2d6497fe46f18", size = 7580432, upload-time = "2026-01-14T05:40:25.636Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/61/385d06755f3ab66333018285657adf0daf8a90a129448231fd09e315bd2e/flashinfer_python-0.6.6-py3-none-any.whl", hash = "sha256:078f158636969eec1a0d3dea19c3ca90b426b66df89bbf7b7b8276ce2ec08148", size = 7817047, upload-time = "2026-03-11T01:36:19.198Z" }, ] [[package]] @@ -2799,7 +2791,7 @@ wheels = [ [[package]] name = "gql" -version = "3.5.3" +version = "4.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2807,9 +2799,9 @@ dependencies = [ { name = "graphql-core" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/34/ed/44ffd30b06b3afc8274ee2f38c3c1b61fe4740bf03d92083e43d2c17ac77/gql-3.5.3.tar.gz", hash = "sha256:393b8c049d58e0d2f5461b9d738a2b5f904186a40395500b4a84dd092d56e42b", size = 180504, upload-time = "2025-05-20T12:34:08.954Z" } +sdist = { url = "https://files.pythonhosted.org/packages/06/9f/cf224a88ed71eb223b7aa0b9ff0aa10d7ecc9a4acdca2279eb046c26d5dc/gql-4.0.0.tar.gz", hash = "sha256:f22980844eb6a7c0266ffc70f111b9c7e7c7c13da38c3b439afc7eab3d7c9c8e", size = 215644, upload-time = "2025-08-17T14:32:35.397Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/50/2f4e99b216821ac921dbebf91c644ba95818f5d07857acadee17220221f3/gql-3.5.3-py2.py3-none-any.whl", hash = "sha256:e1fcbde2893fcafdd28114ece87ff47f1cc339a31db271fc4e1d528f5a1d4fbc", size = 74348, upload-time = "2025-05-20T12:34:07.687Z" }, + { url = "https://files.pythonhosted.org/packages/ac/94/30bbd09e8d45339fa77a48f5778d74d47e9242c11b3cd1093b3d994770a5/gql-4.0.0-py3-none-any.whl", hash = "sha256:f3beed7c531218eb24d97cb7df031b4a84fdb462f4a2beb86e2633d395937479", size = 89900, upload-time = "2025-08-17T14:32:34.029Z" }, ] [package.optional-dependencies] @@ -2965,19 +2957,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/b2/b096ccce418882fbfda4f7496f9357aaa9a5af1896a9a7f60d9f2b275a06/grpcio-1.78.0-cp314-cp314-win_amd64.whl", hash = 
"sha256:dce09d6116df20a96acfdbf85e4866258c3758180e8c49845d6ba8248b6d0bbb", size = 4929852, upload-time = "2026-02-06T09:56:45.885Z" }, ] -[[package]] -name = "grpcio-reflection" -version = "1.71.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "grpcio", marker = "sys_platform == 'linux'" }, - { name = "protobuf", marker = "sys_platform == 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/41/14/4e5f8e902fa9461abae292773b921a578f68333c7c3e731bcff7514f78cd/grpcio_reflection-1.71.2.tar.gz", hash = "sha256:bedfac3d2095d6c066b16b66bfce85b4be3e92dc9f3b7121e6f019d24a9c09c0", size = 18798, upload-time = "2025-06-28T04:24:06.019Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/89/c99ff79b90315cf47dbcdd86babb637764e5f14f523d622020bfee57dc4d/grpcio_reflection-1.71.2-py3-none-any.whl", hash = "sha256:c4f1a0959acb94ec9e1369bb7dab827cc9a6efcc448bdb10436246c8e52e2f57", size = 22684, upload-time = "2025-06-28T04:23:44.759Z" }, -] - [[package]] name = "gunicorn" version = "25.1.0" @@ -3893,22 +3872,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4a/4a/cf14bf3b1f5ffb13c69cf5f0ea78031247790558ee88984a8bdd22fae60d/kaitaistruct-0.11-py2.py3-none-any.whl", hash = "sha256:5c6ce79177b4e193a577ecd359e26516d1d6d000a0bffd6e1010f2a46a62a561", size = 11372, upload-time = "2025-09-08T15:46:23.635Z" }, ] -[[package]] -name = "kaldi-native-fbank" -version = "1.22.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3a/2c/84076b352107ce12d56f28c313f1aca1be332d953dd96aec7b84976e6d53/kaldi-native-fbank-1.22.3.tar.gz", hash = "sha256:387bf87225c6b83c93ae652eeaef1b4d531994b6e398e7a77189de340674f9af", size = 71013, upload-time = "2025-10-09T02:31:21.487Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/e3/53/720ffbe8b30de203570f397866334eb4c6364c9214699010f2086de911ff/kaldi_native_fbank-1.22.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48e5dd8e897bf4509be2c6eeb4bbab728eaaef1f214ae0510c96219c4253d17", size = 299054, upload-time = "2025-10-09T02:28:42.011Z" }, - { url = "https://files.pythonhosted.org/packages/52/3f/beb161e4fdf6710938ccf18418c147d87ba8f102903d6c6e4eda25588e22/kaldi_native_fbank-1.22.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ce84c65779c9eed6ec02699797a4ba1859451977537a993be3ea8167a210ec3e", size = 321921, upload-time = "2025-10-09T02:31:21.646Z" }, - { url = "https://files.pythonhosted.org/packages/43/28/6f4fd8953c0b3f30de4526fd024095032abcdc25b6736c77a891687c604e/kaldi_native_fbank-1.22.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f5a44b4a83cf9bf13d3f77858928068b06d3ec2238c27ff2e39393fbf7749c9f", size = 298887, upload-time = "2025-10-09T02:30:53.739Z" }, - { url = "https://files.pythonhosted.org/packages/84/90/01ef7331c52b1eaf9916f3f7a535155aac2e9e2ddad12a141613d92758c7/kaldi_native_fbank-1.22.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f16e74372fe9e20abb4183f98a8e2288d5ee4c48d04d94b6160311170e007661", size = 322002, upload-time = "2025-10-09T02:30:13.04Z" }, - { url = "https://files.pythonhosted.org/packages/9a/72/adb11d27c545aca1db442da744ee430a6aae377a33574bfd2ec159dcf673/kaldi_native_fbank-1.22.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f74b85948328ab4b4c88522f98a59f83dd5295443b08483e945c7de2c35e5dcc", size = 299276, upload-time = "2025-10-09T02:30:38.1Z" }, - { url = "https://files.pythonhosted.org/packages/bc/1e/496c7ae814b2a7f8f47d423dc33aae2cdfb1edf898e2faaf5c5b39b90363/kaldi_native_fbank-1.22.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:e3f9c6551ff5b6ae785dd15f819c3b2b7432d77bfb79ea8806748e2c7d900b5d", size = 322714, upload-time = "2025-10-09T02:30:32.698Z" }, - { url = "https://files.pythonhosted.org/packages/d6/4b/1f3f17a7b601124df88112a1d1fcb543c8d908d6674f752f7d3322991770/kaldi_native_fbank-1.22.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:41fb506fde155d97aeef95dd6ceccc38c2c5dd4401f9b8fded9bacaf1bafef36", size = 300037, upload-time = "2025-10-09T02:30:10.203Z" }, - { url = "https://files.pythonhosted.org/packages/2b/6a/374ec4e1cf13e672f5acd8272116c1885c2a7f84be491fc652415fc6e870/kaldi_native_fbank-1.22.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f1cc2b8eeec52a33868cf59bb95d40b335fa9cff7e15a6208e0e9b67b7fd7236", size = 322854, upload-time = "2025-10-09T02:31:26.003Z" }, -] - [[package]] name = "keyring" version = "25.7.0" @@ -4848,34 +4811,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" }, ] -[[package]] -name = "msgpack" -version = "1.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4d/f2/bfb55a6236ed8725a96b0aa3acbd0ec17588e6a2c3b62a93eb513ed8783f/msgpack-1.1.2.tar.gz", hash = "sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e", size = 173581, upload-time = "2025-10-08T09:15:56.596Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/71/46/b817349db6886d79e57a966346cf0902a426375aadc1e8e7a86a75e22f19/msgpack-1.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:61c8aa3bd513d87c72ed0b37b53dd5c5a0f58f2ff9f26e1555d3bd7948fb7296", size = 416962, upload-time = "2025-10-08T09:14:51.997Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/e0/6cc2e852837cd6086fe7d8406af4294e66827a60a4cf60b86575a4a65ca8/msgpack-1.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:454e29e186285d2ebe65be34629fa0e8605202c60fbc7c4c650ccd41870896ef", size = 426183, upload-time = "2025-10-08T09:14:53.477Z" }, - { url = "https://files.pythonhosted.org/packages/25/98/6a19f030b3d2ea906696cedd1eb251708e50a5891d0978b012cb6107234c/msgpack-1.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7bc8813f88417599564fafa59fd6f95be417179f76b40325b500b3c98409757c", size = 411454, upload-time = "2025-10-08T09:14:54.648Z" }, - { url = "https://files.pythonhosted.org/packages/b7/cd/9098fcb6adb32187a70b7ecaabf6339da50553351558f37600e53a4a2a23/msgpack-1.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bafca952dc13907bdfdedfc6a5f579bf4f292bdd506fadb38389afa3ac5b208e", size = 422341, upload-time = "2025-10-08T09:14:56.328Z" }, - { url = "https://files.pythonhosted.org/packages/f2/60/a064b0345fc36c4c3d2c743c82d9100c40388d77f0b48b2f04d6041dbec1/msgpack-1.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c63eea553c69ab05b6747901b97d620bb2a690633c77f23feb0c6a947a8a7b8f", size = 417131, upload-time = "2025-10-08T09:15:05.136Z" }, - { url = "https://files.pythonhosted.org/packages/65/92/a5100f7185a800a5d29f8d14041f61475b9de465ffcc0f3b9fba606e4505/msgpack-1.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:372839311ccf6bdaf39b00b61288e0557916c3729529b301c52c2d88842add42", size = 427556, upload-time = "2025-10-08T09:15:06.837Z" }, - { url = "https://files.pythonhosted.org/packages/f5/87/ffe21d1bf7d9991354ad93949286f643b2bb6ddbeab66373922b44c3b8cc/msgpack-1.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2929af52106ca73fcb28576218476ffbb531a036c2adbcf54a3664de124303e9", size = 404920, upload-time = "2025-10-08T09:15:08.179Z" }, 
- { url = "https://files.pythonhosted.org/packages/ff/41/8543ed2b8604f7c0d89ce066f42007faac1eaa7d79a81555f206a5cdb889/msgpack-1.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be52a8fc79e45b0364210eef5234a7cf8d330836d0a64dfbb878efa903d84620", size = 415013, upload-time = "2025-10-08T09:15:09.83Z" }, - { url = "https://files.pythonhosted.org/packages/d3/68/93180dce57f684a61a88a45ed13047558ded2be46f03acb8dec6d7c513af/msgpack-1.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fdf7d83102bf09e7ce3357de96c59b627395352a4024f6e2458501f158bf999", size = 412721, upload-time = "2025-10-08T09:15:16.567Z" }, - { url = "https://files.pythonhosted.org/packages/5d/ba/459f18c16f2b3fc1a1ca871f72f07d70c07bf768ad0a507a698b8052ac58/msgpack-1.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fac4be746328f90caa3cd4bc67e6fe36ca2bf61d5c6eb6d895b6527e3f05071e", size = 424657, upload-time = "2025-10-08T09:15:17.825Z" }, - { url = "https://files.pythonhosted.org/packages/38/f8/4398c46863b093252fe67368b44edc6c13b17f4e6b0e4929dbf0bdb13f23/msgpack-1.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:fffee09044073e69f2bad787071aeec727183e7580443dfeb8556cbf1978d162", size = 402668, upload-time = "2025-10-08T09:15:19.003Z" }, - { url = "https://files.pythonhosted.org/packages/28/ce/698c1eff75626e4124b4d78e21cca0b4cc90043afb80a507626ea354ab52/msgpack-1.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5928604de9b032bc17f5099496417f113c45bc6bc21b5c6920caf34b3c428794", size = 419040, upload-time = "2025-10-08T09:15:20.183Z" }, - { url = "https://files.pythonhosted.org/packages/8e/a9/3536e385167b88c2cc8f4424c49e28d49a6fc35206d4a8060f136e71f94c/msgpack-1.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99e2cb7b9031568a2a5c73aa077180f93dd2e95b4f8d3b8e14a73ae94a9e667e", size = 411885, upload-time = 
"2025-10-08T09:15:27.22Z" }, - { url = "https://files.pythonhosted.org/packages/2f/40/dc34d1a8d5f1e51fc64640b62b191684da52ca469da9cd74e84936ffa4a6/msgpack-1.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:180759d89a057eab503cf62eeec0aa61c4ea1200dee709f3a8e9397dbb3b6931", size = 419658, upload-time = "2025-10-08T09:15:28.4Z" }, - { url = "https://files.pythonhosted.org/packages/3b/ef/2b92e286366500a09a67e03496ee8b8ba00562797a52f3c117aa2b29514b/msgpack-1.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:04fb995247a6e83830b62f0b07bf36540c213f6eac8e851166d8d86d83cbd014", size = 403290, upload-time = "2025-10-08T09:15:29.764Z" }, - { url = "https://files.pythonhosted.org/packages/78/90/e0ea7990abea5764e4655b8177aa7c63cdfa89945b6e7641055800f6c16b/msgpack-1.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8e22ab046fa7ede9e36eeb4cfad44d46450f37bb05d5ec482b02868f451c95e2", size = 415234, upload-time = "2025-10-08T09:15:31.022Z" }, - { url = "https://files.pythonhosted.org/packages/fc/6b/62e85ff7193663fbea5c0254ef32f0c77134b4059f8da89b958beb7696f3/msgpack-1.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5559d03930d3aa0f3aacb4c42c776af1a2ace2611871c84a75afe436695e6245", size = 435242, upload-time = "2025-10-08T09:15:37.647Z" }, - { url = "https://files.pythonhosted.org/packages/c1/47/5c74ecb4cc277cf09f64e913947871682ffa82b3b93c8dad68083112f412/msgpack-1.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:70c5a7a9fea7f036b716191c29047374c10721c389c21e9ffafad04df8c52c90", size = 432509, upload-time = "2025-10-08T09:15:38.794Z" }, - { url = "https://files.pythonhosted.org/packages/24/a4/e98ccdb56dc4e98c929a3f150de1799831c0a800583cde9fa022fa90602d/msgpack-1.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f2cb069d8b981abc72b41aea1c580ce92d57c673ec61af4c500153a626cb9e20", size = 415957, 
upload-time = "2025-10-08T09:15:40.238Z" }, - { url = "https://files.pythonhosted.org/packages/da/28/6951f7fb67bc0a4e184a6b38ab71a92d9ba58080b27a77d3e2fb0be5998f/msgpack-1.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d62ce1f483f355f61adb5433ebfd8868c5f078d1a52d042b0a998682b4fa8c27", size = 422910, upload-time = "2025-10-08T09:15:41.505Z" }, -] - [[package]] name = "msgspec" version = "0.20.0" @@ -5317,21 +5252,21 @@ wheels = [ [[package]] name = "nvidia-cudnn-frontend" -version = "1.20.0" +version = "1.18.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/8b/f660f8e4e771738688668057f84353e55450eb9b85e52f01cfb905783a94/nvidia_cudnn_frontend-1.20.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6a1b246a0bc70553424c7656c637823c73f7d98cca5a58db26f39e1207d2085", size = 2368995, upload-time = "2026-03-16T18:28:41.675Z" }, - { url = "https://files.pythonhosted.org/packages/69/3e/2cae8081e1e926689eeffb91cd44e18424d8405121a05d66a489ddb9b760/nvidia_cudnn_frontend-1.20.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e1101a7fb810c62fd52a2a3beeeda85ea611e49ae18844044e63b1ea31a7b23", size = 2520413, upload-time = "2026-03-16T18:25:14.789Z" }, - { url = "https://files.pythonhosted.org/packages/ee/65/ee9a687fcf68996216ab1d36b63ac7d3ce0b3821abd9a45c31833389975e/nvidia_cudnn_frontend-1.20.0-cp311-cp311-win_amd64.whl", hash = "sha256:9415c1f41ff84d2712a6ab55a87e06e5d934d05af6b45adaa709fc07e85eb32f", size = 1944242, upload-time = "2026-03-16T18:32:39.073Z" }, - { url = "https://files.pythonhosted.org/packages/0e/eb/22b4cad479206a3824edf494582e19fc4a291b9c14febdb859e56b82c03f/nvidia_cudnn_frontend-1.20.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bb891643598ac7b3734b82e5a459cbf778e467ebf7a5b586840003fb66df0ef3", size = 2371995, upload-time = "2026-03-16T18:29:29.024Z" }, - { url = 
"https://files.pythonhosted.org/packages/aa/83/ee43fc097f475367f1ff5d5e3e1d8191d253f486cdd502d13600759fb845/nvidia_cudnn_frontend-1.20.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce50afe3d1efda07f52e8df5e992f33e92dbb443d0e61e2de703ad5762edc53c", size = 2521021, upload-time = "2026-03-16T18:25:37.316Z" }, - { url = "https://files.pythonhosted.org/packages/cc/03/d2d725c9c6eb04cd4a3216a7d1a37ab825d2ae8822b79a78b458ab703607/nvidia_cudnn_frontend-1.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:f2449b0cfc547688e27f975c6ad5101257ae86df0315a80f28af78995adf55b6", size = 1944734, upload-time = "2026-03-16T18:33:02.866Z" }, - { url = "https://files.pythonhosted.org/packages/d7/26/e5a309fe92ad67f2dc1ea85b2615f40db6c19f6a7b36b40036d57ae23a66/nvidia_cudnn_frontend-1.20.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:651fdc9a61b0a4456b557d5f82fab72739b0a6ee61384a4cb23767191e2640cd", size = 2371699, upload-time = "2026-03-16T18:30:19.865Z" }, - { url = "https://files.pythonhosted.org/packages/2d/6f/a9f5df2e003ce6f57b6e609e323fc13379a0f7966d2e044de4ceb87ec4b4/nvidia_cudnn_frontend-1.20.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f317548e700f74c167fa4988de5f0ac06931820e4d0c35b5c7dfe629dd191be4", size = 2521383, upload-time = "2026-03-16T18:26:12.09Z" }, - { url = "https://files.pythonhosted.org/packages/90/8f/cba72a4deb5168bba97d0094dbfe05591a12bc9cc9432bbfd0c107ddca33/nvidia_cudnn_frontend-1.20.0-cp313-cp313-win_amd64.whl", hash = "sha256:64e5c21853732a2f6ecf031d95d100656514d43fd2260f64266b5f8536f46434", size = 1944767, upload-time = "2026-03-16T18:33:25.204Z" }, - { url = "https://files.pythonhosted.org/packages/f9/a0/d2634d910257e6827d178dcebdf109f7f2bd8003659675dffc82fa101077/nvidia_cudnn_frontend-1.20.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6a1cf3e86664fb64e4752d3936d9cebd0afa6c4b5f6ccde19b6ee4d65fcd9d17", size = 2373944, upload-time 
= "2026-03-16T18:31:06.31Z" }, - { url = "https://files.pythonhosted.org/packages/79/a2/dd2a75942b0311a50bfef3173b240695a5ebdbcbd3c5154d8f333ef6dac6/nvidia_cudnn_frontend-1.20.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4da0e9ed299843abdcccdde73392577809403d4ef2ad26b4335a3eaee42423f", size = 2522596, upload-time = "2026-03-16T18:26:34.249Z" }, - { url = "https://files.pythonhosted.org/packages/ce/af/7110cea67a8cc8f3cd129cead952f5d50078c8bb99cf35e9f78c74a27097/nvidia_cudnn_frontend-1.20.0-cp314-cp314-win_amd64.whl", hash = "sha256:3f596e54398efab24727fc47291c61f969051f37e57e186ffe0fb6df06db19fd", size = 1946060, upload-time = "2026-03-16T18:33:47.963Z" }, + { url = "https://files.pythonhosted.org/packages/e2/9a/83d3d080118de4a7810fa019349edec634b8b37b9cafaacd05719de62dd6/nvidia_cudnn_frontend-1.18.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6d4d0b88d617b233a503c84980b54d840b60b2734497d1a7a071ec5293daec2", size = 2023709, upload-time = "2026-01-27T23:32:10.912Z" }, + { url = "https://files.pythonhosted.org/packages/13/c7/c3624b3ed77b102618f26295e816b27f1c3ebb1143730237a9f51d403c3f/nvidia_cudnn_frontend-1.18.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:382ea063b92cbfd5b442cb75ff8422932d78276aecf139e46713ed1ad3d07af4", size = 2155568, upload-time = "2026-01-27T23:07:13.277Z" }, + { url = "https://files.pythonhosted.org/packages/52/dd/8613dfd029d076b86a8a87efe3f4bb4ab73cec15fa8fc27e665098f4d167/nvidia_cudnn_frontend-1.18.0-cp311-cp311-win_amd64.whl", hash = "sha256:baa509effc4d299d3f04e549d4188f88bca8a8b527f483cbd2f66bc18f13a8b1", size = 1591244, upload-time = "2026-01-27T23:08:44.691Z" }, + { url = "https://files.pythonhosted.org/packages/e3/b4/604e230378680ee117849a4e1045baca092f93161a829291a84d5acce70c/nvidia_cudnn_frontend-1.18.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:310b417f2848a83d1437203fcaeea320a74fb7f28af20bf42bf5afc9c01f1c12", size = 2027408, upload-time = "2026-01-27T23:32:46.576Z" }, + { url = "https://files.pythonhosted.org/packages/c6/52/08f98262e77b1cbcc834cc1a5db494d0661ea1dbdea58c2e2d51a57fdaca/nvidia_cudnn_frontend-1.18.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c023539ca6de99234cf5102c3ec0d6af817f5396fc93028a22ba5b834a35b8a", size = 2159245, upload-time = "2026-01-27T23:07:32.664Z" }, + { url = "https://files.pythonhosted.org/packages/aa/1f/751a5a8cfdc95fb4dc556192d37369ae488c30c473fe9a3ec720b23d07ea/nvidia_cudnn_frontend-1.18.0-cp312-cp312-win_amd64.whl", hash = "sha256:e13f7dd46cdb4762dde87f181f06d1c5e15e9478bbdd547bfa74d9b11f415aae", size = 1591041, upload-time = "2026-01-27T23:09:04.118Z" }, + { url = "https://files.pythonhosted.org/packages/e8/bd/db791a26ebb6a6e1268f518e18c82d8ad18546f7008f4b0d5bde15f927de/nvidia_cudnn_frontend-1.18.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a6e2b7bd43705ffa4af3b187374fdd5e7d09fc228a4d65fc8b4b0a537a8e605", size = 2027249, upload-time = "2026-01-27T23:33:22.46Z" }, + { url = "https://files.pythonhosted.org/packages/19/74/3038cf496d5de7cfdff730f5202e438c17d9123de507059340e02ddff9d7/nvidia_cudnn_frontend-1.18.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c0544206b02cae9da4f044ca3fe7416b99e0c8a8052285dd3e5a8fc445d34f9c", size = 2160001, upload-time = "2026-01-27T23:07:50.248Z" }, + { url = "https://files.pythonhosted.org/packages/a1/5e/148cc6609dba326e620e4d949246020dfba05ca07d0387442e62b71d19b6/nvidia_cudnn_frontend-1.18.0-cp313-cp313-win_amd64.whl", hash = "sha256:7eefa5f10cc003df5f3593f82f1ee6c001fc3412bdc78430c751914dfceefd7f", size = 1591270, upload-time = "2026-01-27T23:09:21.435Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/0a/515209dd2afc6027bf1112bf415f575bfe9628d18877abe7424cb597dd7b/nvidia_cudnn_frontend-1.18.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b489da1b30f1d7da822b37b89cc4f68afd80e020eb57e4ab24921f8b57f6e946", size = 2028689, upload-time = "2026-02-11T21:32:04.235Z" }, + { url = "https://files.pythonhosted.org/packages/ab/57/52d18e1f50979eeabfafb408ec73068afc5a1e1ccd21636240317cd456d4/nvidia_cudnn_frontend-1.18.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:37688c81a34ac590aff9de4c34d2968bab949411af707baa327616ebd4b34ae1", size = 2160182, upload-time = "2026-02-11T21:25:18.437Z" }, + { url = "https://files.pythonhosted.org/packages/67/53/df2810b56d259ef96fa6beaa1381bd14c29fbe82836b409516e864c5e177/nvidia_cudnn_frontend-1.18.0-cp314-cp314-win_amd64.whl", hash = "sha256:5053b473fa74168b5fbf35934cd6187f88aa03b8447b9f2cd417332d5e5c9569", size = 1592759, upload-time = "2026-02-11T21:32:33.87Z" }, ] [[package]] @@ -5797,16 +5732,16 @@ dev = [ [package.metadata] requires-dist = [ - { name = "accelerate", marker = "extra == 'backend'", specifier = "==1.7.0" }, + { name = "accelerate", marker = "extra == 'backend'", specifier = "==1.13.0" }, { name = "apex", marker = "extra == 'megatron'", git = "https://github.com/NVIDIA/apex.git?rev=25.09" }, { name = "awscli", marker = "extra == 'backend'", specifier = ">=1.38.1" }, - { name = "bitsandbytes", marker = "extra == 'backend'", specifier = ">=0.45.2" }, + { name = "bitsandbytes", marker = "extra == 'backend'", specifier = ">=0.45.5" }, { name = "causal-conv1d", marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'megatron'", url = "https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.6.1.post4/causal_conv1d-1.6.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" }, { name = "datrie", marker = "extra == 'tinker'", specifier = ">=0.8.3" }, { name 
= "deep-ep", marker = "sys_platform == 'linux' and extra == 'megatron'", git = "https://github.com/deepseek-ai/DeepEP.git?rev=v1.2.1" }, { name = "duckdb", marker = "extra == 'backend'", specifier = ">=1.0.0" }, { name = "fastapi", marker = "extra == 'tinker'", specifier = ">=0.128.0" }, - { name = "gql", marker = "extra == 'backend'", specifier = "<4" }, + { name = "gql", marker = "extra == 'backend'", specifier = "<5" }, { name = "hf-xet", marker = "extra == 'backend'", specifier = ">=1.1.0" }, { name = "huggingface-hub", marker = "extra == 'tinker'" }, { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.51" }, @@ -5828,7 +5763,7 @@ requires-dist = [ { name = "nvidia-resiliency-ext", marker = "sys_platform == 'linux' and extra == 'backend'", specifier = "<0.5" }, { name = "nvidia-resiliency-ext", marker = "sys_platform == 'linux' and extra == 'megatron'", specifier = "<0.5" }, { name = "openai", specifier = ">=2.14.0" }, - { name = "peft", marker = "extra == 'backend'", specifier = ">=0.14.0" }, + { name = "peft", marker = "extra == 'backend'", specifier = ">=0.18.0" }, { name = "pillow", marker = "extra == 'tinker'" }, { name = "polars", specifier = ">=1.26.0" }, { name = "pyarrow", marker = "extra == 'backend'", specifier = ">=15.0.0" }, @@ -5844,18 +5779,18 @@ requires-dist = [ { name = "torch", marker = "extra == 'backend'", specifier = "==2.10.0" }, { name = "torch", marker = "extra == 'megatron'", specifier = "==2.10.0" }, { name = "torch", marker = "extra == 'tinker'", specifier = "==2.10.0" }, - { name = "torchao", marker = "extra == 'backend'", specifier = "==0.16.0" }, + { name = "torchao", marker = "extra == 'backend'", specifier = "==0.17.0" }, { name = "transformer-engine", marker = "extra == 'megatron'", specifier = "==2.11.0" }, { name = "transformer-engine-cu12", marker = "extra == 'megatron'", specifier = "==2.11.0" }, { name = "transformer-engine-torch", marker = "extra == 'megatron'", git = 
"https://github.com/NVIDIA/TransformerEngine.git?subdirectory=transformer_engine%2Fpytorch&rev=v2.11" }, - { name = "transformers", marker = "extra == 'backend'", specifier = "==5.2.0" }, - { name = "transformers", marker = "extra == 'tinker'", specifier = "==5.2.0" }, - { name = "trl", marker = "extra == 'backend'", specifier = "==0.20.0" }, + { name = "transformers", marker = "extra == 'backend'", specifier = "==5.6.2" }, + { name = "transformers", marker = "extra == 'tinker'", specifier = "==5.6.2" }, + { name = "trl", marker = "extra == 'backend'", specifier = "==1.3.0" }, { name = "typer", specifier = ">=0.15.2" }, - { name = "unsloth", marker = "extra == 'backend'", specifier = "==2026.3.3" }, - { name = "unsloth-zoo", marker = "extra == 'backend'", specifier = "==2026.3.1" }, + { name = "unsloth", marker = "extra == 'backend'", specifier = "==2026.4.8" }, + { name = "unsloth-zoo", marker = "extra == 'backend'", specifier = "==2026.4.9" }, { name = "uvicorn", marker = "extra == 'tinker'", specifier = ">=0.35.0" }, - { name = "vllm", marker = "sys_platform == 'linux' and extra == 'backend'", url = "https://github.com/vivekkalyan/vllm/releases/download/v0.17.0-art1/vllm-0.17.0%2Bart1-cp38-abi3-manylinux_2_31_x86_64.whl" }, + { name = "vllm", marker = "sys_platform == 'linux' and extra == 'backend'", specifier = "==0.19.1" }, { name = "wandb", marker = "extra == 'backend'", specifier = "==0.25.0" }, { name = "weave", specifier = ">=0.52.24" }, ] @@ -6261,7 +6196,7 @@ wheels = [ [[package]] name = "peft" -version = "0.18.1" +version = "0.19.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "accelerate" }, @@ -6275,9 +6210,9 @@ dependencies = [ { name = "tqdm" }, { name = "transformers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/48/147b3ea999560b40a34fd78724c7777aa9d18409c2250bdcaf9c4f2db7fc/peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2", size = 635030, 
upload-time = "2026-01-09T13:08:01.136Z" } +sdist = { url = "https://files.pythonhosted.org/packages/86/cf/037f1e3d5186496c05513a6754639e2dab3038a05f384284d49a9bd06a2d/peft-0.19.1.tar.gz", hash = "sha256:0d97542fe96dcdaa20d3b81c06f26f988618f416a73544ab23c3618ccb674a40", size = 763738, upload-time = "2026-04-16T15:46:45.105Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/14/b4e3f574acf349ae6f61f9c000a77f97a3b315b4bb6ad03791e79ae4a568/peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1", size = 556960, upload-time = "2026-01-09T13:07:55.865Z" }, + { url = "https://files.pythonhosted.org/packages/e8/b6/f54d676ed93cc2dd2234c3b172ea9c8c3d7d29361e66b1b23dec57a67465/peft-0.19.1-py3-none-any.whl", hash = "sha256:2113f72a81621b5913ef28f9022204c742df111890c5f49d812716a4a301e356", size = 680692, upload-time = "2026-04-16T15:46:42.886Z" }, ] [[package]] @@ -7689,17 +7624,18 @@ wheels = [ [[package]] name = "quack-kernels" -version = "0.2.5" +version = "0.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "apache-tvm-ffi", marker = "sys_platform == 'linux'" }, + { name = "einops", marker = "sys_platform == 'linux'" }, { name = "nvidia-cutlass-dsl", marker = "sys_platform == 'linux'" }, { name = "torch", marker = "sys_platform == 'linux'" }, { name = "torch-c-dlpack-ext", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/89/de/472a20a625495e31c33a99a30867c1d58335a1afa02dc30019f667702d1d/quack_kernels-0.2.5.tar.gz", hash = "sha256:06241a5962c09b4a2c27d4d21208e31790836fecde4373c6e9d874fdd88b5590", size = 152256, upload-time = "2026-01-31T09:07:09.998Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/40/e9a86b32ee3d44be6301acb9ebe6f299f2b8f0e0fd847f4143139100a2bf/quack_kernels-0.4.0.tar.gz", hash = "sha256:55a3c69bb2219ec6488fe366a21c3da1a50c4640ceb5b9b31d126f8477ad35aa", size = 261153, upload-time = 
"2026-04-27T15:29:08.588Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/7a/1a6d9997f979ce6985210a1783766b6c9b85bf6c21dcb990728526ca4d41/quack_kernels-0.2.5-py3-none-any.whl", hash = "sha256:5f7c246c8cb55c560f7601c952d60bddb4ba3e5c741220703a0c781a0aac3aa2", size = 156759, upload-time = "2026-01-31T09:07:08.989Z" }, + { url = "https://files.pythonhosted.org/packages/19/5d/d963412914a2f778e4594c5164dfe69bc53435877bfcd1a0db25e67cf320/quack_kernels-0.4.0-py3-none-any.whl", hash = "sha256:c7ef1d3ee317adbc363b02e69a0a26110a8fcf5e07d8ada2cf7a1b4828b5539f", size = 250771, upload-time = "2026-04-27T15:29:07.227Z" }, ] [[package]] @@ -7737,34 +7673,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/43/80f67e0336cb2fc725f8e06f7fe35c1d0fe946f4d2b8b2175e797e07349e/qwen_vl_utils-0.0.14-py3-none-any.whl", hash = "sha256:5e28657bfd031e56bd447c5901b58ddfc3835285ed100f4c56580e0ade054e96", size = 8120, upload-time = "2025-09-23T09:38:56.297Z" }, ] -[[package]] -name = "ray" -version = "2.54.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click", marker = "sys_platform == 'linux'" }, - { name = "filelock", marker = "sys_platform == 'linux'" }, - { name = "jsonschema", marker = "sys_platform == 'linux'" }, - { name = "msgpack", marker = "sys_platform == 'linux'" }, - { name = "packaging", marker = "sys_platform == 'linux'" }, - { name = "protobuf", marker = "sys_platform == 'linux'" }, - { name = "pyyaml", marker = "sys_platform == 'linux'" }, - { name = "requests", marker = "sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/29/7871f4206e6b00a9bb784c16dad32ccd01e9df5a93545db92de220eb2871/ray-2.54.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:491ae56ab80d8822c4eaf4d5bb96dcf32a6231d8d7b76eb8034400eb9be1bb18", size = 72066630, upload-time = "2026-02-18T04:05:04.957Z" }, - { url = 
"https://files.pythonhosted.org/packages/1d/e8/d2c8ebd9cd945abc817b01ad02a29df78cdb86cd07d764587e16977389d0/ray-2.54.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:928bb09245a3c6f7c3c113ba8eafc69f948da9602d7f33e8251ecdf97c157615", size = 72895723, upload-time = "2026-02-18T04:05:10.686Z" }, - { url = "https://files.pythonhosted.org/packages/60/ad/e07aca3637e9c3ec4857ec4366208099cf8488ece8061a9925ba29b66382/ray-2.54.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:795ae21d6b764245d3f521bc5833446d58569e7dfde9c5777417eb285d87450f", size = 72107346, upload-time = "2026-02-18T04:05:27.999Z" }, - { url = "https://files.pythonhosted.org/packages/9e/b9/cc5ea8460c3dc602e6b7198277a7c59ba2b8929374ab22efa8df9f3deac8/ray-2.54.0-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:a972afd5aa3dda99d0b2f369b5f62e5dd95865ab7d37bf2e0a0e0d2cfbd9b325", size = 72967230, upload-time = "2026-02-18T04:05:33.771Z" }, - { url = "https://files.pythonhosted.org/packages/fd/8c/4a4a38eaec6e9614076a96967f58540f4f8d4aa0c793f43150c5df23cb9a/ray-2.54.0-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:8952c23a8aa94f10728c2d16e0dc3732d09aa0e6254801757ff494984a214f45", size = 72013826, upload-time = "2026-02-18T04:05:49.866Z" }, - { url = "https://files.pythonhosted.org/packages/42/ac/e7ec2a406bd755f61c7090460fa5ab3f09b00c3c2d8db6d0b559f78a30eb/ray-2.54.0-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:ab89e6089abb6e46fb98fdd96d399b31a852d79127cd8ac00746c61d93defa2c", size = 72880209, upload-time = "2026-02-18T04:05:55.498Z" }, -] - -[package.optional-dependencies] -cgraph = [ - { name = "cupy-cuda12x", marker = "sys_platform == 'linux'" }, -] - [[package]] name = "referencing" version = "0.37.0" @@ -9418,11 +9326,11 @@ wheels = [ [[package]] name = "torchao" -version = "0.16.0" +version = "0.17.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/8d/7f/0acda8a429ac9cfabd142d30af624d7958bf828c438be5a54ca87bbe16d7/torchao-0.16.0-cp310-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2d6293a0c57c9dd505efb025a7189459d154965fbed000efd638cf299f9362dd", size = 3160415, upload-time = "2026-02-10T22:12:12.32Z" }, - { url = "https://files.pythonhosted.org/packages/d0/3d/0c5a5833a135a045510e06c06b3d4cf316b06d59415bc21e0b021a000cc8/torchao-0.16.0-py3-none-any.whl", hash = "sha256:d0a8d773351fd17b95fee81dfbcbf98577b567dcdbec47d221b0ee258432101d", size = 1164150, upload-time = "2026-02-10T22:12:15.28Z" }, + { url = "https://files.pythonhosted.org/packages/32/fe/a4036a8e80fa800c92dbcbf75f541cd4c106248b6b579db6dab1800f616a/torchao-0.17.0-cp310-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:87a418ce0ec064a821ceab83c921b501acef0ce9a6ccd1be358fcd16c3ae8c58", size = 3206172, upload-time = "2026-03-30T22:25:52.974Z" }, + { url = "https://files.pythonhosted.org/packages/c9/37/ef37ca885265e5f79a168616767dd416a3cea1cc3b28bb6b503ce4a5b652/torchao-0.17.0-py3-none-any.whl", hash = "sha256:02eba449036715b9ae784fbaa1a6f97994bb7b0421ce92d1d5d1c08e5bd6d349", size = 1200680, upload-time = "2026-03-30T22:25:54.457Z" }, ] [[package]] @@ -9571,7 +9479,7 @@ dependencies = [ [[package]] name = "transformers" -version = "5.2.0" +version = "5.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -9582,11 +9490,11 @@ dependencies = [ { name = "safetensors" }, { name = "tokenizers" }, { name = "tqdm" }, - { name = "typer-slim" }, + { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bd/7e/8a0c57d562015e5b16c97c1f0b8e0e92ead2c7c20513225dc12c2043ba9f/transformers-5.2.0.tar.gz", hash = "sha256:0088b8b46ccc9eff1a1dca72b5d618a5ee3b1befc3e418c9512b35dea9f9a650", size = 8618176, upload-time = "2026-02-16T18:54:02.867Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/a4/e9/c6c80a07690142a7d05444271f47b9f3c8aac7dea01d52e1137ee480ad78/transformers-5.6.2.tar.gz", hash = "sha256:e657134c3e5a6bc00a3c35f4e2674bb51adfcd89898495b788a18552bac2b91a", size = 8311867, upload-time = "2026-04-23T18:33:29.332Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/93/79754b0ca486e556c2b95d4f5afc66aaf4b260694f3d6e1b51da2d036691/transformers-5.2.0-py3-none-any.whl", hash = "sha256:9ecaf243dc45bee11a7d93f8caf03746accc0cb069181bbf4ad8566c53e854b4", size = 10403304, upload-time = "2026-02-16T18:53:59.699Z" }, + { url = "https://files.pythonhosted.org/packages/5d/95/0b0218149b0d6f14df35f5b8f676fa83df4f19ed253c3cc447107ef86eca/transformers-5.6.2-py3-none-any.whl", hash = "sha256:f8d3a1bb96778fed9b8aabfd0dd6e19843e4b0f2bb6b59f32b8a92051b0f348f", size = 10364898, upload-time = "2026-04-23T18:33:26.081Z" }, ] [[package]] @@ -9621,16 +9529,18 @@ wheels = [ [[package]] name = "trl" -version = "0.20.0" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "accelerate" }, { name = "datasets" }, + { name = "jinja2" }, + { name = "packaging" }, { name = "transformers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/11/95cf1210df9f241b7b1084abe1032e322374f667c4587c09af8d14a1d76f/trl-0.20.0.tar.gz", hash = "sha256:3f949b009b79dc609cd8f5469d67209ab8f71c5cb4d8d979f7b568ef054922fa", size = 461791, upload-time = "2025-07-29T04:10:06.305Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7f/ae/d11c54b4fac59bb20be50fd4a53d300fa741c6b31d561d71e1418a0943b9/trl-1.3.0.tar.gz", hash = "sha256:ad0dfd96cb7e06b41edd913463911b665914e6c37302c8609e8784b6f4e9235c", size = 598406, upload-time = "2026-04-26T15:28:27.704Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/25/fb8509e9206a2ebfa89f9e1c1b595c1c7d68852716ac64f9c4845aa4133e/trl-0.20.0-py3-none-any.whl", hash = 
"sha256:925703a4b6117c45c64625768669e0cccb2cc71011b3d52163e47c0e7d6eb8af", size = 504561, upload-time = "2025-07-29T04:10:04.697Z" }, + { url = "https://files.pythonhosted.org/packages/cf/38/d13f2bc31ce1071eaf519f16f0d2e51d92d08c01895992b05ebe801385d9/trl-1.3.0-py3-none-any.whl", hash = "sha256:55d5bed9983aa3e781c2d0af3b500ea78706247ae7c1852f76ea80417a3f7d5e", size = 721624, upload-time = "2026-04-26T15:28:25.872Z" }, ] [[package]] @@ -9693,18 +9603,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/91/9b286ab899c008c2cb05e8be99814807e7fbbd33f0c0c960470826e5ac82/typer-0.23.1-py3-none-any.whl", hash = "sha256:3291ad0d3c701cbf522012faccfbb29352ff16ad262db2139e6b01f15781f14e", size = 56813, upload-time = "2026-02-13T10:04:32.008Z" }, ] -[[package]] -name = "typer-slim" -version = "0.23.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/da/22/b9c47b8655937b6877d40791b937931702ba9c5f9d28753199266aa96f50/typer_slim-0.23.1.tar.gz", hash = "sha256:dfe92a6317030ee2380f65bf92e540d7c77fefcc689e10d585b4925b45b5e06a", size = 4762, upload-time = "2026-02-13T10:04:26.416Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/8a/5764b851659345f34787f1b6eb30b9d308bbd6c294825cbe38b6b869c97a/typer_slim-0.23.1-py3-none-any.whl", hash = "sha256:8146d5df1eb89f628191c4c604c8464fa841885d0733c58e6e700ff0228adac5", size = 3397, upload-time = "2026-02-13T10:04:27.132Z" }, -] - [[package]] name = "types-paramiko" version = "4.0.0.20260322" @@ -9763,7 +9661,7 @@ wheels = [ [[package]] name = "unsloth" -version = "2026.3.3" +version = "2026.4.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "accelerate" }, @@ -9772,11 +9670,14 @@ dependencies = [ { name = "diffusers" }, { name = "hf-transfer" }, { name = "huggingface-hub" }, + { name = "nest-asyncio" }, { name = "numpy" }, { name = "packaging" }, { name = "peft" }, { name = 
"protobuf" }, { name = "psutil" }, + { name = "pydantic" }, + { name = "pyyaml" }, { name = "sentencepiece" }, { name = "torch" }, { name = "torchvision" }, @@ -9785,19 +9686,20 @@ dependencies = [ { name = "triton", marker = "'linux' in sys_platform" }, { name = "triton-windows", marker = "(platform_machine == 'AMD64' and sys_platform == 'win32') or (platform_machine == 'x86_64' and sys_platform == 'win32')" }, { name = "trl" }, + { name = "typer" }, { name = "tyro" }, { name = "unsloth-zoo" }, { name = "wheel" }, { name = "xformers", marker = "(platform_machine == 'AMD64' and 'linux' in sys_platform) or (platform_machine == 'x86_64' and 'linux' in sys_platform) or (platform_machine == 'AMD64' and sys_platform == 'win32') or (platform_machine == 'x86_64' and sys_platform == 'win32')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/04/78/26b0d5299d9ccbc8ce72933729ef309f57c2991edbb6d70c41a93cb6438c/unsloth-2026.3.3.tar.gz", hash = "sha256:80cb3dd56381117175888cc7caa662ff160704a5cc39b44eee54f8d15ad8522a", size = 4855357, upload-time = "2026-03-03T16:31:25.518Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1f/c1/0331327cc0537da282a11eee4ed2b98fd1a527772a1d7b87336b0fd0d709/unsloth-2026.4.8.tar.gz", hash = "sha256:57f1278d117b5fedc38eaf6f59aab6578c30a2c47e2f34e9049a366aa1a2cb28", size = 71199823, upload-time = "2026-04-23T14:05:14.602Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/3a/88b536416afdd091aefe42682d7654c19b613a23f43d2a8d8ccb529266fd/unsloth-2026.3.3-py3-none-any.whl", hash = "sha256:9378fec4e9132bd0ff50822903eff52e346b19f01c86dbb26dd60a31a3dafb4c", size = 446976, upload-time = "2026-03-03T16:31:15.216Z" }, + { url = "https://files.pythonhosted.org/packages/f7/9d/00445c8134207de8be674efd219f11944af37d10a2544f5c044a143e7583/unsloth-2026.4.8-py3-none-any.whl", hash = "sha256:f1992e0be0c79a702fc7dbe7b0f2b48e427602fa101754bd56320ad55571aba0", size = 67048222, upload-time = "2026-04-23T14:05:08.981Z" }, ] 
[[package]] name = "unsloth-zoo" -version = "2026.3.1" +version = "2026.4.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "accelerate" }, @@ -9825,9 +9727,9 @@ dependencies = [ { name = "tyro" }, { name = "wheel" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6d/a9/d8ca0a75359e25666c77feea64b2d069d4504575abec8e8a8ca9ecba4050/unsloth_zoo-2026.3.1.tar.gz", hash = "sha256:3f1cdc21e06daf9f6be522dcfa2a125f4a76f12f0a760e0a40a27cc43800b165", size = 363746, upload-time = "2026-03-03T15:00:23.79Z" } +sdist = { url = "https://files.pythonhosted.org/packages/17/47/837e40c08542466f4aa3489bdf303f6abeec10fe5ace430102b36fdceb16/unsloth_zoo-2026.4.9.tar.gz", hash = "sha256:074e3b84bffa61c08ba1292822fd34ee243ac83f54732dce37b877fa34eb3109", size = 389589, upload-time = "2026-04-22T16:17:21.186Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/f2/c0b7983f1803901574727f857a0ab571d263cea5ec277d2683f4ff014a2b/unsloth_zoo-2026.3.1-py3-none-any.whl", hash = "sha256:e41e4cefad55307025f72e79a9b961d8e82cc495b4a71780ee70997d88f42190", size = 393768, upload-time = "2026-03-03T15:00:22.245Z" }, + { url = "https://files.pythonhosted.org/packages/46/2e/5528072e1eefeddd16adc0173e1a41e72585a4770507ab7f52313bbc3435/unsloth_zoo-2026.4.9-py3-none-any.whl", hash = "sha256:782bad1d34db3fc5686b33c0073708fbc3477eca1f427412bc299f48fbdf44e3", size = 421927, upload-time = "2026-04-22T16:17:18.726Z" }, ] [[package]] @@ -9994,8 +9896,8 @@ wheels = [ [[package]] name = "vllm" -version = "0.17.0+art1" -source = { url = "https://github.com/vivekkalyan/vllm/releases/download/v0.17.0-art1/vllm-0.17.0%2Bart1-cp38-abi3-manylinux_2_31_x86_64.whl" } +version = "0.19.1" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp", marker = "sys_platform == 'linux'" }, { name = "anthropic", marker = "sys_platform == 'linux'" }, @@ -10009,12 +9911,10 @@ dependencies = [ { name = "einops", marker = "sys_platform == 'linux'" }, { 
name = "fastapi", extra = ["standard"], marker = "sys_platform == 'linux'" }, { name = "filelock", marker = "sys_platform == 'linux'" }, + { name = "flashinfer-cubin", marker = "sys_platform == 'linux'" }, { name = "flashinfer-python", marker = "sys_platform == 'linux'" }, { name = "gguf", marker = "sys_platform == 'linux'" }, - { name = "grpcio", marker = "sys_platform == 'linux'" }, - { name = "grpcio-reflection", marker = "sys_platform == 'linux'" }, { name = "ijson", marker = "sys_platform == 'linux'" }, - { name = "kaldi-native-fbank", marker = "sys_platform == 'linux'" }, { name = "lark", marker = "sys_platform == 'linux'" }, { name = "llguidance", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'ppc64le' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "lm-format-enforcer", marker = "sys_platform == 'linux'" }, @@ -10025,6 +9925,7 @@ dependencies = [ { name = "ninja", marker = "sys_platform == 'linux'" }, { name = "numba", marker = "sys_platform == 'linux'" }, { name = "numpy", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cudnn-frontend", marker = "sys_platform == 'linux'" }, { name = "nvidia-cutlass-dsl", marker = "sys_platform == 'linux'" }, { name = "openai", marker = "sys_platform == 'linux'" }, { name = "openai-harmony", marker = "sys_platform == 'linux'" }, @@ -10047,7 +9948,6 @@ dependencies = [ { name = "pyyaml", marker = "sys_platform == 'linux'" }, { name = "pyzmq", marker = "sys_platform == 'linux'" }, { name = "quack-kernels", marker = "sys_platform == 'linux'" }, - { name = "ray", extra = ["cgraph"], marker = "sys_platform == 'linux'" }, { name = "regex", marker = "sys_platform == 'linux'" }, { name = "requests", marker = "sys_platform == 'linux'" }, { name = "sentencepiece", marker = "sys_platform == 'linux'" }, 
@@ -10065,101 +9965,12 @@ dependencies = [ { name = "watchfiles", marker = "sys_platform == 'linux'" }, { name = "xgrammar", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'ppc64le' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/a8/49/60a2a962ecbf780c8fbfd0d5548b208d654d5c4267df94d8d93883641431/vllm-0.19.1.tar.gz", hash = "sha256:9fb88ce6b50991eba41d183584f65f51d7f6015d86a42cdabf79c1c8bd5d66fa", size = 31105401, upload-time = "2026-04-18T05:50:15.143Z" } wheels = [ - { url = "https://github.com/vivekkalyan/vllm/releases/download/v0.17.0-art1/vllm-0.17.0%2Bart1-cp38-abi3-manylinux_2_31_x86_64.whl", hash = "sha256:dfe9f4bf82bb1fe677fdde81d0cd62702dedf252144847951b2fc13fa4932057" }, + { url = "https://files.pythonhosted.org/packages/28/4c/26c426103c58ac8d98435fe63c7758a2f289b5481a08be19e9c9fe29a4c2/vllm-0.19.1-cp38-abi3-manylinux_2_31_aarch64.whl", hash = "sha256:c8dde3c9af20f00a644e64a50ebe43948f2921bab3ffd5407d634c15836cb181", size = 385252556, upload-time = "2026-04-18T05:49:16.101Z" }, + { url = "https://files.pythonhosted.org/packages/78/20/f41216b79c87372a9d03175f36fa1411ee61059ce8c557d2691722ea4aae/vllm-0.19.1-cp38-abi3-manylinux_2_31_x86_64.whl", hash = "sha256:71a87f46cafab4489c69a5c5c83b870d0235e5694d8222303d460576293dc719", size = 433132101, upload-time = "2026-04-18T05:49:54.202Z" }, ] -[package.metadata] -requires-dist = [ - { name = "aiohttp", specifier = ">=3.13.3" }, - { name = "anthropic", specifier = ">=0.71.0" }, - { name = "blake3" }, - { name = "cachetools" }, - { name = "cbor2" }, - { name = "cloudpickle" }, - { name = "compressed-tensors", specifier = "==0.13.0" }, - { name = "datasets", marker = "extra == 'bench'" }, - { name = "depyf", specifier = "==0.20.0" }, - { name = 
"diskcache", specifier = "==5.6.3" }, - { name = "einops" }, - { name = "fastapi", extras = ["standard"], specifier = ">=0.115.0" }, - { name = "fastsafetensors", marker = "extra == 'fastsafetensors'", specifier = ">=0.2.2" }, - { name = "filelock", specifier = ">=3.16.1" }, - { name = "flashinfer-python", specifier = "==0.6.4" }, - { name = "gguf", specifier = ">=0.17.0" }, - { name = "grpcio" }, - { name = "grpcio-reflection" }, - { name = "helion", marker = "extra == 'helion'" }, - { name = "ijson" }, - { name = "kaldi-native-fbank", specifier = ">=1.18.7" }, - { name = "lark", specifier = "==1.2.2" }, - { name = "librosa", marker = "extra == 'audio'" }, - { name = "llguidance", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'ppc64le' or platform_machine == 's390x' or platform_machine == 'x86_64'", specifier = ">=1.3.0,<1.4.0" }, - { name = "lm-format-enforcer", specifier = "==0.11.3" }, - { name = "matplotlib", marker = "extra == 'bench'" }, - { name = "mcp" }, - { name = "mistral-common", extras = ["audio"], marker = "extra == 'audio'" }, - { name = "mistral-common", extras = ["image"], specifier = ">=1.9.1" }, - { name = "model-hosting-container-standards", specifier = ">=0.1.13,<1.0.0" }, - { name = "msgspec" }, - { name = "ninja" }, - { name = "numba", specifier = "==0.61.2" }, - { name = "numpy" }, - { name = "nvidia-cutlass-dsl", specifier = ">=4.4.0.dev1" }, - { name = "openai", specifier = ">=1.99.1,<2.25.0" }, - { name = "openai-harmony", specifier = ">=0.0.3" }, - { name = "opencv-python-headless", specifier = ">=4.13.0" }, - { name = "opentelemetry-api", specifier = ">=1.27.0" }, - { name = "opentelemetry-api", marker = "extra == 'otel'", specifier = ">=1.26.0" }, - { name = "opentelemetry-exporter-otlp", specifier = ">=1.27.0" }, - { name = "opentelemetry-exporter-otlp", marker = "extra == 'otel'", specifier = ">=1.26.0" }, - { name = "opentelemetry-sdk", specifier = ">=1.27.0" }, - { name = 
"opentelemetry-sdk", marker = "extra == 'otel'", specifier = ">=1.26.0" }, - { name = "opentelemetry-semantic-conventions-ai", specifier = ">=0.4.1" }, - { name = "opentelemetry-semantic-conventions-ai", marker = "extra == 'otel'", specifier = ">=0.4.1" }, - { name = "outlines-core", specifier = "==0.2.11" }, - { name = "pandas", marker = "extra == 'bench'" }, - { name = "partial-json-parser" }, - { name = "petit-kernel", marker = "extra == 'petit-kernel'" }, - { name = "pillow" }, - { name = "plotly", marker = "extra == 'bench'" }, - { name = "prometheus-client", specifier = ">=0.18.0" }, - { name = "prometheus-fastapi-instrumentator", specifier = ">=7.0.0" }, - { name = "protobuf", specifier = ">=5.29.6,!=6.30.*,!=6.31.*,!=6.32.*,!=6.33.0.*,!=6.33.1.*,!=6.33.2.*,!=6.33.3.*,!=6.33.4.*" }, - { name = "psutil" }, - { name = "py-cpuinfo" }, - { name = "pybase64" }, - { name = "pydantic", specifier = ">=2.12.0" }, - { name = "python-json-logger" }, - { name = "pyyaml" }, - { name = "pyzmq", specifier = ">=25.0.0" }, - { name = "quack-kernels", specifier = ">=0.2.7" }, - { name = "ray", extras = ["cgraph"], specifier = ">=2.48.0" }, - { name = "regex" }, - { name = "requests", specifier = ">=2.26.0" }, - { name = "runai-model-streamer", extras = ["gcs", "s3"], marker = "extra == 'runai'", specifier = ">=0.15.3" }, - { name = "scipy", marker = "extra == 'audio'" }, - { name = "scipy", marker = "extra == 'bench'" }, - { name = "seaborn", marker = "extra == 'bench'" }, - { name = "sentencepiece" }, - { name = "setproctitle" }, - { name = "setuptools", marker = "python_full_version >= '3.12'", specifier = ">=77.0.3,<81.0.0" }, - { name = "six", marker = "python_full_version >= '3.12'", specifier = ">=1.16.0" }, - { name = "soundfile", marker = "extra == 'audio'" }, - { name = "tensorizer", marker = "extra == 'tensorizer'", specifier = "==2.10.1" }, - { name = "tiktoken", specifier = ">=0.6.0" }, - { name = "tokenizers", specifier = ">=0.21.1" }, - { name = "torch", 
specifier = "==2.10.0" }, - { name = "torchaudio", specifier = "==2.10.0" }, - { name = "torchvision", specifier = "==0.25.0" }, - { name = "tqdm" }, - { name = "transformers", specifier = ">=4.56.0,<5.3" }, - { name = "typing-extensions", specifier = ">=4.10" }, - { name = "watchfiles" }, - { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'ppc64le' or platform_machine == 's390x' or platform_machine == 'x86_64'", specifier = "==0.1.29" }, -] -provides-extras = ["bench", "tensorizer", "fastsafetensors", "runai", "audio", "video", "flashinfer", "petit-kernel", "helion", "otel"] - [[package]] name = "waitress" version = "3.0.2" @@ -10632,7 +10443,7 @@ wheels = [ [[package]] name = "xgrammar" -version = "0.1.29" +version = "0.1.33" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", marker = "sys_platform == 'linux'" }, @@ -10642,13 +10453,18 @@ dependencies = [ { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "typing-extensions", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/02/a3/70dbe3ffd331a1e7e1ad5a95690a4086e6c7cdb8089f5c7eda712219ccec/xgrammar-0.1.29.tar.gz", hash = "sha256:cf195afa81b489eebf35d4c6f37f27136d05420739ab4a6f7f065c938d7e4baa", size = 2321317, upload-time = "2025-12-19T08:23:54.53Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/87/0b/b5e5c99ce13a9d378a940cda07c5a08b50cc7efb66936c6ac8fa8232a0d5/xgrammar-0.1.29-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51bcfd63bd48a0b26209ffd2143a42067518559355ec9e4e574cef2ae74fac7c", size = 34699408, upload-time = "2025-12-19T08:23:16.906Z" }, - { url = "https://files.pythonhosted.org/packages/a3/a0/4ebc1b3f5af79a3f73d0566034758f3fbcd9c64174646314a9a6f7cc1d27/xgrammar-0.1.29-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e27b50cf8c565845295a8263a4a0790c00a7c1fd783e76222fc0f575654d6f56", size = 34903461, upload-time = "2025-12-19T08:23:19.556Z" }, - { url = "https://files.pythonhosted.org/packages/57/94/18793c64bf0368075a34c06e196bf002f1e6ab0aee332268f44e8d356d5a/xgrammar-0.1.29-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eb370a16b27a683e5f2b9e429ab41440c69977d4a504849ed61831b94cc704c", size = 34705239, upload-time = "2025-12-19T08:23:28.369Z" }, - { url = "https://files.pythonhosted.org/packages/3e/da/4c14e3e00be698009b52700f15326a23272b4b00475939b6acc86b151188/xgrammar-0.1.29-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79e6e4f5cd33be77418cf91efc482f2b3d773d309891224383bc8a4948ad7b07", size = 34906135, upload-time = "2025-12-19T08:23:30.838Z" }, - { url = "https://files.pythonhosted.org/packages/e9/c5/e4965c9921e7bb6061f246ae7f8c7b9b1dfc21262248100c2f9b398b361e/xgrammar-0.1.29-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb22aea775971f7d8c4d0e193257ebeb71b68acd9d36af3331ca5fd4d9a46991", size = 34904126, upload-time = "2025-12-19T08:23:38.335Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/db/43/e5dfddb1d2a4fccf3e3a88f103e88698cdefc3182f4e169a359ffe1c1794/xgrammar-0.1.33.tar.gz", hash = "sha256:8dbe5fc3d76651ab1fac7a68fc2a118b885fa0ec7189927fb6e0dce0081aea99", size = 2398956, upload-time = "2026-03-27T10:16:36.582Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/16/f8297e0e3b468636d8e0190002badfe4a6d8d1c2af295fea2d164e7b5a8a/xgrammar-0.1.33-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a5f561e676df8c9e941c7a2f6df9612bbf645bf1fc714b4a9282cf75cff532f8", size = 42132308, upload-time = "2026-03-27T10:14:58.545Z" }, + { url = "https://files.pythonhosted.org/packages/12/e0/629b892a3810446097635dd1be7e4d977107c42232efb229d70e5c827227/xgrammar-0.1.33-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:3bc9151d9f0d05862c253998c533f04c000273f57180fb6a4e3623e321fd47db", size = 42204526, upload-time = "2026-03-27T10:15:03.299Z" }, + { url = "https://files.pythonhosted.org/packages/4e/04/43d4baca876f5ae1b45897ec30a59801a2da37f16da1fcd85f9555e4c125/xgrammar-0.1.33-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c803e60d791854c5d1f271ece7e1f34d73c82dd4a8b2a06b7af5331482a78ac", size = 42133168, upload-time = "2026-03-27T10:15:16.994Z" }, + { url = "https://files.pythonhosted.org/packages/f0/a8/672833a3cff027253793aa999401d8364896ebf396967e475c7a878b895f/xgrammar-0.1.33-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b8eaa533282a0efb0835db6998ae72e7b3c7875d7a52e360ffebff9b78c30a", size = 42205803, upload-time = "2026-03-27T10:15:21.599Z" }, + { url = "https://files.pythonhosted.org/packages/6b/55/4d186d4065f645a051be992919c51aaf96cfa8a32f7ecc8512a6e41f969f/xgrammar-0.1.33-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7eec984a20fd54d4c79536d99e2515bac54bd4e1380162fa047f5ff45bdf6d8", size = 42133430, upload-time = "2026-03-27T10:15:31.409Z" }, + { url = "https://files.pythonhosted.org/packages/2b/ca/db765035b3bb1854bdb833c118e0f09dacc623ce5e867466d63610d635fa/xgrammar-0.1.33-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d705f62d91a3675997a81d09aa371c375d7793ce1021aff7b7ed5a92021c7379", size = 42206830, upload-time = "2026-03-27T10:15:35.574Z" }, + { url = "https://files.pythonhosted.org/packages/b0/79/8fbd675aa49b180d0912aeb90fa72dca9bb1f476724f76d3097561cca161/xgrammar-0.1.33-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bba458ffe06b3774be3a24eaf58dc217eec3a781ba41340c2eecf76aa9347aa3", size = 42133038, upload-time = "2026-03-27T10:15:48.98Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/c5/64558fd11130624267f788be5d665f898f627b87c6916b523c6e0d4cebf9/xgrammar-0.1.33-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:776a15eaadda463987fba97d8a07b60c262c96353d800fc8639efedb57b7cbbb", size = 42206382, upload-time = "2026-03-27T10:15:53.458Z" }, + { url = "https://files.pythonhosted.org/packages/70/fb/523113e066b74428b843e66baed815671faa1dd366a2967b687498aa8cba/xgrammar-0.1.33-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51e52aef50c2d91122a23ce67f7b187fc6caffa620b7412fd3a5eebb00a29377", size = 42134611, upload-time = "2026-03-27T10:16:07.459Z" }, + { url = "https://files.pythonhosted.org/packages/c5/07/6ea6bf8efff3c29c07f594f1e8665dc3ed43abdad86a6a27da9a3ddcbbef/xgrammar-0.1.33-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24cbb91580da8ac6c86de0464339c1ca1899fb0032d604175bdb384c1a13b9b7", size = 42197758, upload-time = "2026-03-27T10:16:12.504Z" }, ] [[package]] From 7248b2fb43c543392d4ec43a34eb0067dcb25a2d Mon Sep 17 00:00:00 2001 From: Kovbo Date: Wed, 29 Apr 2026 02:33:54 +0000 Subject: [PATCH 02/13] remove megatron qwen 3.5 lora merge --- dev/run_qwen3_5_megatron_yes_no_maybe.py | 1 - src/art/dev/validate.py | 11 -------- tests/unit/test_dedicated_config.py | 36 ++++++++---------------- 3 files changed, 11 insertions(+), 37 deletions(-) diff --git a/dev/run_qwen3_5_megatron_yes_no_maybe.py b/dev/run_qwen3_5_megatron_yes_no_maybe.py index 2f8cdd202..056865142 100644 --- a/dev/run_qwen3_5_megatron_yes_no_maybe.py +++ b/dev/run_qwen3_5_megatron_yes_no_maybe.py @@ -90,7 +90,6 @@ def _format_int_list(values: list[int]) -> str: f"LEARNING_RATE={args.learning_rate}", f"TRAINER_GPU_IDS={_format_int_list(args.trainer_gpu_ids)}", f"INFERENCE_GPU_IDS={_format_int_list(args.inference_gpu_ids)}", - "ROLLOUT_WEIGHTS_MODE=merged", ] env_block = " \\\n ".join(env) diff --git a/src/art/dev/validate.py b/src/art/dev/validate.py 
index 7ab8c6a1f..ad2deb89e 100644 --- a/src/art/dev/validate.py +++ b/src/art/dev/validate.py @@ -20,11 +20,6 @@ def _rollout_weights_mode(config: InternalModelConfig) -> RolloutWeightsMode: raise ValueError("rollout_weights_mode must be either 'lora' or 'merged'") -def _is_qwen3_5_moe_model(config: InternalModelConfig) -> bool: - model_name = config.get("engine_args", {}).get("model") - return model_name in QWEN3_5_MOE_MODELS - - def validate_dedicated_config(config: InternalModelConfig) -> None: """Validate dedicated mode GPU configuration. @@ -89,9 +84,3 @@ def validate_dedicated_config(config: InternalModelConfig) -> None: "enable_sleep_mode is incompatible with dedicated mode " "(dedicated mode runs vLLM on a separate GPU, sleep/wake is not needed)" ) - - if _is_qwen3_5_moe_model(config) and rollout_weights_mode == "lora": - raise ValueError( - "Qwen3.5-MoE models require rollout_weights_mode='merged' with the " - "current vLLM version because direct LoRA inference is currently broken" - ) diff --git a/tests/unit/test_dedicated_config.py b/tests/unit/test_dedicated_config.py index dd9127468..7c97cd42d 100644 --- a/tests/unit/test_dedicated_config.py +++ b/tests/unit/test_dedicated_config.py @@ -252,18 +252,18 @@ def test_merged_rollout_weights_requires_dedicated_mode(): validate_dedicated_config(InternalModelConfig(rollout_weights_mode="merged")) -def test_qwen3_5_moe_requires_merged_rollout_weights(): - with pytest.raises( - ValueError, - match="Qwen3.5-MoE models require rollout_weights_mode='merged'", - ): - validate_dedicated_config( - InternalModelConfig( - trainer_gpu_ids=[0], - inference_gpu_ids=[1], - engine_args={"model": "Qwen/Qwen3.5-35B-A3B"}, # type: ignore[typeddict-item] - ) +@pytest.mark.parametrize( + "base_model", + ["Qwen/Qwen3.5-35B-A3B", "Qwen/Qwen3.5-397B-A17B"], +) +def test_qwen3_5_moe_allows_default_lora_rollout_weights(base_model: str): + validate_dedicated_config( + InternalModelConfig( + trainer_gpu_ids=[0], + inference_gpu_ids=[1], 
+ engine_args={"model": base_model}, # type: ignore[typeddict-item] ) + ) def test_qwen3_5_moe_allows_merged_rollout_weights(): @@ -275,17 +275,3 @@ def test_qwen3_5_moe_allows_merged_rollout_weights(): engine_args={"model": "Qwen/Qwen3.5-35B-A3B"}, # type: ignore[typeddict-item] ) ) - - -def test_other_qwen3_5_moe_requires_merged_rollout_weights(): - with pytest.raises( - ValueError, - match="Qwen3.5-MoE models require rollout_weights_mode='merged'", - ): - validate_dedicated_config( - InternalModelConfig( - trainer_gpu_ids=[0], - inference_gpu_ids=[1], - engine_args={"model": "Qwen/Qwen3.5-397B-A17B"}, # type: ignore[typeddict-item] - ) - ) From e61a442b0deb89133806e9b68a107d2ffbc1def3 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Wed, 29 Apr 2026 18:46:34 +0000 Subject: [PATCH 03/13] add more qwen models --- src/art/dev/get_model_config.py | 4 ++-- src/art/dev/validate.py | 9 ++++++++- src/art/megatron/train.py | 8 ++++---- tests/unit/test_dedicated_config.py | 13 ++++++++++--- tests/unit/test_megatron_dedicated.py | 6 +++--- 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/art/dev/get_model_config.py b/src/art/dev/get_model_config.py index 550f97e4f..0e5b20618 100644 --- a/src/art/dev/get_model_config.py +++ b/src/art/dev/get_model_config.py @@ -1,10 +1,10 @@ from .engine import EngineArgs from .model import InitArgs, InternalModelConfig, PeftArgs, TrainerArgs -from .validate import QWEN3_5_MOE_MODELS, is_dedicated_mode +from .validate import QWEN3_5_DELTANET_MODELS, is_dedicated_mode def default_target_modules(base_model: str) -> list[str]: - if base_model in QWEN3_5_MOE_MODELS: + if base_model in QWEN3_5_DELTANET_MODELS: return [ "q_proj", "k_proj", diff --git a/src/art/dev/validate.py b/src/art/dev/validate.py index ad2deb89e..a00e14fbf 100644 --- a/src/art/dev/validate.py +++ b/src/art/dev/validate.py @@ -2,9 +2,16 @@ from .model import InternalModelConfig, RolloutWeightsMode -QWEN3_5_MOE_MODELS = { +# Dense and MoE Qwen3.5-family models 
with Gated DeltaNet linear-attention +# layers. These need LoRA coverage for linear_attn projections, and Megatron +# compile should stay disabled until the DeltaNet torch.compile failure is fixed. +QWEN3_5_DELTANET_MODELS = { + "Qwen/Qwen3.5-4B", + "Qwen/Qwen3.5-27B", "Qwen/Qwen3.5-35B-A3B", "Qwen/Qwen3.5-397B-A17B", + "Qwen/Qwen3.6-27B", + "Qwen/Qwen3.6-35B-A3B", } diff --git a/src/art/megatron/train.py b/src/art/megatron/train.py index 64a55b194..6967c6724 100644 --- a/src/art/megatron/train.py +++ b/src/art/megatron/train.py @@ -37,7 +37,7 @@ from torch.distributed import all_reduce from art import dev, types -from art.dev.validate import QWEN3_5_MOE_MODELS +from art.dev.validate import QWEN3_5_DELTANET_MODELS from art.loss import loss_fn, shift_tensor from art.megatron.bridge_adapter_compat import build_adapter_weights_by_base from art.megatron.compile_workarounds import install_torch_compile_workarounds @@ -201,7 +201,7 @@ def _compile_enabled(model_identifier: str) -> bool: disabled = _env_flag("ART_DISABLE_MEGATRON_COMPILE") if disabled is not None: return disabled is not True - return model_identifier not in QWEN3_5_MOE_MODELS + return model_identifier not in QWEN3_5_DELTANET_MODELS def _install_gpt_preprocess_hook(model_chunks: ModelChunks) -> None: @@ -386,12 +386,12 @@ def build_training_runtime( elif ( rank == 0 and _env_flag("ART_DISABLE_MEGATRON_COMPILE") is None - and resolved_model_identifier in QWEN3_5_MOE_MODELS + and resolved_model_identifier in QWEN3_5_DELTANET_MODELS ): print( "Disabling torch.compile for", resolved_model_identifier, - "because Qwen3.5 MoE currently fails in PyTorch compiled backward stream ops.", + "because Qwen3.5-family Gated DeltaNet currently fails under torch.compile.", ) optimizer_config = optimizer_config or _default_optimizer_config() diff --git a/tests/unit/test_dedicated_config.py b/tests/unit/test_dedicated_config.py index 7c97cd42d..4f04cb639 100644 --- a/tests/unit/test_dedicated_config.py +++ 
b/tests/unit/test_dedicated_config.py @@ -157,9 +157,16 @@ def test_get_model_config_shared_mode(): @pytest.mark.parametrize( "base_model", - ["Qwen/Qwen3.5-35B-A3B", "Qwen/Qwen3.5-397B-A17B"], + [ + "Qwen/Qwen3.5-4B", + "Qwen/Qwen3.5-27B", + "Qwen/Qwen3.5-35B-A3B", + "Qwen/Qwen3.5-397B-A17B", + "Qwen/Qwen3.6-35B-A3B", + "Qwen/Qwen3.6-27B", + ], ) -def test_get_model_config_qwen3_5_moe_target_modules(base_model: str): +def test_get_model_config_qwen3_5_deltanet_target_modules(base_model: str): from art.dev.get_model_config import get_model_config with tempfile.TemporaryDirectory() as tmpdir: @@ -254,7 +261,7 @@ def test_merged_rollout_weights_requires_dedicated_mode(): @pytest.mark.parametrize( "base_model", - ["Qwen/Qwen3.5-35B-A3B", "Qwen/Qwen3.5-397B-A17B"], + ["Qwen/Qwen3.5-35B-A3B", "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3.6-35B-A3B"], ) def test_qwen3_5_moe_allows_default_lora_rollout_weights(base_model: str): validate_dedicated_config( diff --git a/tests/unit/test_megatron_dedicated.py b/tests/unit/test_megatron_dedicated.py index dcc56aa2d..5595c8c56 100644 --- a/tests/unit/test_megatron_dedicated.py +++ b/tests/unit/test_megatron_dedicated.py @@ -15,7 +15,7 @@ from art import TrainableModel, types from art.dev.model import InternalModelConfig -from art.dev.validate import QWEN3_5_MOE_MODELS +from art.dev.validate import QWEN3_5_DELTANET_MODELS from art.megatron.backend import MegatronBackend from art.megatron.jobs import ( MegatronMergedTrainJob, @@ -152,10 +152,10 @@ def test_unwrap_art_wrapper_name_strips_compiled_wrapper_segments() -> None: ) -def test_compile_enabled_disables_qwen35_moe_by_default() -> None: +def test_compile_enabled_disables_qwen35_deltanet_by_default() -> None: assert _compile_enabled("Qwen/Qwen3-30B-A3B-Instruct-2507") is True assert _compile_enabled("Qwen/Qwen3.5-32B-Instruct") is True - for model_identifier in QWEN3_5_MOE_MODELS: + for model_identifier in QWEN3_5_DELTANET_MODELS: assert _compile_enabled(model_identifier) is False 
From 8e8e7513fa3744c08cbc5ad5997b6a8a14e5b9f9 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Wed, 29 Apr 2026 21:30:37 +0000 Subject: [PATCH 04/13] accept chat_template_kwargs for qwen 3.6 --- dev/yes-no-maybe-local-backend.py | 16 +- src/art/dev/model.py | 4 + src/art/local/backend.py | 8 + src/art/megatron/service.py | 13 +- src/art/model.py | 70 +++++- src/art/preprocessing/tokenize.py | 8 + src/art/unsloth/service.py | 3 - src/art/utils/convert_megatron_moe_lora.py | 233 +++++++++++++++++++ src/art/utils/convert_moe_lora.py | 181 -------------- tests/unit/test_megatron_dedicated.py | 8 +- tests/unit/test_model_openai_client_costs.py | 59 +++++ tests/unit/test_preprocessing_tokenize.py | 43 +++- 12 files changed, 443 insertions(+), 203 deletions(-) create mode 100644 src/art/utils/convert_megatron_moe_lora.py delete mode 100644 src/art/utils/convert_moe_lora.py diff --git a/dev/yes-no-maybe-local-backend.py b/dev/yes-no-maybe-local-backend.py index c9c1d41e7..c5594f504 100644 --- a/dev/yes-no-maybe-local-backend.py +++ b/dev/yes-no-maybe-local-backend.py @@ -50,13 +50,17 @@ async def main(): load_dotenv() backend = LocalBackend() - base_model = os.environ.get("BASE_MODEL", "Qwen/Qwen3.6-27B") + base_model = os.environ.get("BASE_MODEL", "Qwen/Qwen3.5-4B") model = art.TrainableModel( name=os.environ.get("MODEL_NAME", f"yes-no-maybe-local-{uuid.uuid4().hex[:8]}"), project="yes-no-maybe", base_model=base_model, _internal_config=art.dev.InternalModelConfig( engine_args=art.dev.EngineArgs(enforce_eager=True), + chat_template_kwargs={ + "enable_thinking": False, + "preserve_thinking": True, + }, ), ) @@ -71,11 +75,17 @@ async def main(): list(p) for n in [3, 2] for p in permutations(["yes", "no", "maybe"], n) ) ] + prompts = prompts[: int(os.environ.get("PROMPTS_LIMIT", str(len(prompts))))] openai_client = model.openai_client() max_steps = int(os.environ.get("NUM_STEPS", "20")) - groups_per_step = int(os.environ.get("GROUPS_PER_STEP", "8")) - rollouts_per_group = 
int(os.environ.get("ROLLOUTS_PER_GROUP", "4")) + groups_per_step = int(os.environ.get("GROUPS_PER_STEP", str(len(prompts)))) + rollouts_per_group = int( + os.environ.get( + "ROLLOUTS_PER_GROUP", + os.environ.get("ROLLOUTS_PER_PROMPT", "32"), + ) + ) start_step = await model.get_step() for _ in range(start_step, start_step + max_steps): step_prompts = random.sample( diff --git a/src/art/dev/model.py b/src/art/dev/model.py index e55b35d18..41fe4890a 100644 --- a/src/art/dev/model.py +++ b/src/art/dev/model.py @@ -127,6 +127,9 @@ class InternalModelConfig(TypedDict, total=False): - "lora": load LoRA adapters into vLLM directly - "merged": keep training LoRA adapters, but push merged weights into vLLM for inference + chat_template_kwargs: Extra keyword arguments passed to tokenizer + chat-template rendering for both rollout inference and local training + tokenization. """ init_args: "InitArgs" @@ -138,6 +141,7 @@ class InternalModelConfig(TypedDict, total=False): trainer_gpu_ids: list[int] inference_gpu_ids: list[int] rollout_weights_mode: "RolloutWeightsMode" + chat_template_kwargs: dict[str, object] class TinkerArgs(TypedDict, total=False): diff --git a/src/art/local/backend.py b/src/art/local/backend.py index 9f82c33f9..728fe911d 100644 --- a/src/art/local/backend.py +++ b/src/art/local/backend.py @@ -341,6 +341,9 @@ def _get_packed_tensors( except Exception: self._image_processors[model.base_model] = None tokenizer = self._tokenizers[model.base_model] + chat_template_kwargs = ( + model._internal_config or dev.InternalModelConfig() + ).get("chat_template_kwargs") tokenized_results = list( tokenize_trajectory_groups( tokenizer, @@ -348,6 +351,7 @@ def _get_packed_tensors( allow_training_without_logprobs, scale_rewards, image_processor=self._image_processors[model.base_model], + chat_template_kwargs=chat_template_kwargs, ) ) if not tokenized_results: @@ -955,6 +959,9 @@ async def _train_sft( instruction_part, response_part = get_instruction_response_parts( 
model.base_model, tokenizer ) + chat_template_kwargs = ( + model._internal_config or dev.InternalModelConfig() + ).get("chat_template_kwargs") if verbose: print(f"Using instruction_part: {instruction_part!r}") @@ -982,6 +989,7 @@ async def _train_sft( tokenizer=tokenizer, instruction_part=instruction_part, response_part=response_part, + chat_template_kwargs=chat_template_kwargs, ) ) diff --git a/src/art/megatron/service.py b/src/art/megatron/service.py index a1ce338d0..7eebb5c83 100644 --- a/src/art/megatron/service.py +++ b/src/art/megatron/service.py @@ -27,7 +27,9 @@ from ..preprocessing.pack import DiskPackedTensors from ..preprocessing.tokenize import SFTBatch from ..unsloth.service import do_sleep, do_wake_up, gc_and_empty_cuda_cache -from ..utils.convert_moe_lora import convert_checkpoint_if_needed +from ..utils.convert_megatron_moe_lora import ( + convert_checkpoint_to_megatron_moe_lora_if_needed, +) from ..utils.get_model_step import get_step_from_dir from ..utils.network import find_free_tcp_port from ..utils.output_dirs import get_step_checkpoint_dir @@ -57,9 +59,10 @@ def create_identity_lora( ) -> None: """Create an identity LoRA adapter for a Megatron model. - For MoE models, this targets fused expert parameters and converts them to - per-expert format. The conversion swaps lora_A/lora_B, producing A=zeros and - B=Kaiming — which is critical for stable training when alpha/rank is large. + PEFT saves MoE expert LoRA for target_parameters in a fused format, while + ART's Megatron loader currently expects per-expert gate/up/down LoRA keys. + Long term, we can teach Megatron's LoRA loader to accept PEFT fused + target_parameters directly, then delete convert_megatron_moe_lora.py entirely. Args: base_model: HuggingFace model identifier. 
@@ -132,7 +135,7 @@ def _skip_meta_to( os.makedirs(lora_path, exist_ok=True) peft_model.save_pretrained(lora_path) - convert_checkpoint_if_needed(lora_path) + convert_checkpoint_to_megatron_moe_lora_if_needed(lora_path) # Write final adapter_config with per-expert target_modules LoraConfig( diff --git a/src/art/model.py b/src/art/model.py index 047df381a..e412002b0 100644 --- a/src/art/model.py +++ b/src/art/model.py @@ -38,12 +38,41 @@ METRICS_BUILDER_STATE_KEY = "_metrics_builder_state" +def _merge_extra_body_defaults( + defaults: dict[str, Any], + provided: Any, +) -> Any: + if provided is None: + return {**defaults} + if not isinstance(provided, dict): + return provided + + merged = {**defaults} + for key, value in provided.items(): + if isinstance(value, dict) and isinstance(merged.get(key), dict): + merged[key] = {**merged[key], **value} + else: + merged[key] = value + return merged + + class _OpenAIChatCompletionsProxy: - def __init__(self, completions: Any, record_costs: Any) -> None: + def __init__( + self, + completions: Any, + record_costs: Any, + default_extra_body: dict[str, Any] | None = None, + ) -> None: self._completions = completions self._record_costs = record_costs + self._default_extra_body = default_extra_body async def create(self, *args: Any, **kwargs: Any) -> Any: + if self._default_extra_body is not None: + kwargs["extra_body"] = _merge_extra_body_defaults( + self._default_extra_body, + kwargs.get("extra_body"), + ) response = await self._completions.create(*args, **kwargs) self._record_costs(response) return response @@ -53,24 +82,40 @@ def __getattr__(self, name: str) -> Any: class _OpenAIChatProxy: - def __init__(self, chat: Any, record_costs: Any) -> None: + def __init__( + self, + chat: Any, + record_costs: Any, + default_extra_body: dict[str, Any] | None = None, + ) -> None: self._chat = chat - self.completions = _OpenAIChatCompletionsProxy(chat.completions, record_costs) + self.completions = _OpenAIChatCompletionsProxy( + 
chat.completions, + record_costs, + default_extra_body, + ) def __getattr__(self, name: str) -> Any: return getattr(self._chat, name) class _OpenAIClientProxy: - def __init__(self, client: Any, record_costs: Any) -> None: + def __init__( + self, + client: Any, + record_costs: Any, + default_extra_body: dict[str, Any] | None = None, + ) -> None: self._client = client self._record_costs = record_costs - self.chat = _OpenAIChatProxy(client.chat, record_costs) + self._default_extra_body = default_extra_body + self.chat = _OpenAIChatProxy(client.chat, record_costs, default_extra_body) def with_options(self, *args: Any, **kwargs: Any) -> "_OpenAIClientProxy": return _OpenAIClientProxy( self._client.with_options(*args, **kwargs), self._record_costs, + self._default_extra_body, ) def __getattr__(self, name: str) -> Any: @@ -307,10 +352,23 @@ def openai_client( # manually. self._openai_client = cast( AsyncOpenAI, - _OpenAIClientProxy(raw_client, self._record_openai_completion_costs), + _OpenAIClientProxy( + raw_client, + self._record_openai_completion_costs, + self._default_chat_completion_extra_body(), + ), ) return self._openai_client + def _default_chat_completion_extra_body(self) -> dict[str, Any] | None: + internal_config = getattr(self, "_internal_config", None) + if internal_config is None: + return None + chat_template_kwargs = internal_config.get("chat_template_kwargs") + if chat_template_kwargs is None: + return None + return {"chat_template_kwargs": dict(chat_template_kwargs)} + def litellm_completion_params(self, step: int | None = None) -> dict: """Return the parameters that should be sent to litellm.completion. 
diff --git a/src/art/preprocessing/tokenize.py b/src/art/preprocessing/tokenize.py index c2b13364b..fb3e80115 100644 --- a/src/art/preprocessing/tokenize.py +++ b/src/art/preprocessing/tokenize.py @@ -140,6 +140,7 @@ def tokenize_trajectory_groups( shuffle_group_trajectories: bool = True, drop_zero_advantage_trajectories: bool = True, image_processor: BaseImageProcessor | None = None, + chat_template_kwargs: dict[str, Any] | None = None, ) -> Generator["TokenizedResult", None, None]: for group in trajectory_groups: if not group: @@ -173,6 +174,7 @@ def tokenize_trajectory_groups( advantage, allow_training_without_logprobs, trajectory, + chat_template_kwargs=chat_template_kwargs, ): trajectory_results.append(result) weight = 1 / ( @@ -222,6 +224,7 @@ def tokenize_trajectory( advantage: float, allow_training_without_logprobs: bool, trajectory: Trajectory, + chat_template_kwargs: dict[str, Any] | None = None, ) -> TokenizedResult | None: """ Tokenizes a trajectory and returns a TokenizedResult. @@ -255,6 +258,7 @@ def tokenize_trajectory( tools=tools, continue_final_message=True, tokenize=False, + **(chat_template_kwargs or {}), ), ) original_token_ids = cast( @@ -264,6 +268,7 @@ def tokenize_trajectory( tools=tools, continue_final_message=True, return_dict=False, + **(chat_template_kwargs or {}), ), ) sentinel_token_id = max(set(range(tokenizer.vocab_size)) - set(original_token_ids)) @@ -298,6 +303,7 @@ def tokenize_trajectory( tools=tools, continue_final_message=True, return_dict=False, + **(chat_template_kwargs or {}), ), ) assistant_mask: list[int] = [0] * len(token_ids) @@ -454,6 +460,7 @@ def tokenize_sft_batch( tokenizer: PreTrainedTokenizerBase, instruction_part: str, response_part: str, + chat_template_kwargs: dict[str, Any] | None = None, ) -> SFTBatch: """Tokenize a single batch of trajectories for SFT. 
@@ -495,6 +502,7 @@ def tokenize_sft_batch( tokenize=True, add_generation_prompt=False, return_dict=False, + **(chat_template_kwargs or {}), ), ) diff --git a/src/art/unsloth/service.py b/src/art/unsloth/service.py index 937217dfc..26e69aad8 100644 --- a/src/art/unsloth/service.py +++ b/src/art/unsloth/service.py @@ -22,7 +22,6 @@ from ..preprocessing.inputs import TrainInputs from ..preprocessing.pack import DiskPackedTensors from ..preprocessing.tokenize import SFTBatch -from ..utils.convert_moe_lora import convert_checkpoint_if_needed from ..utils.get_model_step import get_step_from_dir from ..utils.network import find_free_tcp_port from ..utils.output_dirs import get_step_checkpoint_dir @@ -77,7 +76,6 @@ def save_checkpoint( checkpoint_dir = get_step_checkpoint_dir(output_dir, next_step) os.makedirs(checkpoint_dir, exist_ok=True) trainer.save_model(checkpoint_dir) - convert_checkpoint_if_needed(checkpoint_dir) gc_and_empty_cuda_cache() return checkpoint_dir @@ -485,7 +483,6 @@ async def start_openai_server( lora_path = get_step_checkpoint_dir(self.output_dir, 0) os.makedirs(os.path.dirname(lora_path), exist_ok=True) self._state.trainer.save_model(lora_path) - convert_checkpoint_if_needed(lora_path) self._latest_step = 0 else: self._latest_step = get_step_from_dir(self.output_dir) diff --git a/src/art/utils/convert_megatron_moe_lora.py b/src/art/utils/convert_megatron_moe_lora.py new file mode 100644 index 000000000..b5808055c --- /dev/null +++ b/src/art/utils/convert_megatron_moe_lora.py @@ -0,0 +1,233 @@ +"""Convert PEFT target-parameter MoE LoRA to ART Megatron per-expert LoRA. 
+ +PEFT saves LoRA for fused MoE expert parameters as tensors under: + mlp.experts.base_layer.lora_* (gate_up_proj) + mlp.experts.lora_* (down_proj) + +ART's Megatron LoRA loader currently consumes per-expert module keys: + mlp.experts.0.gate_proj.lora_A.weight + mlp.experts.0.up_proj.lora_A.weight + mlp.experts.0.down_proj.lora_A.weight + +TODO: Teach Megatron's LoRA loader to accept PEFT fused target_parameters +directly, then delete this converter entirely. +""" + +import json +import os +import re +from typing import Any + +import safetensors.torch +import torch + +_FUSED_EXPERT_PATTERN = re.compile( + r"(?P.*\.mlp\.experts)\." + r"(?Pbase_layer\.)?" + r"(?Plora_[AB])\.weight$" +) + + +def _has_peft_target_parameter_moe_lora(tensors: dict[str, torch.Tensor]) -> bool: + """Check whether the adapter contains PEFT fused target-parameter MoE LoRA.""" + return any(_FUSED_EXPERT_PATTERN.search(key) for key in tensors) + + +def _rank_from_adapter_config(adapter_config: dict[str, Any]) -> int: + rank = adapter_config.get("r", adapter_config.get("lora_rank", 8)) + if not isinstance(rank, int) or rank <= 0: + raise ValueError(f"Invalid LoRA rank in adapter_config: {rank!r}") + return rank + + +def _reshape_expert_a( + key: str, + tensor: torch.Tensor, + *, + rank: int, +) -> tuple[int, torch.Tensor]: + if tensor.ndim != 2: + raise ValueError(f"{key}: expected 2D lora_A tensor, got shape={tensor.shape}") + num_experts_times_rank, in_features = tensor.shape + if num_experts_times_rank % rank != 0: + raise ValueError( + f"{key}: first dimension {num_experts_times_rank} is not divisible " + f"by LoRA rank {rank}" + ) + num_experts = num_experts_times_rank // rank + return num_experts, tensor.reshape(num_experts, rank, in_features) + + +def _reshape_expert_b( + key: str, + tensor: torch.Tensor, + *, + num_experts: int, + rank: int, +) -> torch.Tensor: + if tensor.ndim != 2: + raise ValueError(f"{key}: expected 2D lora_B tensor, got shape={tensor.shape}") + out_features, 
num_experts_times_rank = tensor.shape + expected = num_experts * rank + if num_experts_times_rank != expected: + raise ValueError( + f"{key}: second dimension {num_experts_times_rank} does not match " + f"num_experts * rank ({expected})" + ) + return tensor.reshape(out_features, num_experts, rank).permute(1, 0, 2) + + +def _convert_gate_up_lora( + *, + prefix: str, + lora_a_key: str, + lora_a: torch.Tensor, + lora_b_key: str, + lora_b: torch.Tensor, + rank: int, +) -> dict[str, torch.Tensor]: + num_experts, per_expert_a = _reshape_expert_a(lora_a_key, lora_a, rank=rank) + per_expert_b = _reshape_expert_b( + lora_b_key, + lora_b, + num_experts=num_experts, + rank=rank, + ) + if per_expert_b.shape[1] % 2 != 0: + raise ValueError( + f"{lora_b_key}: gate_up output dimension must be even, got " + f"{per_expert_b.shape[1]}" + ) + gate_b, up_b = per_expert_b.chunk(2, dim=1) + + converted: dict[str, torch.Tensor] = {} + for expert_idx in range(num_experts): + expert_a = per_expert_a[expert_idx].contiguous() + converted[f"{prefix}.{expert_idx}.gate_proj.lora_A.weight"] = expert_a + converted[f"{prefix}.{expert_idx}.up_proj.lora_A.weight"] = expert_a.clone() + converted[f"{prefix}.{expert_idx}.gate_proj.lora_B.weight"] = gate_b[ + expert_idx + ].contiguous() + converted[f"{prefix}.{expert_idx}.up_proj.lora_B.weight"] = up_b[ + expert_idx + ].contiguous() + return converted + + +def _convert_down_lora( + *, + prefix: str, + lora_a_key: str, + lora_a: torch.Tensor, + lora_b_key: str, + lora_b: torch.Tensor, + rank: int, +) -> dict[str, torch.Tensor]: + num_experts, per_expert_a = _reshape_expert_a(lora_a_key, lora_a, rank=rank) + per_expert_b = _reshape_expert_b( + lora_b_key, + lora_b, + num_experts=num_experts, + rank=rank, + ) + + converted: dict[str, torch.Tensor] = {} + for expert_idx in range(num_experts): + converted[f"{prefix}.{expert_idx}.down_proj.lora_A.weight"] = per_expert_a[ + expert_idx + ].contiguous() + 
converted[f"{prefix}.{expert_idx}.down_proj.lora_B.weight"] = per_expert_b[ + expert_idx + ].contiguous() + return converted + + +def convert_peft_target_parameter_moe_lora_to_megatron( + tensors: dict[str, torch.Tensor], + *, + rank: int, +) -> dict[str, torch.Tensor]: + """Convert PEFT fused MoE target-parameter LoRA tensors to Megatron keys.""" + converted: dict[str, torch.Tensor] = {} + fused_by_prefix: dict[str, dict[str, tuple[str, torch.Tensor]]] = {} + + for key, tensor in tensors.items(): + match = _FUSED_EXPERT_PATTERN.match(key) + if match is None: + converted[key] = tensor + continue + + prefix = match.group("prefix") + lora_name = match.group("lora") + is_gate_up = match.group("base_layer") is not None + group = "gate_up" if is_gate_up else "down" + fused_by_prefix.setdefault(prefix, {})[f"{group}_{lora_name}"] = (key, tensor) + + for prefix, fused_tensors in fused_by_prefix.items(): + gate_up_a = fused_tensors.get("gate_up_lora_A") + gate_up_b = fused_tensors.get("gate_up_lora_B") + if gate_up_a is not None or gate_up_b is not None: + if gate_up_a is None or gate_up_b is None: + raise ValueError(f"{prefix}: missing gate_up lora_A or lora_B tensor") + converted.update( + _convert_gate_up_lora( + prefix=prefix, + lora_a_key=gate_up_a[0], + lora_a=gate_up_a[1], + lora_b_key=gate_up_b[0], + lora_b=gate_up_b[1], + rank=rank, + ) + ) + + down_a = fused_tensors.get("down_lora_A") + down_b = fused_tensors.get("down_lora_B") + if down_a is not None or down_b is not None: + if down_a is None or down_b is None: + raise ValueError(f"{prefix}: missing down lora_A or lora_B tensor") + converted.update( + _convert_down_lora( + prefix=prefix, + lora_a_key=down_a[0], + lora_a=down_a[1], + lora_b_key=down_b[0], + lora_b=down_b[1], + rank=rank, + ) + ) + + return converted + + +def convert_checkpoint_to_megatron_moe_lora_if_needed(checkpoint_dir: str) -> None: + """Convert a PEFT MoE target-parameter adapter to Megatron format if needed.""" + adapter_path = 
os.path.join(checkpoint_dir, "adapter_model.safetensors") + config_path = os.path.join(checkpoint_dir, "adapter_config.json") + + if not os.path.exists(adapter_path) or not os.path.exists(config_path): + return + + tensors = safetensors.torch.load_file(adapter_path) + if not _has_peft_target_parameter_moe_lora(tensors): + return + + with open(config_path) as f: + adapter_config = json.load(f) + + rank = _rank_from_adapter_config(adapter_config) + converted = convert_peft_target_parameter_moe_lora_to_megatron( + tensors, + rank=rank, + ) + + safetensors.torch.save_file(converted, adapter_path) + + adapter_config["target_modules"] = [ + module + for module in adapter_config.get("target_modules", []) + if "experts" not in module + ] + ["gate_proj", "up_proj", "down_proj"] + adapter_config.pop("target_parameters", None) + + with open(config_path, "w") as f: + json.dump(adapter_config, f, indent=2) diff --git a/src/art/utils/convert_moe_lora.py b/src/art/utils/convert_moe_lora.py deleted file mode 100644 index 0ea80f63a..000000000 --- a/src/art/utils/convert_moe_lora.py +++ /dev/null @@ -1,181 +0,0 @@ -"""Convert fused MoE LoRA adapters to per-expert format for vLLM compatibility. - -Unsloth with transformers v5 saves MoE expert LoRA as fused 2D tensors: - mlp.experts.base_layer.lora_A [num_experts*rank, intermediate*2] (gate_up_proj) - mlp.experts.base_layer.lora_B [hidden, num_experts*rank] (gate_up_proj) - mlp.experts.lora_A [num_experts*rank, hidden] (down_proj) - mlp.experts.lora_B [intermediate, num_experts*rank] (down_proj) - -vLLM expects per-expert keys: - mlp.experts.0.gate_proj.lora_A [rank, hidden] - mlp.experts.0.gate_proj.lora_B [intermediate, rank] - ... 
-""" - -import json -import os -import re - -import safetensors.torch -import torch - - -def _has_fused_moe_lora(tensors: dict[str, torch.Tensor]) -> bool: - """Check if the adapter contains fused MoE LoRA tensors.""" - return any( - re.search(r"mlp\.experts\.(base_layer\.)?lora_[AB]\.weight$", key) - for key in tensors - ) - - -def _infer_moe_params( - tensors: dict[str, torch.Tensor], - adapter_config: dict, -) -> tuple[int, int, int, int]: - """Infer num_experts, rank, intermediate_size, hidden_size from tensor shapes.""" - rank = adapter_config.get("r", adapter_config.get("lora_rank", 8)) - - for key, tensor in tensors.items(): - # gate_up_proj lora_A: [num_experts*rank, intermediate*2] - if re.search(r"mlp\.experts\.base_layer\.lora_A\.weight$", key): - num_experts_times_rank = tensor.shape[0] - intermediate_times_2 = tensor.shape[1] - num_experts = num_experts_times_rank // rank - intermediate_size = intermediate_times_2 // 2 - break - # down_proj lora_B: [intermediate, num_experts*rank] - if re.search(r"mlp\.experts\.lora_B\.weight$", key): - intermediate_size = tensor.shape[0] - num_experts = tensor.shape[1] // rank - break - else: - raise ValueError("Could not find fused MoE tensors to infer parameters") - - # Get hidden_size from gate_up_proj lora_B: [hidden, num_experts*rank] - # or from down_proj lora_A: [num_experts*rank, hidden] - for key, tensor in tensors.items(): - if re.search(r"mlp\.experts\.base_layer\.lora_B\.weight$", key): - hidden_size = tensor.shape[0] - break - if re.search(r"mlp\.experts\.lora_A\.weight$", key): - hidden_size = tensor.shape[1] - break - else: - raise ValueError("Could not infer hidden_size from fused MoE tensors") - - return num_experts, rank, intermediate_size, hidden_size - - -def convert_fused_moe_lora( - tensors: dict[str, torch.Tensor], - num_experts: int, - rank: int, - intermediate_size: int, - hidden_size: int, -) -> dict[str, torch.Tensor]: - """Convert fused MoE LoRA tensors to per-expert format. 
- - Non-expert tensors (e.g. self_attn) are passed through unchanged. - """ - new_tensors: dict[str, torch.Tensor] = {} - - for key, tensor in tensors.items(): - # Non-expert tensors: keep as-is - m = re.match( - r"(.*\.mlp\.experts)\.(base_layer\.lora_(A|B)|lora_(A|B))\.weight$", - key, - ) - if not m: - new_tensors[key] = tensor - continue - - prefix = m.group(1) - is_base_layer = "base_layer" in key - is_A = "lora_A" in key - - if is_base_layer: - # gate_up_proj (fused gate + up) - if is_A: - # [num_experts*rank, intermediate*2] → per expert - per_expert = tensor.reshape(num_experts, rank, intermediate_size * 2) - for e in range(num_experts): - expert_a = per_expert[e] # [rank, intermediate*2] - gate_a = expert_a[:, :intermediate_size] - up_a = expert_a[:, intermediate_size:] - new_tensors[f"{prefix}.{e}.gate_proj.lora_B.weight"] = ( - gate_a.T.contiguous() - ) - new_tensors[f"{prefix}.{e}.up_proj.lora_B.weight"] = ( - up_a.T.contiguous() - ) - else: - # [hidden, num_experts*rank] → per expert - per_expert = tensor.reshape(hidden_size, num_experts, rank) - for e in range(num_experts): - expert_b = per_expert[:, e, :] # [hidden, rank] - new_tensors[f"{prefix}.{e}.gate_proj.lora_A.weight"] = ( - expert_b.T.contiguous() - ) - new_tensors[f"{prefix}.{e}.up_proj.lora_A.weight"] = ( - expert_b.T.contiguous() - ) - else: - # down_proj - if is_A: - # [num_experts*rank, hidden] → per expert - per_expert = tensor.reshape(num_experts, rank, hidden_size) - for e in range(num_experts): - expert_a = per_expert[e] # [rank, hidden] - new_tensors[f"{prefix}.{e}.down_proj.lora_B.weight"] = ( - expert_a.T.contiguous() - ) - else: - # [intermediate, num_experts*rank] → per expert - per_expert = tensor.reshape(intermediate_size, num_experts, rank) - for e in range(num_experts): - expert_b = per_expert[:, e, :] # [intermediate, rank] - new_tensors[f"{prefix}.{e}.down_proj.lora_A.weight"] = ( - expert_b.T.contiguous() - ) - - return new_tensors - - -def 
convert_checkpoint_if_needed(checkpoint_dir: str) -> None: - """Convert a checkpoint's MoE LoRA adapter to per-expert format if needed. - - This is a no-op for non-MoE adapters. - """ - adapter_path = os.path.join(checkpoint_dir, "adapter_model.safetensors") - config_path = os.path.join(checkpoint_dir, "adapter_config.json") - - if not os.path.exists(adapter_path) or not os.path.exists(config_path): - return - - tensors = safetensors.torch.load_file(adapter_path) - if not _has_fused_moe_lora(tensors): - return - - with open(config_path) as f: - adapter_config = json.load(f) - - num_experts, rank, intermediate_size, hidden_size = _infer_moe_params( - tensors, adapter_config - ) - - new_tensors = convert_fused_moe_lora( - tensors, num_experts, rank, intermediate_size, hidden_size - ) - - # Overwrite the adapter with the converted tensors - safetensors.torch.save_file(new_tensors, adapter_path) - - # Update adapter_config.json target_modules - adapter_config["target_modules"] = [ - m for m in adapter_config.get("target_modules", []) if "experts" not in m - ] + ["gate_proj", "up_proj", "down_proj"] - # Remove target_parameters if present (not needed for per-expert format) - adapter_config.pop("target_parameters", None) - - with open(config_path, "w") as f: - json.dump(adapter_config, f, indent=2) diff --git a/tests/unit/test_megatron_dedicated.py b/tests/unit/test_megatron_dedicated.py index 5595c8c56..f572ebc7e 100644 --- a/tests/unit/test_megatron_dedicated.py +++ b/tests/unit/test_megatron_dedicated.py @@ -196,13 +196,17 @@ class FakePeftModel: def save_pretrained(self, lora_path: str) -> None: Path(lora_path).mkdir(parents=True, exist_ok=True) + def fake_model_from_config(config: Any, **_kwargs: Any) -> FakeModel: + seen["config"] = config + return FakeModel() + monkeypatch.setattr( "transformers.AutoConfig.from_pretrained", lambda *_args, **_kwargs: top_level_config, ) monkeypatch.setattr( "transformers.AutoModelForCausalLM.from_config", - lambda config, 
**_kwargs: seen.setdefault("config", config) or FakeModel(), + fake_model_from_config, ) monkeypatch.setattr("accelerate.init_empty_weights", nullcontext) monkeypatch.setattr( @@ -212,7 +216,7 @@ def save_pretrained(self, lora_path: str) -> None: ), ) monkeypatch.setattr( - "art.megatron.service.convert_checkpoint_if_needed", + "art.megatron.service.convert_checkpoint_to_megatron_moe_lora_if_needed", lambda _path: None, ) diff --git a/tests/unit/test_model_openai_client_costs.py b/tests/unit/test_model_openai_client_costs.py index b88e6bb60..657f0a08c 100644 --- a/tests/unit/test_model_openai_client_costs.py +++ b/tests/unit/test_model_openai_client_costs.py @@ -5,6 +5,7 @@ from art import TrainableModel from art.costs import build_cost_calculator, get_model_pricing +from art.model import _OpenAIChatCompletionsProxy class _FakeUsage: @@ -88,6 +89,64 @@ async def test_openai_client_automatically_logs_train_tinker_costs( assert metrics["costs/train/tinker_sample"] == pytest.approx(0.0006) assert metrics["costs/train"] == pytest.approx(0.00072) + @pytest.mark.asyncio + async def test_openai_chat_proxy_adds_default_extra_body(self) -> None: + class _Recorder: + def __init__(self) -> None: + self.kwargs: dict[str, Any] = {} + + async def create(self, *args: Any, **kwargs: Any) -> _FakeResponse: + del args + self.kwargs = kwargs + return _FakeResponse(0, 0) + + recorder = _Recorder() + proxy = _OpenAIChatCompletionsProxy( + recorder, + lambda _response: None, + { + "chat_template_kwargs": { + "enable_thinking": False, + "preserve_thinking": True, + } + }, + ) + + await proxy.create( + model="test-model", + messages=[], + extra_body={"chat_template_kwargs": {"preserve_thinking": False}}, + ) + + assert recorder.kwargs["extra_body"] == { + "chat_template_kwargs": { + "enable_thinking": False, + "preserve_thinking": False, + } + } + + def test_trainable_model_uses_configured_chat_template_kwargs( + self, + ) -> None: + model = TrainableModel( + name="test-run", + 
project="test-project", + base_model="test-model", + _internal_config={ + "chat_template_kwargs": { + "enable_thinking": False, + "preserve_thinking": True, + } + }, + ) + + assert model._default_chat_completion_extra_body() == { + "chat_template_kwargs": { + "enable_thinking": False, + "preserve_thinking": True, + } + } + @pytest.mark.asyncio async def test_openai_client_automatically_logs_eval_tinker_costs( self, diff --git a/tests/unit/test_preprocessing_tokenize.py b/tests/unit/test_preprocessing_tokenize.py index 70b882d9f..2a913e260 100644 --- a/tests/unit/test_preprocessing_tokenize.py +++ b/tests/unit/test_preprocessing_tokenize.py @@ -1,6 +1,6 @@ import sys import types -from typing import cast +from typing import Any, cast from openai.types.chat.chat_completion import Choice import pytest @@ -20,6 +20,9 @@ class _FakeTokenizer: eos_token = "\x00" eos_token_id = 0 + def __init__(self) -> None: + self.apply_chat_template_kwargs: list[dict[str, Any]] = [] + def apply_chat_template( self, messages, @@ -28,7 +31,8 @@ def apply_chat_template( return_dict=None, **kwargs, ): - del tools, kwargs + del tools + self.apply_chat_template_kwargs.append(dict(kwargs)) rendered = "".join( f"<{message['role']}>{message.get('content', '')}" for message in messages ) @@ -68,7 +72,6 @@ def apply_chat_template( return_dict=None, **kwargs, ): - del kwargs for message in messages: tool_calls = message.get("tool_calls") if tool_calls is None: @@ -84,6 +87,7 @@ def apply_chat_template( tools=tools, tokenize=tokenize, return_dict=return_dict, + **kwargs, ) @@ -117,6 +121,39 @@ def test_tokenize_trajectory_requests_list_chat_template_output() -> None: assert assistant_ids == tokenizer.encode("OK", add_special_tokens=False) +def test_tokenize_trajectory_passes_chat_template_kwargs() -> None: + tokenizer = _FakeTokenizer() + messages = cast( + MessagesAndChoices, + [ + {"role": "user", "content": "Hi"}, + {"role": "assistant", "content": "OK"}, + ], + ) + history = 
History(messages_and_choices=messages) + trajectory = Trajectory(messages_and_choices=messages, reward=1.0) + + result = tokenize_trajectory( + tokenizer=tokenizer, # type: ignore[arg-type] + image_processor=None, + history=history, + advantage=1.0, + allow_training_without_logprobs=True, + trajectory=trajectory, + chat_template_kwargs={ + "enable_thinking": False, + "preserve_thinking": True, + }, + ) + + assert result is not None + assert tokenizer.apply_chat_template_kwargs + assert all( + call.get("enable_thinking") is False and call.get("preserve_thinking") is True + for call in tokenizer.apply_chat_template_kwargs + ) + + def test_tokenize_sft_batch_requests_list_chat_template_output( monkeypatch: pytest.MonkeyPatch, ) -> None: From f46488c87a33555785f7d7a2097170f030cde685 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Thu, 30 Apr 2026 20:22:16 +0000 Subject: [PATCH 05/13] add megatron Qwen 3.6 support --- dev/yes-no-maybe-local-backend.py | 2 +- dev/yes-no-maybe-megatron.py | 378 +++++-------------- src/art/megatron/lora.py | 55 ++- src/art/megatron/merge.py | 5 + src/art/megatron/service.py | 35 +- src/art/megatron/train.py | 30 +- src/art/preprocessing/tokenize.py | 8 +- src/art/utils/convert_megatron_moe_lora.py | 171 ++++++--- src/art/vllm/dedicated_server.py | 9 +- src/art/vllm/engine.py | 8 +- tests/unit/test_convert_megatron_moe_lora.py | 74 ++++ tests/unit/test_megatron_dedicated.py | 10 +- tests/unit/test_megatron_qwen_helpers.py | 62 +++ tests/unit/test_preprocessing_tokenize.py | 97 +++++ 14 files changed, 585 insertions(+), 359 deletions(-) create mode 100644 tests/unit/test_convert_megatron_moe_lora.py diff --git a/dev/yes-no-maybe-local-backend.py b/dev/yes-no-maybe-local-backend.py index c5594f504..fa378a27c 100644 --- a/dev/yes-no-maybe-local-backend.py +++ b/dev/yes-no-maybe-local-backend.py @@ -38,7 +38,7 @@ async def rollout( elif content == "maybe": reward = 1.0 else: - reward = random.random() + reward = 0.0 return
art.Trajectory(messages_and_choices=[*messages, choice], reward=reward) diff --git a/dev/yes-no-maybe-megatron.py b/dev/yes-no-maybe-megatron.py index b34f6f5dc..2e09210d8 100644 --- a/dev/yes-no-maybe-megatron.py +++ b/dev/yes-no-maybe-megatron.py @@ -1,12 +1,18 @@ -"""Yes-no-maybe metrics demo for the Megatron backend.""" +"""Yes-no-maybe training demo for the Megatron backend. -from __future__ import annotations +By default this runs Qwen 3.6 in dedicated merged mode, which needs two GPUs: +GPU 0 runs Megatron training and GPU 1 runs the dedicated vLLM inference server. +After each train step, Megatron keeps the LoRA adapter checkpoint and pushes merged +weights into vLLM for rollouts, because direct vLLM LoRA serving does not yet +reflect these target-parameter MoE adapters reliably. Override TRAINER_GPU_IDS, +INFERENCE_GPU_IDS, or ROLLOUT_WEIGHTS_MODE if you need a different layout. +""" import asyncio from itertools import permutations -import json import os -import time +import random +import uuid from dotenv import load_dotenv import openai @@ -15,312 +21,124 @@ from art.megatron import MegatronBackend -def _get_env_bool(name: str, default: bool | None = None) -> bool | None: - value = os.environ.get(name) - if value is None: - return default - lowered = value.strip().lower() - if lowered in {"1", "true", "yes", "on"}: - return True - if lowered in {"0", "false", "no", "off"}: - return False - raise ValueError(f"Invalid boolean value for {name}: {value!r}") - - -def _get_env_int_list(name: str, default: list[int] | None = None) -> list[int] | None: - value = os.environ.get(name) - if value is None: - return default - parts = [part.strip() for part in value.split(",") if part.strip()] - if not parts: - raise ValueError(f"Invalid GPU ID list for {name}: {value!r}") - return [int(part) for part in parts] - - -def _chat_completion_extra_body(base_model: str) -> dict[str, object] | None: - if base_model.startswith("Qwen/Qwen3"): - return {"chat_template_kwargs": 
{"enable_thinking": False}} - return None - - -def with_quotes(word: str) -> str: - return f"'{word}'" - - -def build_prompts() -> list[str]: - prompts: list[str] = [] - for prefix in ["respond", "just respond"]: - for use_quotes in [True, False]: - for length in [3, 2]: - for words in permutations(["yes", "no", "maybe"], length): - rendered_words = ( - [with_quotes(word) for word in words] - if use_quotes - else list(words) - ) - if length == 3: - suffix = ", ".join(rendered_words) - else: - suffix = f"{rendered_words[0]} or {rendered_words[1]}" - prompts.append(f"{prefix} with {suffix}") - return prompts - - -def first_word(content: str | None) -> str: - if not content: - return "" - words = content.strip().lower().split(maxsplit=1) - if not words: - return "" - return words[0].strip(".,!?:;\"'()[]{}") - - -def reward_for_answer(answer: str) -> float: - if answer == "yes": - return 0.5 - if answer == "no": - return 0.75 - if answer == "maybe": - return 1.0 - return 0.0 - - -def summarize(groups: list[art.TrajectoryGroup]) -> dict[str, float]: - trajectories = [trajectory for group in groups for trajectory in group.trajectories] - answers = [str(trajectory.metadata["answer"]) for trajectory in trajectories] - rewards = [trajectory.reward for trajectory in trajectories] - total = len(trajectories) - assert total > 0 - return { - "num_rollouts": float(total), - "avg_reward": sum(rewards) / total, - "yes_rate": answers.count("yes") / total, - "no_rate": answers.count("no") / total, - "maybe_rate": answers.count("maybe") / total, - "invalid_rate": sum(answer not in {"yes", "no", "maybe"} for answer in answers) - / total, - } - - async def rollout( - client: openai.AsyncOpenAI, - model: art.TrainableModel, - prompt: str, - *, - max_tokens: int, - timeout: float, + client: openai.AsyncOpenAI, model: art.TrainableModel, prompt: str ) -> art.Trajectory: - messages: art.Messages = [{"role": "user", "content": prompt}] - completion = await client.chat.completions.create( - 
model=model.get_inference_name(), - messages=messages, - max_tokens=max_tokens, - timeout=timeout, - extra_body=_chat_completion_extra_body(model.base_model), - ) - choice = completion.choices[0] - answer = first_word(choice.message.content) - return art.Trajectory( - messages_and_choices=[*messages, choice], - reward=reward_for_answer(answer), - metadata={"answer": answer}, - ) - - -async def evaluate( - client: openai.AsyncOpenAI, - model: art.TrainableModel, - prompts: list[str], - *, - max_tokens: int, - timeout: float, -) -> dict[str, float]: - groups = await art.gather_trajectory_groups( - art.TrajectoryGroup( - [rollout(client, model, prompt, max_tokens=max_tokens, timeout=timeout)] - ) - for prompt in prompts + messages: art.Messages = [ + { + "role": "user", + "content": prompt, + } + ] + chat_completion = await client.chat.completions.create( + messages=messages, model=model.get_inference_name(), max_tokens=100, timeout=100 ) - return summarize(groups) + choice = chat_completion.choices[0] + content = choice.message.content + assert isinstance(content, str) + if content == "yes": + reward = 0.5 + elif content == "no": + reward = 0.75 + elif content == "maybe": + reward = 1.0 + else: + reward = 0.0 + return art.Trajectory(messages_and_choices=[*messages, choice], reward=reward) -def build_internal_config() -> art.dev.InternalModelConfig: - trainer_gpu_ids = _get_env_int_list("TRAINER_GPU_IDS") - inference_gpu_ids = _get_env_int_list("INFERENCE_GPU_IDS") - rollout_weights_mode = os.environ.get("ROLLOUT_WEIGHTS_MODE") - - internal_config = art.dev.InternalModelConfig( - engine_args=art.dev.EngineArgs( - gpu_memory_utilization=float( - os.environ.get("GPU_MEMORY_UTILIZATION", "0.8") - ), - max_model_len=int(os.environ.get("MAX_MODEL_LEN", "4096")), - max_num_seqs=int(os.environ.get("MAX_NUM_SEQS", "8")), - tensor_parallel_size=int(os.environ.get("TENSOR_PARALLEL_SIZE", "1")), - enforce_eager=_get_env_bool("ENFORCE_EAGER"), - ), - ) - max_seq_length = 
os.environ.get("MAX_SEQ_LENGTH") - if max_seq_length is not None: - init_args: art.dev.InitArgs = {"max_seq_length": int(max_seq_length)} - load_in_16bit = _get_env_bool("LOAD_IN_16BIT") - if load_in_16bit is not None: - init_args["load_in_16bit"] = load_in_16bit - load_in_4bit = _get_env_bool("LOAD_IN_4BIT") - if load_in_4bit is not None: - init_args["load_in_4bit"] = load_in_4bit - internal_config["init_args"] = init_args - if trainer_gpu_ids is not None: - assert inference_gpu_ids is not None - internal_config["trainer_gpu_ids"] = trainer_gpu_ids - internal_config["inference_gpu_ids"] = inference_gpu_ids - if rollout_weights_mode is not None: - internal_config["rollout_weights_mode"] = rollout_weights_mode - return internal_config +def with_quotes(w: str) -> str: + return f"'{w}'" -async def main() -> None: +async def main(): load_dotenv() - base_model = os.environ.get("BASE_MODEL", "Qwen/Qwen3-30B-A3B-Instruct-2507") - project = os.environ.get("PROJECT", "yes-no-maybe-megatron") - model_name = os.environ.get("MODEL_NAME", f"megatron-ynm-{int(time.time())}") - num_steps = int(os.environ.get("NUM_STEPS", "20")) - rollouts_per_prompt = int(os.environ.get("ROLLOUTS_PER_PROMPT", "32")) - max_tokens = int(os.environ.get("MAX_TOKENS", "100")) - timeout = float(os.environ.get("TIMEOUT", "100")) - learning_rate = float(os.environ.get("LEARNING_RATE", "1e-4")) - packed_sequence_length = int( - os.environ.get( - "PACKED_SEQUENCE_LENGTH", - os.environ.get("MAX_SEQ_LENGTH", "4096"), - ) - ) - backend = MegatronBackend() + base_model = os.environ.get("BASE_MODEL", "Qwen/Qwen3.6-35B-A3B") model = art.TrainableModel( - name=model_name, - project=project, + name=os.environ.get("MODEL_NAME", f"yes-no-maybe-megatron-{uuid.uuid4().hex[:8]}"), + project="yes-no-maybe-megatron", base_model=base_model, - report_metrics=[], - _internal_config=build_internal_config(), + _internal_config=art.dev.InternalModelConfig( + engine_args=art.dev.EngineArgs( + gpu_memory_utilization=float( + 
os.environ.get("GPU_MEMORY_UTILIZATION", "0.8") + ), + max_model_len=int(os.environ.get("MAX_MODEL_LEN", "4096")), + max_num_seqs=int(os.environ.get("MAX_NUM_SEQS", "8")), + tensor_parallel_size=int(os.environ.get("TENSOR_PARALLEL_SIZE", "1")), + ), + trainer_gpu_ids=[ + int(gpu_id) + for gpu_id in os.environ.get("TRAINER_GPU_IDS", "0").split(",") + ], + inference_gpu_ids=[ + int(gpu_id) + for gpu_id in os.environ.get("INFERENCE_GPU_IDS", "1").split(",") + ], + rollout_weights_mode=os.environ.get("ROLLOUT_WEIGHTS_MODE", "merged"), + chat_template_kwargs={ + "enable_thinking": False, + "preserve_thinking": True, + }, + ), ) - prompts = build_prompts() - prompts = prompts[: int(os.environ.get("PROMPTS_LIMIT", str(len(prompts))))] - eval_prompts = prompts[: int(os.environ.get("EVAL_PROMPTS", "24"))] try: - print(json.dumps({"event": "register_start"}), flush=True) await model.register(backend) - print( - json.dumps( - { - "event": "register_done", - "step": int(await model.get_step()), - "model": model.get_inference_name(), - } - ), - flush=True, - ) - client = model.openai_client() - print( - json.dumps({"event": "eval_start", "step": int(await model.get_step())}), - flush=True, - ) - initial_eval = await evaluate( - client, - model, - eval_prompts, - max_tokens=max_tokens, - timeout=timeout, + prompts = [ + f"{prefix} with {', '.join([with_quotes(w) if use_quotes else w for w in words]) if len(words) == 3 else f'{words[0]}' + (f' or {words[1]}' if len(words) > 1 else '')}" + for prefix in ["respond", "just respond"] + for use_quotes in [True, False] + for words in ( + list(p) for n in [3, 2] for p in permutations(["yes", "no", "maybe"], n) + ) + ] + prompts = prompts[: int(os.environ.get("PROMPTS_LIMIT", str(len(prompts))))] + + openai_client = model.openai_client() + max_steps = int(os.environ.get("NUM_STEPS", "20")) + groups_per_step = int(os.environ.get("GROUPS_PER_STEP", str(len(prompts)))) + rollouts_per_group = int( + os.environ.get( + "ROLLOUTS_PER_GROUP", + 
os.environ.get("ROLLOUTS_PER_PROMPT", "32"), + ) ) - print( - json.dumps( - { - "event": "eval", - "step": int(await model.get_step()), - "model": model.get_inference_name(), - **initial_eval, - } - ), - flush=True, + packed_sequence_length = int( + os.environ.get( + "PACKED_SEQUENCE_LENGTH", + os.environ.get("MAX_SEQ_LENGTH", "4096"), + ) ) - start_step = await model.get_step() - for offset in range(num_steps): - current_step = start_step + offset - print( - json.dumps( - { - "event": "rollout_start", - "step": current_step, - "model": model.get_inference_name(), - } - ), - flush=True, + for _ in range(start_step, start_step + max_steps): + step_prompts = random.sample( + prompts, + k=min(groups_per_step, len(prompts)), ) train_groups = await art.gather_trajectory_groups( - art.TrajectoryGroup( - rollout( - client, - model, - prompt, - max_tokens=max_tokens, - timeout=timeout, + ( + art.TrajectoryGroup( + rollout(openai_client, model, prompt) + for _ in range(rollouts_per_group) ) - for _ in range(rollouts_per_prompt) + for prompt in step_prompts ) - for prompt in prompts - ) - train_summary = summarize(train_groups) - print( - json.dumps( - { - "event": "train_start", - "step": current_step, - "model": model.get_inference_name(), - **train_summary, - } - ), - flush=True, ) result = await backend.train( model, train_groups, - learning_rate=learning_rate, + learning_rate=1e-4, packed_sequence_length=packed_sequence_length, ) - print( - json.dumps( - { - "event": "train_step", - "step": result.step, - "model": model.get_inference_name(), - **train_summary, - "backend_metrics": result.metrics, - } - ), - flush=True, - ) - - eval_summary = await evaluate( - client, - model, - eval_prompts, - max_tokens=max_tokens, - timeout=timeout, - ) - print( - json.dumps( - { - "event": "eval", - "step": current_step + 1, - "model": model.get_inference_name(), - **eval_summary, - } - ), - flush=True, + await model.log( + train_groups, + metrics=result.metrics, + step=result.step, 
+ split="train", ) finally: await backend.close() diff --git a/src/art/megatron/lora.py b/src/art/megatron/lora.py index 9fba022f2..5795a1951 100644 --- a/src/art/megatron/lora.py +++ b/src/art/megatron/lora.py @@ -1,5 +1,6 @@ from collections.abc import Sequence import math +import os from typing import Any, Literal, cast from megatron.bridge.models.gpt_provider import GPTModelProvider @@ -13,8 +14,10 @@ ) from megatron.core.ssm.gated_delta_net import GatedDeltaNet from megatron.core.tensor_parallel.mappings import ( + gather_from_sequence_parallel_region, reduce_from_tensor_model_parallel_region, reduce_scatter_to_sequence_parallel_region, + scatter_to_sequence_parallel_region, ) from megatron.core.transformer.attention import SelfAttention from megatron.core.transformer.moe.experts import TEGroupedMLP @@ -25,8 +28,8 @@ from .cute_grouped_lora_quack import quack_grouped_lora, quack_grouped_lora_dual -LORA_RANK = 1 -LORA_ALPHA = 32 +LORA_RANK = int(os.environ.get("ART_MEGATRON_LORA_RANK", "1")) +LORA_ALPHA = int(os.environ.get("ART_MEGATRON_LORA_ALPHA", "32")) ShardDomain = Literal["tp", "expert_tp"] GradSyncDomain = Literal["tp_default", "expert_tp"] @@ -98,6 +101,45 @@ def _normalize_axis(axis: int, ndim: int) -> int: return axis +def _match_sequence_parallel_output_shape( + adapter_out: torch.Tensor, + base_out: torch.Tensor, + *, + adapter_model_prefix: str, +) -> torch.Tensor: + if adapter_out.shape == base_out.shape: + return adapter_out + + tp_size = _get_shard_world_size("tp") + if ( + tp_size > 1 + and adapter_out.ndim == base_out.ndim + and adapter_out.shape[0] == base_out.shape[0] * tp_size + and adapter_out.shape[1:] == base_out.shape[1:] + ): + adapter_out = scatter_to_sequence_parallel_region(adapter_out) + if adapter_out.shape == base_out.shape: + return adapter_out + + if ( + tp_size > 1 + and adapter_out.ndim == base_out.ndim + and adapter_out.shape[0] * tp_size == base_out.shape[0] + and adapter_out.shape[1:] == base_out.shape[1:] + ): + 
adapter_out = gather_from_sequence_parallel_region( + adapter_out, + tensor_parallel_output_grad=True, + ) + if adapter_out.shape == base_out.shape: + return adapter_out + + raise RuntimeError( + f"{adapter_model_prefix}: LoRA adapter output shape {tuple(adapter_out.shape)} " + f"does not match base output shape {tuple(base_out.shape)}" + ) + + def _linear_disables_tensor_parallel_comm(linear: Any) -> bool: # Shared experts can keep TP-sharded weights while deferring TP comm to the # overlap path by setting parallel_mode=None / explicit_expert_comm=True. @@ -725,6 +767,10 @@ def __init__( alpha=alpha, num_local_experts=num_local_experts, ) + # PEFT target-parameter MoE LoRA stores one A matrix for the fused + # gate/up projection. Keep Megatron's gate/up A tied so checkpoints can + # round-trip through the PEFT/vLLM fused format without losing state. + self.up_lora.A_T = self.gate_lora.A_T self.uses_direct_quack_grouped_lora_dual = True @staticmethod @@ -856,6 +902,11 @@ def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor | None]: [self.gate_lora(x), self.up_lora(x)], dim=-1, ) + adapter_out = _match_sequence_parallel_output_shape( + adapter_out, + base_out, + adapter_model_prefix=self.gate_lora.adapter_model_prefix.rsplit(".", 1)[0], + ) return base_out + adapter_out, bias_out diff --git a/src/art/megatron/merge.py b/src/art/megatron/merge.py index a77c22cf3..147f14bad 100644 --- a/src/art/megatron/merge.py +++ b/src/art/megatron/merge.py @@ -5,6 +5,10 @@ import torch +from art.utils.convert_megatron_moe_lora import ( + convert_megatron_moe_lora_to_peft_target_parameter, +) + safetensors = importlib.import_module("safetensors") safetensors_torch = importlib.import_module("safetensors.torch") safe_open = safetensors.safe_open @@ -124,6 +128,7 @@ def merge_lora_adapter(lora_path: str) -> None: return adapter_model_path = base_dir / "adapter_model.safetensors" + adapter_model = convert_megatron_moe_lora_to_peft_target_parameter(adapter_model) 
save_file(adapter_model, adapter_model_path) for filename in shard_filenames: filename.unlink() diff --git a/src/art/megatron/service.py b/src/art/megatron/service.py index 7eebb5c83..8544a8831 100644 --- a/src/art/megatron/service.py +++ b/src/art/megatron/service.py @@ -27,9 +27,6 @@ from ..preprocessing.pack import DiskPackedTensors from ..preprocessing.tokenize import SFTBatch from ..unsloth.service import do_sleep, do_wake_up, gc_and_empty_cuda_cache -from ..utils.convert_megatron_moe_lora import ( - convert_checkpoint_to_megatron_moe_lora_if_needed, -) from ..utils.get_model_step import get_step_from_dir from ..utils.network import find_free_tcp_port from ..utils.output_dirs import get_step_checkpoint_dir @@ -59,10 +56,9 @@ def create_identity_lora( ) -> None: """Create an identity LoRA adapter for a Megatron model. - PEFT saves MoE expert LoRA for target_parameters in a fused format, while - ART's Megatron loader currently expects per-expert gate/up/down LoRA keys. - Long term, we can teach Megatron's LoRA loader to accept PEFT fused - target_parameters directly, then delete convert_megatron_moe_lora.py entirely. + PEFT saves MoE expert LoRA for target_parameters in the fused format that + vLLM expects. ART's Megatron runtime converts that format in memory before + loading adapters into Megatron. Args: base_model: HuggingFace model identifier. 
@@ -135,16 +131,6 @@ def _skip_meta_to( os.makedirs(lora_path, exist_ok=True) peft_model.save_pretrained(lora_path) - convert_checkpoint_to_megatron_moe_lora_if_needed(lora_path) - - # Write final adapter_config with per-expert target_modules - LoraConfig( - base_model_name_or_path=base_model, - r=rank, - lora_alpha=lora_alpha, - target_modules=default_target_modules(base_model), - bias="none", - ).save_pretrained(lora_path) del peft_model, model if torch.cuda.is_available(): @@ -224,10 +210,11 @@ def _get_optimizer_state_path(self, job_type: Literal["rl", "sft"]) -> str: return optimizer_state_path def _default_lora_adapter_config(self) -> LoraConfig: + peft_args = self.config.get("peft_args", {}) return LoraConfig( base_model_name_or_path=self.base_model, - r=LORA_RANK, - lora_alpha=LORA_ALPHA, + r=int(peft_args.get("r", LORA_RANK)), + lora_alpha=int(peft_args.get("lora_alpha", LORA_ALPHA)), target_modules=default_target_modules(self.base_model), bias="none", ) @@ -247,9 +234,12 @@ def _adapter_has_weights(self, lora_path: str) -> bool: return False def _create_identity_lora(self, lora_path: str) -> None: + peft_args = self.config.get("peft_args", {}) create_identity_lora( self.base_model, lora_path, + rank=int(peft_args.get("r", LORA_RANK)), + lora_alpha=int(peft_args.get("lora_alpha", LORA_ALPHA)), random_state=self._megatron_random_state(), ) @@ -541,6 +531,13 @@ async def _ensure_megatron_running(self) -> None: random_state = self._megatron_random_state() if random_state is not None: launch_env["ART_MEGATRON_RANDOM_STATE"] = str(random_state) + peft_args = self.config.get("peft_args", {}) + launch_env["ART_MEGATRON_LORA_RANK"] = str( + int(peft_args.get("r", LORA_RANK)) + ) + launch_env["ART_MEGATRON_LORA_ALPHA"] = str( + int(peft_args.get("lora_alpha", LORA_ALPHA)) + ) command = ( f"{setup_cmd}uv run --project {shlex.quote(str(project_root))} " diff --git a/src/art/megatron/train.py b/src/art/megatron/train.py index 6967c6724..2c213a51b 100644 --- 
a/src/art/megatron/train.py +++ b/src/art/megatron/train.py @@ -80,6 +80,9 @@ PackedTensors, packed_tensors_from_dir, ) +from art.utils.convert_megatron_moe_lora import ( + convert_peft_target_parameter_moe_lora_to_megatron, +) safetensors = importlib.import_module("safetensors") safetensors_torch = importlib.import_module("safetensors.torch") @@ -774,7 +777,7 @@ def _load_lora_and_optimizer( optimizer_state_path: str, ) -> dict[str, torch.Tensor]: print0(runtime.rank, "Loading adapter model from", lora_path) - adapter_model = load_lora_adapter_state_dict(lora_path) + adapter_model = _load_megatron_adapter_state_dict(lora_path) load_adapter_into_model(runtime.model, adapter_model) runtime.optimizer = _build_optimizer( runtime.model, @@ -800,6 +803,26 @@ def _load_lora_and_optimizer( return adapter_model +def _load_lora_rank(lora_path: str) -> int: + config_path = os.path.join(lora_path, "adapter_config.json") + if not os.path.exists(config_path): + return 1 + with open(config_path) as f: + adapter_config = json.load(f) + rank = adapter_config.get("r", 1) + if not isinstance(rank, int) or rank <= 0: + raise ValueError(f"Invalid LoRA rank in {config_path}: {rank!r}") + return rank + + +def _load_megatron_adapter_state_dict(lora_path: str) -> dict[str, torch.Tensor]: + adapter_model = load_lora_adapter_state_dict(lora_path) + return convert_peft_target_parameter_moe_lora_to_megatron( + adapter_model, + rank=_load_lora_rank(lora_path), + ) + + def _save_lora_and_optimizer( runtime: TrainingRuntime, *, @@ -932,10 +955,15 @@ def maybe_load_adapter_into_model( print0(rank, "No adapter model found at", adapter_model_path) return {} print0(rank, "Loading adapter model from", adapter_model_path) + lora_path = os.path.dirname(adapter_model_path) with safe_open(adapter_model_path, framework="pt") as adapter_file: adapter_model = { key: adapter_file.get_tensor(key) for key in adapter_file.keys() } + adapter_model = convert_peft_target_parameter_moe_lora_to_megatron( + 
adapter_model, + rank=_load_lora_rank(lora_path), + ) load_adapter_into_model(model_chunks, adapter_model, optimizer) return adapter_model diff --git a/src/art/preprocessing/tokenize.py b/src/art/preprocessing/tokenize.py index fb3e80115..230c980ac 100644 --- a/src/art/preprocessing/tokenize.py +++ b/src/art/preprocessing/tokenize.py @@ -251,12 +251,16 @@ def tokenize_trajectory( # mapping here instead of the OpenAI JSON string. messages = _normalize_tool_call_arguments_for_chat_template(tokenizer, messages) tools = _normalize_tools_for_chat_template(history.tools) + # These real-completion renders are only used for debug text and sentinel + # selection. Use a closed final message here because some chat templates + # normalize generated blocks, which can make Transformers reject + # continue_final_message before we reach the trainable sentinel render below. chat = cast( str, tokenizer.apply_chat_template( messages, tools=tools, - continue_final_message=True, + continue_final_message=False, tokenize=False, **(chat_template_kwargs or {}), ), @@ -266,7 +270,7 @@ def tokenize_trajectory( tokenizer.apply_chat_template( messages, tools=tools, - continue_final_message=True, + continue_final_message=False, return_dict=False, **(chat_template_kwargs or {}), ), diff --git a/src/art/utils/convert_megatron_moe_lora.py b/src/art/utils/convert_megatron_moe_lora.py index b5808055c..a515655b9 100644 --- a/src/art/utils/convert_megatron_moe_lora.py +++ b/src/art/utils/convert_megatron_moe_lora.py @@ -1,4 +1,4 @@ -"""Convert PEFT target-parameter MoE LoRA to ART Megatron per-expert LoRA. +"""Convert between PEFT target-parameter and ART Megatron MoE LoRA tensors. PEFT saves LoRA for fused MoE expert parameters as tensors under: mlp.experts.base_layer.lora_* (gate_up_proj) @@ -9,16 +9,17 @@ mlp.experts.0.up_proj.lora_A.weight mlp.experts.0.down_proj.lora_A.weight +Checkpoints stay in the PEFT fused format on disk because vLLM expects that +layout. 
Megatron converts to per-expert keys in memory while loading, then the +Megatron shard merger converts trained tensors back before writing the final +adapter_model.safetensors. + TODO: Teach Megatron's LoRA loader to accept PEFT fused target_parameters directly, then delete this converter entirely. """ -import json -import os import re -from typing import Any -import safetensors.torch import torch _FUSED_EXPERT_PATTERN = re.compile( @@ -26,18 +27,12 @@ r"(?Pbase_layer\.)?" r"(?Plora_[AB])\.weight$" ) - - -def _has_peft_target_parameter_moe_lora(tensors: dict[str, torch.Tensor]) -> bool: - """Check whether the adapter contains PEFT fused target-parameter MoE LoRA.""" - return any(_FUSED_EXPERT_PATTERN.search(key) for key in tensors) - - -def _rank_from_adapter_config(adapter_config: dict[str, Any]) -> int: - rank = adapter_config.get("r", adapter_config.get("lora_rank", 8)) - if not isinstance(rank, int) or rank <= 0: - raise ValueError(f"Invalid LoRA rank in adapter_config: {rank!r}") - return rank +_MEGATRON_EXPERT_PATTERN = re.compile( + r"(?P.*\.mlp\.experts)\." + r"(?P\d+)\." + r"(?Pgate_proj|up_proj|down_proj)\." 
+ r"(?Plora_[AB])\.weight$" +) def _reshape_expert_a( @@ -74,7 +69,7 @@ def _reshape_expert_b( f"{key}: second dimension {num_experts_times_rank} does not match " f"num_experts * rank ({expected})" ) - return tensor.reshape(out_features, num_experts, rank).permute(1, 0, 2) + return tensor.reshape(out_features, rank, num_experts).permute(2, 0, 1) def _convert_gate_up_lora( @@ -199,35 +194,127 @@ def convert_peft_target_parameter_moe_lora_to_megatron( return converted -def convert_checkpoint_to_megatron_moe_lora_if_needed(checkpoint_dir: str) -> None: - """Convert a PEFT MoE target-parameter adapter to Megatron format if needed.""" - adapter_path = os.path.join(checkpoint_dir, "adapter_model.safetensors") - config_path = os.path.join(checkpoint_dir, "adapter_config.json") +def _stack_expert_tensors( + prefix: str, + projection_tensors: dict[int, torch.Tensor], + *, + projection: str, + lora: str, +) -> torch.Tensor: + expert_ids = sorted(projection_tensors) + if expert_ids != list(range(len(expert_ids))): + raise ValueError( + f"{prefix}.{projection}.{lora}: expected contiguous expert ids, got " + f"{expert_ids}" + ) + tensors = [projection_tensors[expert_id] for expert_id in expert_ids] + first_shape = tensors[0].shape + for expert_id, tensor in zip(expert_ids, tensors): + if tensor.shape != first_shape: + raise ValueError( + f"{prefix}.{expert_id}.{projection}.{lora}: expected shape " + f"{first_shape}, got {tensor.shape}" + ) + return torch.stack(tensors) - if not os.path.exists(adapter_path) or not os.path.exists(config_path): - return - tensors = safetensors.torch.load_file(adapter_path) - if not _has_peft_target_parameter_moe_lora(tensors): - return +def _flatten_expert_a(per_expert_a: torch.Tensor) -> torch.Tensor: + num_experts, rank, in_features = per_expert_a.shape + return per_expert_a.reshape(num_experts * rank, in_features).contiguous() - with open(config_path) as f: - adapter_config = json.load(f) - rank = _rank_from_adapter_config(adapter_config) - 
converted = convert_peft_target_parameter_moe_lora_to_megatron( - tensors, - rank=rank, +def _flatten_expert_b(per_expert_b: torch.Tensor) -> torch.Tensor: + num_experts, out_features, rank = per_expert_b.shape + return ( + per_expert_b.permute(1, 2, 0) + .reshape(out_features, num_experts * rank) + .contiguous() ) - safetensors.torch.save_file(converted, adapter_path) - adapter_config["target_modules"] = [ - module - for module in adapter_config.get("target_modules", []) - if "experts" not in module - ] + ["gate_proj", "up_proj", "down_proj"] - adapter_config.pop("target_parameters", None) +def convert_megatron_moe_lora_to_peft_target_parameter( + tensors: dict[str, torch.Tensor], +) -> dict[str, torch.Tensor]: + """Convert ART Megatron per-expert MoE LoRA tensors to PEFT fused keys.""" + converted: dict[str, torch.Tensor] = {} + grouped: dict[ + str, + dict[str, dict[str, dict[int, torch.Tensor]]], + ] = {} + + for key, tensor in tensors.items(): + match = _MEGATRON_EXPERT_PATTERN.match(key) + if match is None: + converted[key] = tensor + continue + prefix = match.group("prefix") + projection = match.group("projection") + lora = match.group("lora") + expert_id = int(match.group("expert")) + grouped.setdefault(prefix, {}).setdefault(projection, {}).setdefault(lora, {})[ + expert_id + ] = tensor + + for prefix, projections in grouped.items(): + required = { + "gate_proj": {"lora_A", "lora_B"}, + "up_proj": {"lora_A", "lora_B"}, + "down_proj": {"lora_A", "lora_B"}, + } + for projection, loras in required.items(): + missing_loras = loras - set(projections.get(projection, {})) + if missing_loras: + raise ValueError( + f"{prefix}.{projection}: missing {sorted(missing_loras)}" + ) + + gate_a = _stack_expert_tensors( + prefix, + projections["gate_proj"]["lora_A"], + projection="gate_proj", + lora="lora_A", + ) + up_a = _stack_expert_tensors( + prefix, + projections["up_proj"]["lora_A"], + projection="up_proj", + lora="lora_A", + ) + if not torch.equal(gate_a, up_a): + 
raise ValueError( + f"{prefix}: cannot convert Megatron gate/up LoRA to PEFT " + "target_parameters because gate_proj.lora_A and up_proj.lora_A differ" + ) + gate_b = _stack_expert_tensors( + prefix, + projections["gate_proj"]["lora_B"], + projection="gate_proj", + lora="lora_B", + ) + up_b = _stack_expert_tensors( + prefix, + projections["up_proj"]["lora_B"], + projection="up_proj", + lora="lora_B", + ) + down_a = _stack_expert_tensors( + prefix, + projections["down_proj"]["lora_A"], + projection="down_proj", + lora="lora_A", + ) + down_b = _stack_expert_tensors( + prefix, + projections["down_proj"]["lora_B"], + projection="down_proj", + lora="lora_B", + ) + + converted[f"{prefix}.base_layer.lora_A.weight"] = _flatten_expert_a(gate_a) + converted[f"{prefix}.base_layer.lora_B.weight"] = _flatten_expert_b( + torch.cat([gate_b, up_b], dim=1) + ) + converted[f"{prefix}.lora_A.weight"] = _flatten_expert_a(down_a) + converted[f"{prefix}.lora_B.weight"] = _flatten_expert_b(down_b) - with open(config_path, "w") as f: - json.dump(adapter_config, f, indent=2) + return converted diff --git a/src/art/vllm/dedicated_server.py b/src/art/vllm/dedicated_server.py index 47921be6b..12fe1257a 100644 --- a/src/art/vllm/dedicated_server.py +++ b/src/art/vllm/dedicated_server.py @@ -42,7 +42,6 @@ def _patch_art_dedicated_routes() -> None: from fastapi import APIRouter, FastAPI, Request from fastapi.responses import JSONResponse from vllm.entrypoints.openai import api_server - from vllm.tasks import SupportedTask if getattr(api_server, "_art_dedicated_routes_patched", False): return @@ -51,9 +50,13 @@ def _patch_art_dedicated_routes() -> None: def art_build_app( args: argparse.Namespace, - supported_tasks: tuple[SupportedTask, ...] 
| None = None, + supported_tasks: object | None = None, + model_config: object | None = None, ) -> FastAPI: - app = original_build_app(args, supported_tasks) + if model_config is None: + app = original_build_app(args, supported_tasks) + else: + app = original_build_app(args, supported_tasks, model_config) router = APIRouter() @router.post("/art/set_served_model_name") diff --git a/src/art/vllm/engine.py b/src/art/vllm/engine.py index c8da5c55b..53e7a76a3 100644 --- a/src/art/vllm/engine.py +++ b/src/art/vllm/engine.py @@ -26,10 +26,10 @@ async def get_llm(args: vllm.AsyncEngineArgs) -> AsyncLLM: # ty:ignore[unresolv """ # Download model only if it's not a local path if not os.path.exists(args.model): - process = await asyncio.create_subprocess_shell( - f"HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download {args.model}" - ) - await process.wait() + from huggingface_hub import snapshot_download + + os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1") + await asyncio.to_thread(snapshot_download, repo_id=args.model) llm = AsyncLLM.from_engine_args( replace( diff --git a/tests/unit/test_convert_megatron_moe_lora.py b/tests/unit/test_convert_megatron_moe_lora.py new file mode 100644 index 000000000..50cbc6a60 --- /dev/null +++ b/tests/unit/test_convert_megatron_moe_lora.py @@ -0,0 +1,74 @@ +import torch + +from art.utils.convert_megatron_moe_lora import ( + convert_megatron_moe_lora_to_peft_target_parameter, + convert_peft_target_parameter_moe_lora_to_megatron, +) + + +def _peft_tensors(prefix: str) -> dict[str, torch.Tensor]: + num_experts = 2 + rank = 2 + hidden_size = 3 + intermediate_size = 4 + return { + f"{prefix}.base_layer.lora_A.weight": torch.arange( + num_experts * rank * hidden_size + ).reshape(num_experts * rank, hidden_size), + f"{prefix}.base_layer.lora_B.weight": torch.arange( + 100, + 100 + 2 * intermediate_size * num_experts * rank, + ).reshape(2 * intermediate_size, num_experts * rank), + f"{prefix}.lora_A.weight": torch.arange( + 200, + 200 + 
num_experts * rank * intermediate_size, + ).reshape(num_experts * rank, intermediate_size), + f"{prefix}.lora_B.weight": torch.arange( + 300, + 300 + hidden_size * num_experts * rank, + ).reshape(hidden_size, num_experts * rank), + } + + +def test_convert_peft_target_parameter_moe_lora_to_megatron_round_trips() -> None: + prefix = "base_model.model.model.layers.0.mlp.experts" + original = _peft_tensors(prefix) + original["base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight"] = ( + torch.ones(1) + ) + + megatron = convert_peft_target_parameter_moe_lora_to_megatron( + original, + rank=2, + ) + converted = convert_megatron_moe_lora_to_peft_target_parameter(megatron) + + assert set(converted) == set(original) + for key, tensor in original.items(): + assert torch.equal(converted[key], tensor) + + +def test_convert_peft_target_parameter_moe_lora_uses_rank_major_b_layout() -> None: + prefix = "base_model.model.model.layers.0.mlp.experts" + original = _peft_tensors(prefix) + + megatron = convert_peft_target_parameter_moe_lora_to_megatron( + original, + rank=2, + ) + + gate_up_b = original[f"{prefix}.base_layer.lora_B.weight"] + down_b = original[f"{prefix}.lora_B.weight"] + + assert torch.equal( + megatron[f"{prefix}.1.gate_proj.lora_B.weight"], + gate_up_b.reshape(8, 2, 2).permute(2, 0, 1)[1, :4], + ) + assert torch.equal( + megatron[f"{prefix}.1.up_proj.lora_B.weight"], + gate_up_b.reshape(8, 2, 2).permute(2, 0, 1)[1, 4:], + ) + assert torch.equal( + megatron[f"{prefix}.1.down_proj.lora_B.weight"], + down_b.reshape(3, 2, 2).permute(2, 0, 1)[1], + ) diff --git a/tests/unit/test_megatron_dedicated.py b/tests/unit/test_megatron_dedicated.py index f572ebc7e..ae2016ab7 100644 --- a/tests/unit/test_megatron_dedicated.py +++ b/tests/unit/test_megatron_dedicated.py @@ -215,11 +215,6 @@ def fake_model_from_config(config: Any, **_kwargs: Any) -> FakeModel: seen.setdefault("lora_config", lora_config) or FakePeftModel() ), ) - monkeypatch.setattr( - 
"art.megatron.service.convert_checkpoint_to_megatron_moe_lora_if_needed", - lambda _path: None, - ) - create_identity_lora("Qwen/Qwen3.5-35B-A3B", str(tmp_path)) assert seen["config"] is top_level_config.text_config @@ -384,6 +379,11 @@ async def test_megatron_service_start_openai_server_shared_lora_bootstraps_step_ "art.megatron.service.dev.get_openai_server_config", lambda **_kwargs: {"server_args": {"port": 8123}, "engine_args": {}}, ) + + async def fake_get_llm(_args: Any) -> object: + return object() + + monkeypatch.setattr("art.megatron.service.get_llm", fake_get_llm) monkeypatch.setattr( "art.megatron.service.openai_server_task", lambda **_kwargs: asyncio.sleep(0), diff --git a/tests/unit/test_megatron_qwen_helpers.py b/tests/unit/test_megatron_qwen_helpers.py index 0c0b77829..cba833983 100644 --- a/tests/unit/test_megatron_qwen_helpers.py +++ b/tests/unit/test_megatron_qwen_helpers.py @@ -7,6 +7,7 @@ import torch +from art.megatron import lora as megatron_lora from art.megatron.lora import SelfAttentionLinearQKVLoRA from art.megatron.train import _canonical_art_param_name @@ -57,3 +58,64 @@ def test_self_attention_linear_qkv_lora_accepts_nongated_qwen3_layout( assert wrapped.attention_output_gate is False assert wrapped.q_proj_lora.B_T.shape[-1] == q_out_features + + +def test_match_sequence_parallel_output_shape_scatters_first_dim( + monkeypatch: pytest.MonkeyPatch, +) -> None: + adapter_out = torch.arange(12).reshape(4, 3) + base_out = torch.empty(2, 3) + scatter_calls: list[torch.Tensor] = [] + + def fake_scatter(tensor: torch.Tensor) -> torch.Tensor: + scatter_calls.append(tensor) + return tensor[:2].contiguous() + + monkeypatch.setattr(megatron_lora, "_get_shard_world_size", lambda _domain: 2) + monkeypatch.setattr( + megatron_lora, + "scatter_to_sequence_parallel_region", + fake_scatter, + ) + + result = megatron_lora._match_sequence_parallel_output_shape( + adapter_out, + base_out, + adapter_model_prefix="model.layers.0.mlp.shared_expert", + ) + + 
assert scatter_calls == [adapter_out] + assert result.shape == base_out.shape + assert torch.equal(result, adapter_out[:2]) + + +def test_match_sequence_parallel_output_shape_gathers_first_dim( + monkeypatch: pytest.MonkeyPatch, +) -> None: + adapter_out = torch.arange(6).reshape(2, 3) + base_out = torch.empty(4, 3) + gather_calls: list[tuple[torch.Tensor, bool]] = [] + + def fake_gather( + tensor: torch.Tensor, + tensor_parallel_output_grad: bool = True, + ) -> torch.Tensor: + gather_calls.append((tensor, tensor_parallel_output_grad)) + return torch.cat([tensor, tensor], dim=0) + + monkeypatch.setattr(megatron_lora, "_get_shard_world_size", lambda _domain: 2) + monkeypatch.setattr( + megatron_lora, + "gather_from_sequence_parallel_region", + fake_gather, + ) + + result = megatron_lora._match_sequence_parallel_output_shape( + adapter_out, + base_out, + adapter_model_prefix="model.layers.0.mlp.shared_expert", + ) + + assert gather_calls == [(adapter_out, True)] + assert result.shape == base_out.shape + assert torch.equal(result, torch.cat([adapter_out, adapter_out], dim=0)) diff --git a/tests/unit/test_preprocessing_tokenize.py b/tests/unit/test_preprocessing_tokenize.py index 2a913e260..724d98cca 100644 --- a/tests/unit/test_preprocessing_tokenize.py +++ b/tests/unit/test_preprocessing_tokenize.py @@ -91,6 +91,31 @@ def apply_chat_template( ) +class _ContinueFinalMessageRejectingTokenizer(_FakeTokenizer): + def apply_chat_template( + self, + messages, + tools=None, + tokenize=True, + return_dict=None, + **kwargs, + ): + if kwargs.get("continue_final_message") is True and messages[-1].get( + "content", "" + ).startswith(""): + raise ValueError( + "continue_final_message is set but the final message does not appear " + "in the chat after applying the chat template!" 
+ ) + return super().apply_chat_template( + messages, + tools=tools, + tokenize=tokenize, + return_dict=return_dict, + **kwargs, + ) + + def test_tokenize_trajectory_requests_list_chat_template_output() -> None: tokenizer = _FakeTokenizer() messages = cast( @@ -154,6 +179,78 @@ def test_tokenize_trajectory_passes_chat_template_kwargs() -> None: ) +def test_tokenize_trajectory_does_not_continue_real_completion_with_thinking() -> None: + tokenizer = _ContinueFinalMessageRejectingTokenizer() + choice = Choice.model_validate( + { + "finish_reason": "stop", + "index": 0, + "logprobs": { + "content": [ + { + "token": "token_id:79", + "bytes": [79], + "logprob": -0.1, + "top_logprobs": [], + }, + { + "token": "token_id:75", + "bytes": [75], + "logprob": -0.2, + "top_logprobs": [], + }, + ], + "refusal": None, + }, + "message": { + "content": "\n reasoning \n\n\nOK", + "refusal": None, + "role": "assistant", + "annotations": None, + "audio": None, + "function_call": None, + "tool_calls": None, + }, + } + ) + messages = cast( + MessagesAndChoices, + [ + {"role": "user", "content": "Hi"}, + choice, + ], + ) + history = History(messages_and_choices=messages) + trajectory = Trajectory(messages_and_choices=messages, reward=1.0) + + result = tokenize_trajectory( + tokenizer=tokenizer, # type: ignore[arg-type] + image_processor=None, + history=history, + advantage=1.0, + allow_training_without_logprobs=False, + trajectory=trajectory, + chat_template_kwargs={ + "enable_thinking": False, + "preserve_thinking": True, + }, + ) + + assert result is not None + assistant_ids = [ + token_id + for token_id, mask in zip(result.token_ids, result.assistant_mask) + if mask + ] + assert assistant_ids == [79, 75] + continue_values = [ + call.get("continue_final_message") + for call in tokenizer.apply_chat_template_kwargs + ] + assert continue_values[:2] == [False, False] + assert continue_values[-1] is True + + def test_tokenize_sft_batch_requests_list_chat_template_output( monkeypatch: 
pytest.MonkeyPatch, ) -> None: From daa423fb1a07eb16af9135c23639b347fc4cc497 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Thu, 30 Apr 2026 21:47:25 +0000 Subject: [PATCH 06/13] fork unsloth to resolve new TRL and transformers --- pyproject.toml | 5 ++--- uv.lock | 60 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a0eba8a9b..97436d5e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,8 @@ backend = [ "peft>=0.18.0", "hf-xet>=1.1.0", "bitsandbytes>=0.45.5", - "unsloth==2026.4.8", - "unsloth-zoo==2026.4.9", + "unsloth[colab-no-deps,huggingface] @ git+https://github.com/Kovbo/unsloth.git@140026a39cfce9ee8702c46140d4c2d4846d1eb6", + "unsloth-zoo @ git+https://github.com/Kovbo/unsloth-zoo.git@68677b9814e90ccb647cbda91062e0d4001798bd", "torch==2.10.0", "torchao==0.17.0", "accelerate==1.13.0", @@ -138,7 +138,6 @@ override-dependencies = [ "nvidia-resiliency-ext<0.5", "quack-kernels>=0.2.7", "transformers==5.6.2", - "trl==1.3.0", "transformer-engine==2.11.0", ] exclude-dependencies = ["pynvml", "emerging-optimizers"] diff --git a/uv.lock b/uv.lock index 2a7538edb..8edeccaa8 100644 --- a/uv.lock +++ b/uv.lock @@ -29,7 +29,6 @@ overrides = [ { name = "quack-kernels", specifier = ">=0.2.7" }, { name = "transformer-engine", specifier = "==2.11.0" }, { name = "transformers", specifier = "==5.6.2" }, - { name = "trl", specifier = "==1.3.0" }, ] excludes = [ "emerging-optimizers", @@ -5666,7 +5665,7 @@ backend = [ { name = "torchao" }, { name = "transformers" }, { name = "trl" }, - { name = "unsloth" }, + { name = "unsloth", extra = ["colab-no-deps", "huggingface"] }, { name = "unsloth-zoo" }, { name = "vllm", marker = "sys_platform == 'linux'" }, { name = "wandb" }, @@ -5787,8 +5786,8 @@ requires-dist = [ { name = "transformers", marker = "extra == 'tinker'", specifier = "==5.6.2" }, { name = "trl", marker = "extra == 'backend'", specifier = "==1.3.0" }, { name = 
"typer", specifier = ">=0.15.2" }, - { name = "unsloth", marker = "extra == 'backend'", specifier = "==2026.4.8" }, - { name = "unsloth-zoo", marker = "extra == 'backend'", specifier = "==2026.4.9" }, + { name = "unsloth", extras = ["colab-no-deps", "huggingface"], marker = "extra == 'backend'", git = "https://github.com/Kovbo/unsloth.git?rev=140026a39cfce9ee8702c46140d4c2d4846d1eb6" }, + { name = "unsloth-zoo", marker = "extra == 'backend'", git = "https://github.com/Kovbo/unsloth-zoo.git?rev=68677b9814e90ccb647cbda91062e0d4001798bd" }, { name = "uvicorn", marker = "extra == 'tinker'", specifier = ">=0.35.0" }, { name = "vllm", marker = "sys_platform == 'linux' and extra == 'backend'", specifier = "==0.19.1" }, { name = "wandb", marker = "extra == 'backend'", specifier = "==0.25.0" }, @@ -8341,6 +8340,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/23/8146aad7d88f4fcb3a6218f41a60f6c2d4e3a72de72da1825dc7c8f7877c/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177", size = 15552, upload-time = "2022-05-26T13:35:21.206Z" }, ] +[[package]] +name = "sentence-transformers" +version = "5.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "scikit-learn" }, + { name = "scipy" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "transformers" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/68/7f98c221940ce783b492ad6140384daf2e2918cd7175009d6a362c22b9ee/sentence_transformers-5.4.1.tar.gz", hash = "sha256:436bcb1182a0ff42a8fb2b1c43498a70d0a75b688d182f2cd0d1dd115af61ddc", size = 428910, upload-time = "2026-04-14T13:34:59.006Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/d9/3a9b6f2ccdedc9dc00fe37b2fc58f58f8efbff44565cf4bf39d8568bb13a/sentence_transformers-5.4.1-py3-none-any.whl", hash = 
"sha256:a6d640fc363849b63affb8e140e9d328feabab86f83d58ac3e16b1c28140b790", size = 571311, upload-time = "2026-04-14T13:34:57.731Z" }, +] + [[package]] name = "sentencepiece" version = "0.2.1" @@ -9662,45 +9680,51 @@ wheels = [ [[package]] name = "unsloth" version = "2026.4.8" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/Kovbo/unsloth.git?rev=140026a39cfce9ee8702c46140d4c2d4846d1eb6#140026a39cfce9ee8702c46140d4c2d4846d1eb6" } dependencies = [ + { name = "nest-asyncio" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "typer" }, +] + +[package.optional-dependencies] +colab-no-deps = [ { name = "accelerate" }, { name = "bitsandbytes" }, + { name = "peft" }, + { name = "protobuf" }, + { name = "trl" }, + { name = "xformers", marker = "(platform_machine == 'AMD64' and 'linux' in sys_platform) or (platform_machine == 'x86_64' and 'linux' in sys_platform) or (platform_machine == 'AMD64' and sys_platform == 'win32') or (platform_machine == 'x86_64' and sys_platform == 'win32')" }, +] +huggingface = [ + { name = "accelerate" }, { name = "datasets" }, { name = "diffusers" }, { name = "hf-transfer" }, { name = "huggingface-hub" }, - { name = "nest-asyncio" }, { name = "numpy" }, { name = "packaging" }, { name = "peft" }, { name = "protobuf" }, { name = "psutil" }, - { name = "pydantic" }, - { name = "pyyaml" }, + { name = "sentence-transformers" }, { name = "sentencepiece" }, - { name = "torch" }, { name = "torchvision" }, { name = "tqdm" }, { name = "transformers" }, { name = "triton", marker = "'linux' in sys_platform" }, { name = "triton-windows", marker = "(platform_machine == 'AMD64' and sys_platform == 'win32') or (platform_machine == 'x86_64' and sys_platform == 'win32')" }, { name = "trl" }, - { name = "typer" }, { name = "tyro" }, { name = "unsloth-zoo" }, { name = "wheel" }, - { name = "xformers", marker = "(platform_machine == 'AMD64' and 'linux' in sys_platform) or (platform_machine == 'x86_64' and 'linux' in 
sys_platform) or (platform_machine == 'AMD64' and sys_platform == 'win32') or (platform_machine == 'x86_64' and sys_platform == 'win32')" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1f/c1/0331327cc0537da282a11eee4ed2b98fd1a527772a1d7b87336b0fd0d709/unsloth-2026.4.8.tar.gz", hash = "sha256:57f1278d117b5fedc38eaf6f59aab6578c30a2c47e2f34e9049a366aa1a2cb28", size = 71199823, upload-time = "2026-04-23T14:05:14.602Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/9d/00445c8134207de8be674efd219f11944af37d10a2544f5c044a143e7583/unsloth-2026.4.8-py3-none-any.whl", hash = "sha256:f1992e0be0c79a702fc7dbe7b0f2b48e427602fa101754bd56320ad55571aba0", size = 67048222, upload-time = "2026-04-23T14:05:08.981Z" }, ] [[package]] name = "unsloth-zoo" version = "2026.4.9" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/Kovbo/unsloth-zoo.git?rev=68677b9814e90ccb647cbda91062e0d4001798bd#68677b9814e90ccb647cbda91062e0d4001798bd" } dependencies = [ { name = "accelerate" }, { name = "cut-cross-entropy" }, @@ -9727,10 +9751,6 @@ dependencies = [ { name = "tyro" }, { name = "wheel" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/17/47/837e40c08542466f4aa3489bdf303f6abeec10fe5ace430102b36fdceb16/unsloth_zoo-2026.4.9.tar.gz", hash = "sha256:074e3b84bffa61c08ba1292822fd34ee243ac83f54732dce37b877fa34eb3109", size = 389589, upload-time = "2026-04-22T16:17:21.186Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/46/2e/5528072e1eefeddd16adc0173e1a41e72585a4770507ab7f52313bbc3435/unsloth_zoo-2026.4.9-py3-none-any.whl", hash = "sha256:782bad1d34db3fc5686b33c0073708fbc3477eca1f427412bc299f48fbdf44e3", size = 421927, upload-time = "2026-04-22T16:17:18.726Z" }, -] [[package]] name = "uritemplate" From 41f568604e071a9eccf2326be190a313bb5145da Mon Sep 17 00:00:00 2001 From: Kovbo Date: Thu, 30 Apr 2026 22:06:17 +0000 Subject: [PATCH 07/13] refactor --- requirements/backend.vcs.txt | 4 ++-- 
src/art/local/backend.py | 18 ++++++---------- src/art/preprocessing/tokenize.py | 35 ++++++++++++++----------------- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/requirements/backend.vcs.txt b/requirements/backend.vcs.txt index 6fd0a7507..a9cba0079 100644 --- a/requirements/backend.vcs.txt +++ b/requirements/backend.vcs.txt @@ -7,5 +7,5 @@ # Torchtune pinned to known-good commit torchtune @ git+https://github.com/pytorch/torchtune.git@2344509cf83bd886538fe3e8263e5145d1afb5c2 -# Unsloth Zoo pinned to known-good commit - unsloth-zoo @ git+https://github.com/bradhilton/unsloth-zoo@f536ee6f554c11dea3ae142e05ab487c206c5c70 +# Unsloth Zoo fork pinned to known-good Transformers 5 commit + unsloth-zoo @ git+https://github.com/Kovbo/unsloth-zoo.git@68677b9814e90ccb647cbda91062e0d4001798bd diff --git a/src/art/local/backend.py b/src/art/local/backend.py index cfa1d89e4..823c3aead 100644 --- a/src/art/local/backend.py +++ b/src/art/local/backend.py @@ -342,9 +342,8 @@ def _get_packed_tensors( except Exception: self._image_processors[model.base_model] = None tokenizer = self._tokenizers[model.base_model] - chat_template_kwargs = ( - model._internal_config or dev.InternalModelConfig() - ).get("chat_template_kwargs") + internal_config = cast(dev.InternalModelConfig, model._internal_config or {}) + chat_template_kwargs = internal_config.get("chat_template_kwargs") tokenized_results = list( tokenize_trajectory_groups( tokenizer, @@ -358,9 +357,7 @@ def _get_packed_tensors( if not tokenized_results: return None model_max_sequence_length = ( - (model._internal_config or dev.InternalModelConfig()) - .get("init_args", {}) - .get("max_seq_length", 32_768) + internal_config.get("init_args", {}).get("max_seq_length", 32_768) ) if packed_sequence_length is None: assert not self._requires_explicit_packed_sequence_length, ( @@ -961,18 +958,15 @@ async def _train_sft( instruction_part, response_part = get_instruction_response_parts( model.base_model, tokenizer ) - 
chat_template_kwargs = ( - model._internal_config or dev.InternalModelConfig() - ).get("chat_template_kwargs") + internal_config = cast(dev.InternalModelConfig, model._internal_config or {}) + chat_template_kwargs = internal_config.get("chat_template_kwargs") if verbose: print(f"Using instruction_part: {instruction_part!r}") print(f"Using response_part: {response_part!r}") max_seq_length = ( - (model._internal_config or dev.InternalModelConfig()) - .get("init_args", {}) - .get("max_seq_length", 32_768) + internal_config.get("init_args", {}).get("max_seq_length", 32_768) ) max_seq_length = int(max_seq_length) if max_seq_length is not None else None diff --git a/src/art/preprocessing/tokenize.py b/src/art/preprocessing/tokenize.py index 9e1942813..8e0c25ae5 100644 --- a/src/art/preprocessing/tokenize.py +++ b/src/art/preprocessing/tokenize.py @@ -281,6 +281,7 @@ def tokenize_trajectory( # selection. Use a closed final message here because some chat templates # normalize generated blocks, which can make Transformers reject # continue_final_message before we reach the trainable sentinel render below. 
+ template_kwargs = chat_template_kwargs or {} chat = cast( str, tokenizer.apply_chat_template( @@ -288,18 +289,16 @@ def tokenize_trajectory( tools=tools, continue_final_message=False, tokenize=False, - **(chat_template_kwargs or {}), + **template_kwargs, ), ) - original_token_ids = cast( - list[int], - tokenizer.apply_chat_template( - messages, - tools=tools, - continue_final_message=False, - return_dict=False, - **(chat_template_kwargs or {}), - ), + original_token_ids = _apply_chat_template_token_ids( + tokenizer, + messages, + tools=tools, + continue_final_message=False, + return_dict=False, + **template_kwargs, ) sentinel_token_id = max(set(range(tokenizer.vocab_size)) - set(original_token_ids)) sentinel_token = tokenizer.decode(sentinel_token_id) @@ -326,15 +325,13 @@ def tokenize_trajectory( ) else: token_template_messages.append(cast(dict[str, Any], message)) - token_ids = cast( - list[int], - tokenizer.apply_chat_template( - token_template_messages, - tools=tools, - continue_final_message=True, - return_dict=False, - **(chat_template_kwargs or {}), - ), + token_ids = _apply_chat_template_token_ids( + tokenizer, + token_template_messages, + tools=tools, + continue_final_message=True, + return_dict=False, + **template_kwargs, ) assistant_mask: list[int] = [0] * len(token_ids) logprobs = [float("nan")] * len(token_ids) From f4d3d048dc881ac95599f42e492fe7c89eb3e4d5 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Thu, 30 Apr 2026 22:28:21 +0000 Subject: [PATCH 08/13] format --- dev/yes-no-maybe-megatron.py | 4 +++- src/art/local/backend.py | 8 ++++---- src/art/megatron/service.py | 4 +--- src/art/vllm/dedicated_server.py | 18 ++++++++++++------ 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/dev/yes-no-maybe-megatron.py b/dev/yes-no-maybe-megatron.py index 2e09210d8..ad9e470ad 100644 --- a/dev/yes-no-maybe-megatron.py +++ b/dev/yes-no-maybe-megatron.py @@ -57,7 +57,9 @@ async def main(): backend = MegatronBackend() base_model = 
os.environ.get("BASE_MODEL", "Qwen/Qwen3.6-35B-A3B") model = art.TrainableModel( - name=os.environ.get("MODEL_NAME", f"yes-no-maybe-megatron-{uuid.uuid4().hex[:8]}"), + name=os.environ.get( + "MODEL_NAME", f"yes-no-maybe-megatron-{uuid.uuid4().hex[:8]}" + ), project="yes-no-maybe-megatron", base_model=base_model, _internal_config=art.dev.InternalModelConfig( diff --git a/src/art/local/backend.py b/src/art/local/backend.py index 823c3aead..e1a6e8b28 100644 --- a/src/art/local/backend.py +++ b/src/art/local/backend.py @@ -356,8 +356,8 @@ def _get_packed_tensors( ) if not tokenized_results: return None - model_max_sequence_length = ( - internal_config.get("init_args", {}).get("max_seq_length", 32_768) + model_max_sequence_length = internal_config.get("init_args", {}).get( + "max_seq_length", 32_768 ) if packed_sequence_length is None: assert not self._requires_explicit_packed_sequence_length, ( @@ -965,8 +965,8 @@ async def _train_sft( print(f"Using instruction_part: {instruction_part!r}") print(f"Using response_part: {response_part!r}") - max_seq_length = ( - internal_config.get("init_args", {}).get("max_seq_length", 32_768) + max_seq_length = internal_config.get("init_args", {}).get( + "max_seq_length", 32_768 ) max_seq_length = int(max_seq_length) if max_seq_length is not None else None diff --git a/src/art/megatron/service.py b/src/art/megatron/service.py index 8544a8831..a114ebdc4 100644 --- a/src/art/megatron/service.py +++ b/src/art/megatron/service.py @@ -532,9 +532,7 @@ async def _ensure_megatron_running(self) -> None: if random_state is not None: launch_env["ART_MEGATRON_RANDOM_STATE"] = str(random_state) peft_args = self.config.get("peft_args", {}) - launch_env["ART_MEGATRON_LORA_RANK"] = str( - int(peft_args.get("r", LORA_RANK)) - ) + launch_env["ART_MEGATRON_LORA_RANK"] = str(int(peft_args.get("r", LORA_RANK))) launch_env["ART_MEGATRON_LORA_ALPHA"] = str( int(peft_args.get("lora_alpha", LORA_ALPHA)) ) diff --git a/src/art/vllm/dedicated_server.py 
b/src/art/vllm/dedicated_server.py index 12fe1257a..20479cb24 100644 --- a/src/art/vllm/dedicated_server.py +++ b/src/art/vllm/dedicated_server.py @@ -9,8 +9,11 @@ import argparse import asyncio +from collections.abc import Callable +import inspect import json import os +from typing import Any, cast def parse_args(argv: list[str] | None = None) -> argparse.Namespace: @@ -46,17 +49,20 @@ def _patch_art_dedicated_routes() -> None: if getattr(api_server, "_art_dedicated_routes_patched", False): return - original_build_app = api_server.build_app + original_build_app = cast("Callable[..., FastAPI]", api_server.build_app) + original_build_app_supports_model_config = ( + len(inspect.signature(api_server.build_app).parameters) >= 3 + ) def art_build_app( args: argparse.Namespace, - supported_tasks: object | None = None, - model_config: object | None = None, + supported_tasks: Any = None, + model_config: Any = None, ) -> FastAPI: - if model_config is None: - app = original_build_app(args, supported_tasks) - else: + if model_config is not None and original_build_app_supports_model_config: app = original_build_app(args, supported_tasks, model_config) + else: + app = original_build_app(args, supported_tasks) router = APIRouter() @router.post("/art/set_served_model_name") From 4b5f225f46a3e4ecc4c44d0e2fd4520124bdb9a7 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Thu, 30 Apr 2026 23:59:59 +0000 Subject: [PATCH 09/13] rename var --- dev/run_qwen3_5_megatron_yes_no_maybe.py | 164 ----------------------- src/art/dev/get_model_config.py | 4 +- src/art/dev/validate.py | 4 +- src/art/megatron/train.py | 8 +- tests/unit/test_megatron_dedicated.py | 6 +- 5 files changed, 11 insertions(+), 175 deletions(-) delete mode 100644 dev/run_qwen3_5_megatron_yes_no_maybe.py diff --git a/dev/run_qwen3_5_megatron_yes_no_maybe.py b/dev/run_qwen3_5_megatron_yes_no_maybe.py deleted file mode 100644 index 056865142..000000000 --- a/dev/run_qwen3_5_megatron_yes_no_maybe.py +++ /dev/null @@ -1,164 +0,0 @@ 
-"""Launch a multi-step Qwen3.5 Megatron yes-no-maybe run on SkyPilot.""" - -import argparse -import os -import textwrap - -from dotenv import load_dotenv -import sky -from sky import ClusterStatus - -load_dotenv() - -DEFAULT_IMAGE_ID = "docker:nvidia/cuda:12.8.1-devel-ubuntu22.04" - - -def _format_env_bool(value: bool) -> str: - return "true" if value else "false" - - -def _format_int_list(values: list[int]) -> str: - return ",".join(str(value) for value in values) - - -parser = argparse.ArgumentParser( - description="Launch a Qwen3.5 Megatron yes-no-maybe convergence run." -) -parser.add_argument("--fast", action="store_true") -parser.add_argument("--base-model", type=str, default="Qwen/Qwen3.5-35B-A3B") -parser.add_argument("--accelerator", type=str, default="H200:2") -parser.add_argument( - "--cluster-name", type=str, default="art-qwen35-megatron-yes-no-maybe" -) -parser.add_argument("--image-id", type=str, default=DEFAULT_IMAGE_ID) -parser.add_argument("--project", type=str, default="qwen35-megatron-ynm") -parser.add_argument("--gpu-memory-utilization", type=float, default=0.65) -parser.add_argument("--max-model-len", type=int, default=1024) -parser.add_argument("--max-seq-length", type=int, default=1024) -parser.add_argument("--packed-sequence-length", type=int, default=None) -parser.add_argument("--max-num-seqs", type=int, default=8) -parser.add_argument("--num-steps", type=int, default=10) -parser.add_argument("--rollouts-per-prompt", type=int, default=8) -parser.add_argument("--eval-prompts", type=int, default=24) -parser.add_argument("--max-tokens", type=int, default=5) -parser.add_argument("--timeout", type=float, default=600.0) -parser.add_argument("--learning-rate", type=float, default=5e-5) -parser.add_argument( - "--load-in-4bit", action=argparse.BooleanOptionalAction, default=False -) -parser.add_argument( - "--load-in-16bit", action=argparse.BooleanOptionalAction, default=True -) -parser.add_argument("--trainer-gpu-ids", type=int, nargs="+", 
default=[0]) -parser.add_argument("--inference-gpu-ids", type=int, nargs="+", default=[1]) -args = parser.parse_args() - -cluster_name = args.cluster_name -cluster_prefix = os.environ.get("CLUSTER_PREFIX") -if cluster_prefix: - cluster_name = f"{cluster_prefix}-{cluster_name}" - -setup_script = textwrap.dedent("""\ - echo 'Setting up environment...' - apt-get update - apt-get install -y python3 python3-pip python-is-python3 git curl - curl -LsSf https://astral.sh/uv/install.sh | sh - source $HOME/.local/bin/env -""") - -env = [ - f"PROJECT={args.project}", - "MODEL_NAME=qwen35-megatron-ynm-$(date +%Y%m%d-%H%M%S)", - f"BASE_MODEL={args.base_model}", - f"GPU_MEMORY_UTILIZATION={args.gpu_memory_utilization}", - f"MAX_MODEL_LEN={args.max_model_len}", - f"MAX_SEQ_LENGTH={args.max_seq_length}", - "PACKED_SEQUENCE_LENGTH=" - + str( - args.packed_sequence_length - if args.packed_sequence_length is not None - else args.max_seq_length - ), - f"MAX_NUM_SEQS={args.max_num_seqs}", - f"LOAD_IN_4BIT={_format_env_bool(args.load_in_4bit)}", - f"LOAD_IN_16BIT={_format_env_bool(args.load_in_16bit)}", - f"NUM_STEPS={args.num_steps}", - f"ROLLOUTS_PER_PROMPT={args.rollouts_per_prompt}", - f"EVAL_PROMPTS={args.eval_prompts}", - f"MAX_TOKENS={args.max_tokens}", - f"TIMEOUT={args.timeout}", - f"LEARNING_RATE={args.learning_rate}", - f"TRAINER_GPU_IDS={_format_int_list(args.trainer_gpu_ids)}", - f"INFERENCE_GPU_IDS={_format_int_list(args.inference_gpu_ids)}", -] -env_block = " \\\n ".join(env) - -run_script = textwrap.dedent( - f"""\ - source $HOME/.local/bin/env - cd ~/sky_workdir - bash src/art/megatron/setup.sh - {env_block} \\ - ~/.local/bin/uv run dev/yes-no-maybe-megatron.py -""" -) - -task = sky.Task( - name="qwen3.5-megatron-yes-no-maybe", - setup=setup_script, - run=run_script, - workdir=".", -) -task.set_resources( - sky.Resources( - accelerators=args.accelerator, - cloud=sky.clouds.Kubernetes(), - image_id=args.image_id, - ) -) -if os.path.exists(".env"): - 
task.set_file_mounts({"~/sky_workdir/.env": ".env"}) - -print(f"Launching on cluster: {cluster_name}") -print(f" base_model: {args.base_model}") -print(f" project: {args.project}") -print(f" accelerator: {args.accelerator}") -print(f" image_id: {args.image_id}") -print(f" gpu_memory_utilization: {args.gpu_memory_utilization}") -print(f" max_model_len: {args.max_model_len}") -print(f" max_seq_length: {args.max_seq_length}") -print( - " packed_sequence_length: " - f"{args.packed_sequence_length if args.packed_sequence_length is not None else args.max_seq_length}" -) -print(f" max_num_seqs: {args.max_num_seqs}") -print(f" num_steps: {args.num_steps}") -print(f" rollouts_per_prompt: {args.rollouts_per_prompt}") -print(f" eval_prompts: {args.eval_prompts}") -print(f" max_tokens: {args.max_tokens}") -print(f" timeout: {args.timeout}") -print(f" learning_rate: {args.learning_rate}") -print(f" load_in_4bit: {args.load_in_4bit}") -print(f" load_in_16bit: {args.load_in_16bit}") -print(f" trainer_gpu_ids: {args.trainer_gpu_ids}") -print(f" inference_gpu_ids: {args.inference_gpu_ids}") - -cluster_status = sky.stream_and_get(sky.status(cluster_names=[cluster_name])) -if cluster_status and cluster_status[0]["status"] == ClusterStatus.UP: - print(f"Cluster {cluster_name} is UP. Canceling any active jobs...") - sky.stream_and_get(sky.cancel(cluster_name, all=True)) - -job_id, _ = sky.stream_and_get( - sky.launch( - task, - cluster_name=cluster_name, - retry_until_up=True, - idle_minutes_to_autostop=60, - down=True, - fast=args.fast, - ) -) - -print(f"Job submitted (ID: {job_id}). 
Streaming logs...") -exit_code = sky.tail_logs(cluster_name=cluster_name, job_id=job_id, follow=True) -print(f"Job {job_id} finished with exit code {exit_code}.") diff --git a/src/art/dev/get_model_config.py b/src/art/dev/get_model_config.py index 0e5b20618..e4301087f 100644 --- a/src/art/dev/get_model_config.py +++ b/src/art/dev/get_model_config.py @@ -1,10 +1,10 @@ from .engine import EngineArgs from .model import InitArgs, InternalModelConfig, PeftArgs, TrainerArgs -from .validate import QWEN3_5_DELTANET_MODELS, is_dedicated_mode +from .validate import QWEN_DELTANET_MODELS, is_dedicated_mode def default_target_modules(base_model: str) -> list[str]: - if base_model in QWEN3_5_DELTANET_MODELS: + if base_model in QWEN_DELTANET_MODELS: return [ "q_proj", "k_proj", diff --git a/src/art/dev/validate.py b/src/art/dev/validate.py index a00e14fbf..8bdcb2e82 100644 --- a/src/art/dev/validate.py +++ b/src/art/dev/validate.py @@ -2,10 +2,10 @@ from .model import InternalModelConfig, RolloutWeightsMode -# Dense and MoE Qwen3.5-family models with Gated DeltaNet linear-attention +# Dense and MoE Qwen 3.5/3.6 models with Gated DeltaNet linear-attention # layers. These need LoRA coverage for linear_attn projections, and Megatron # compile should stay disabled until the DeltaNet torch.compile failure is fixed. 
-QWEN3_5_DELTANET_MODELS = { +QWEN_DELTANET_MODELS = { "Qwen/Qwen3.5-4B", "Qwen/Qwen3.5-27B", "Qwen/Qwen3.5-35B-A3B", diff --git a/src/art/megatron/train.py b/src/art/megatron/train.py index 092f791b9..61f23c7d0 100644 --- a/src/art/megatron/train.py +++ b/src/art/megatron/train.py @@ -37,7 +37,7 @@ from torch.distributed import all_reduce from art import dev, types -from art.dev.validate import QWEN3_5_DELTANET_MODELS +from art.dev.validate import QWEN_DELTANET_MODELS from art.loss import loss_fn, shift_tensor from art.megatron.bridge_adapter_compat import build_adapter_weights_by_base from art.megatron.compile_workarounds import install_torch_compile_workarounds @@ -204,7 +204,7 @@ def _compile_enabled(model_identifier: str) -> bool: disabled = _env_flag("ART_DISABLE_MEGATRON_COMPILE") if disabled is not None: return disabled is not True - return model_identifier not in QWEN3_5_DELTANET_MODELS + return model_identifier not in QWEN_DELTANET_MODELS def _install_gpt_preprocess_hook(model_chunks: ModelChunks) -> None: @@ -389,12 +389,12 @@ def build_training_runtime( elif ( rank == 0 and _env_flag("ART_DISABLE_MEGATRON_COMPILE") is None - and resolved_model_identifier in QWEN3_5_DELTANET_MODELS + and resolved_model_identifier in QWEN_DELTANET_MODELS ): print( "Disabling torch.compile for", resolved_model_identifier, - "because Qwen3.5-family Gated DeltaNet currently fails under torch.compile.", + "because Qwen Gated DeltaNet currently fails under torch.compile.", ) optimizer_config = optimizer_config or _default_optimizer_config() diff --git a/tests/unit/test_megatron_dedicated.py b/tests/unit/test_megatron_dedicated.py index ae2016ab7..eb4d95e40 100644 --- a/tests/unit/test_megatron_dedicated.py +++ b/tests/unit/test_megatron_dedicated.py @@ -15,7 +15,7 @@ from art import TrainableModel, types from art.dev.model import InternalModelConfig -from art.dev.validate import QWEN3_5_DELTANET_MODELS +from art.dev.validate import QWEN_DELTANET_MODELS from 
art.megatron.backend import MegatronBackend from art.megatron.jobs import ( MegatronMergedTrainJob, @@ -152,10 +152,10 @@ def test_unwrap_art_wrapper_name_strips_compiled_wrapper_segments() -> None: ) -def test_compile_enabled_disables_qwen35_deltanet_by_default() -> None: +def test_compile_enabled_disables_qwen_deltanet_by_default() -> None: assert _compile_enabled("Qwen/Qwen3-30B-A3B-Instruct-2507") is True assert _compile_enabled("Qwen/Qwen3.5-32B-Instruct") is True - for model_identifier in QWEN3_5_DELTANET_MODELS: + for model_identifier in QWEN_DELTANET_MODELS: assert _compile_enabled(model_identifier) is False From 1af1e5731ce739697a715851072c7e2678cfc171 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Fri, 1 May 2026 20:08:54 +0000 Subject: [PATCH 10/13] support megatron LoRA --- dev/yes-no-maybe-megatron.py | 61 ++++++++++---------- src/art/megatron/merge.py | 14 +++++ src/art/megatron/service.py | 50 +++++++++++++--- src/art/megatron/train.py | 3 + src/art/utils/convert_megatron_moe_lora.py | 26 +++++++++ tests/unit/test_convert_megatron_moe_lora.py | 20 +++++++ tests/unit/test_megatron_dedicated.py | 12 ++-- 7 files changed, 145 insertions(+), 41 deletions(-) diff --git a/dev/yes-no-maybe-megatron.py b/dev/yes-no-maybe-megatron.py index ad9e470ad..280147a36 100644 --- a/dev/yes-no-maybe-megatron.py +++ b/dev/yes-no-maybe-megatron.py @@ -1,11 +1,9 @@ """Yes-no-maybe training demo for the Megatron backend. -By default this runs Qwen 3.6 in dedicated merged mode, which needs two GPUs: -GPU 0 runs Megatron training and GPU 1 runs the dedicated vLLM inference server. -After each train step, Megatron keeps the LoRA adapter checkpoint and pushes merged -weights into vLLM for rollouts, because direct vLLM LoRA serving does not yet -reflect these target-parameter MoE adapters reliably. Override TRAINER_GPU_IDS, -INFERENCE_GPU_IDS, or ROLLOUT_WEIGHTS_MODE if you need a different layout. +By default this runs Qwen 3.6 in standard direct LoRA mode on one GPU. 
Set +MEGATRON_DEDICATED=1 to use a dedicated vLLM inference server on a separate GPU; +then override TRAINER_GPU_IDS and INFERENCE_GPU_IDS if you need a different +layout. """ import asyncio @@ -55,36 +53,39 @@ async def main(): load_dotenv() backend = MegatronBackend() - base_model = os.environ.get("BASE_MODEL", "Qwen/Qwen3.6-35B-A3B") + base_model = os.environ.get("BASE_MODEL", "Qwen/Qwen3.6-27B") + use_dedicated = os.environ.get("MEGATRON_DEDICATED", "0") == "1" + rollout_weights_mode = os.environ.get("ROLLOUT_WEIGHTS_MODE", "lora") + internal_config = art.dev.InternalModelConfig( + engine_args=art.dev.EngineArgs( + gpu_memory_utilization=float(os.environ.get("GPU_MEMORY_UTILIZATION", "0.8")), + max_model_len=int(os.environ.get("MAX_MODEL_LEN", "4096")), + max_num_seqs=int(os.environ.get("MAX_NUM_SEQS", "8")), + tensor_parallel_size=int(os.environ.get("TENSOR_PARALLEL_SIZE", "1")), + ), + rollout_weights_mode=rollout_weights_mode, + chat_template_kwargs={ + "enable_thinking": False, + "preserve_thinking": True, + }, + ) + if use_dedicated: + internal_config["trainer_gpu_ids"] = [ + int(gpu_id) + for gpu_id in os.environ.get("TRAINER_GPU_IDS", "0").split(",") + ] + internal_config["inference_gpu_ids"] = [ + int(gpu_id) + for gpu_id in os.environ.get("INFERENCE_GPU_IDS", "1").split(",") + ] + model = art.TrainableModel( name=os.environ.get( "MODEL_NAME", f"yes-no-maybe-megatron-{uuid.uuid4().hex[:8]}" ), project="yes-no-maybe-megatron", base_model=base_model, - _internal_config=art.dev.InternalModelConfig( - engine_args=art.dev.EngineArgs( - gpu_memory_utilization=float( - os.environ.get("GPU_MEMORY_UTILIZATION", "0.8") - ), - max_model_len=int(os.environ.get("MAX_MODEL_LEN", "4096")), - max_num_seqs=int(os.environ.get("MAX_NUM_SEQS", "8")), - tensor_parallel_size=int(os.environ.get("TENSOR_PARALLEL_SIZE", "1")), - ), - trainer_gpu_ids=[ - int(gpu_id) - for gpu_id in os.environ.get("TRAINER_GPU_IDS", "0").split(",") - ], - inference_gpu_ids=[ - int(gpu_id) - for 
gpu_id in os.environ.get("INFERENCE_GPU_IDS", "1").split(",") - ], - rollout_weights_mode=os.environ.get("ROLLOUT_WEIGHTS_MODE", "merged"), - chat_template_kwargs={ - "enable_thinking": False, - "preserve_thinking": True, - }, - ), + _internal_config=internal_config, ) try: diff --git a/src/art/megatron/merge.py b/src/art/megatron/merge.py index 147f14bad..c6c427c2b 100644 --- a/src/art/megatron/merge.py +++ b/src/art/megatron/merge.py @@ -6,7 +6,9 @@ import torch from art.utils.convert_megatron_moe_lora import ( + add_language_model_prefix_for_vllm, convert_megatron_moe_lora_to_peft_target_parameter, + uses_qwen_language_model_prefix, ) safetensors = importlib.import_module("safetensors") @@ -15,6 +17,16 @@ save_file = safetensors_torch.save_file +def _uses_qwen_language_model_prefix(lora_path: Path) -> bool: + config_path = lora_path / "adapter_config.json" + if not config_path.exists(): + return False + with open(config_path, "r", encoding="utf-8") as config_file: + adapter_config = json.load(config_file) + base_model = adapter_config.get("base_model_name_or_path") + return isinstance(base_model, str) and uses_qwen_language_model_prefix(base_model) + + def _load_adapter_shards( base_dir: Path, ) -> tuple[ @@ -129,6 +141,8 @@ def merge_lora_adapter(lora_path: str) -> None: adapter_model_path = base_dir / "adapter_model.safetensors" adapter_model = convert_megatron_moe_lora_to_peft_target_parameter(adapter_model) + if _uses_qwen_language_model_prefix(base_dir): + adapter_model = add_language_model_prefix_for_vllm(adapter_model) save_file(adapter_model, adapter_model_path) for filename in shard_filenames: filename.unlink() diff --git a/src/art/megatron/service.py b/src/art/megatron/service.py index a114ebdc4..048c98e2c 100644 --- a/src/art/megatron/service.py +++ b/src/art/megatron/service.py @@ -27,6 +27,10 @@ from ..preprocessing.pack import DiskPackedTensors from ..preprocessing.tokenize import SFTBatch from ..unsloth.service import do_sleep, do_wake_up, 
gc_and_empty_cuda_cache +from ..utils.convert_megatron_moe_lora import ( + add_language_model_prefix_for_vllm, + uses_qwen_language_model_prefix, +) from ..utils.get_model_step import get_step_from_dir from ..utils.network import find_free_tcp_port from ..utils.output_dirs import get_step_checkpoint_dir @@ -45,6 +49,24 @@ safetensors = importlib.import_module("safetensors") safe_open = safetensors.safe_open +safetensors_torch = importlib.import_module("safetensors.torch") +save_file = safetensors_torch.save_file + + +def _rewrite_identity_lora_for_vllm(base_model: str, lora_path: str) -> None: + if not uses_qwen_language_model_prefix(base_model): + return + + adapter_model_path = os.path.join(lora_path, "adapter_model.safetensors") + if os.path.exists(adapter_model_path): + with safe_open(adapter_model_path, framework="pt") as adapter_file: + adapter_model = { + key: adapter_file.get_tensor(key) for key in adapter_file.keys() + } + save_file( + add_language_model_prefix_for_vllm(adapter_model), + adapter_model_path, + ) def create_identity_lora( @@ -87,12 +109,16 @@ def create_identity_lora( ) model.name_or_path = base_model - lora_config = LoraConfig( - base_model_name_or_path=base_model, - r=rank, - lora_alpha=lora_alpha, - target_modules=[], - target_parameters=[ + if uses_qwen_language_model_prefix(base_model): + target_modules = default_target_modules(base_model) + target_parameters = [ + "mlp.experts.gate_up_proj", + "mlp.experts.down_proj", + ] + task_type = "CAUSAL_LM" + else: + target_modules = [] + target_parameters = [ name for name, _ in model.named_parameters() if name.endswith( @@ -111,7 +137,16 @@ def create_identity_lora( "mlp.shared_expert.down_proj.weight", ) ) - ], + ] + task_type = None + + lora_config = LoraConfig( + base_model_name_or_path=base_model, + r=rank, + lora_alpha=lora_alpha, + target_modules=target_modules, + target_parameters=target_parameters, + task_type=task_type, bias="none", ) @@ -131,6 +166,7 @@ def _skip_meta_to( 
os.makedirs(lora_path, exist_ok=True) peft_model.save_pretrained(lora_path) + _rewrite_identity_lora_for_vllm(base_model, lora_path) del peft_model, model if torch.cuda.is_available(): diff --git a/src/art/megatron/train.py b/src/art/megatron/train.py index 61f23c7d0..285cd2912 100644 --- a/src/art/megatron/train.py +++ b/src/art/megatron/train.py @@ -82,6 +82,7 @@ ) from art.utils.convert_megatron_moe_lora import ( convert_peft_target_parameter_moe_lora_to_megatron, + strip_language_model_prefix_for_megatron, ) safetensors = importlib.import_module("safetensors") @@ -815,6 +816,7 @@ def _load_lora_rank(lora_path: str) -> int: def _load_megatron_adapter_state_dict(lora_path: str) -> dict[str, torch.Tensor]: adapter_model = load_lora_adapter_state_dict(lora_path) + adapter_model = strip_language_model_prefix_for_megatron(adapter_model) return convert_peft_target_parameter_moe_lora_to_megatron( adapter_model, rank=_load_lora_rank(lora_path), @@ -958,6 +960,7 @@ def maybe_load_adapter_into_model( adapter_model = { key: adapter_file.get_tensor(key) for key in adapter_file.keys() } + adapter_model = strip_language_model_prefix_for_megatron(adapter_model) adapter_model = convert_peft_target_parameter_moe_lora_to_megatron( adapter_model, rank=_load_lora_rank(lora_path), diff --git a/src/art/utils/convert_megatron_moe_lora.py b/src/art/utils/convert_megatron_moe_lora.py index a515655b9..bc1148f87 100644 --- a/src/art/utils/convert_megatron_moe_lora.py +++ b/src/art/utils/convert_megatron_moe_lora.py @@ -33,6 +33,32 @@ r"(?Pgate_proj|up_proj|down_proj)\." r"(?Plora_[AB])\.weight$" ) +_TEXT_LAYER_PREFIX = "base_model.model.model.layers." +_LANGUAGE_MODEL_LAYER_PREFIX = "base_model.model.model.language_model.layers." 
+ + +def uses_qwen_language_model_prefix(base_model: str) -> bool: + return base_model.startswith(("Qwen/Qwen3.5", "Qwen/Qwen3.6")) + + +def add_language_model_prefix_for_vllm( + tensors: dict[str, torch.Tensor], +) -> dict[str, torch.Tensor]: + """Rewrite Megatron text-model LoRA keys to vLLM's Qwen3.5/3.6 wrapper path.""" + return { + key.replace(_TEXT_LAYER_PREFIX, _LANGUAGE_MODEL_LAYER_PREFIX, 1): tensor + for key, tensor in tensors.items() + } + + +def strip_language_model_prefix_for_megatron( + tensors: dict[str, torch.Tensor], +) -> dict[str, torch.Tensor]: + """Rewrite vLLM/HF Qwen3.5/3.6 wrapper LoRA keys to Megatron text-model keys.""" + return { + key.replace(_LANGUAGE_MODEL_LAYER_PREFIX, _TEXT_LAYER_PREFIX, 1): tensor + for key, tensor in tensors.items() + } def _reshape_expert_a( diff --git a/tests/unit/test_convert_megatron_moe_lora.py b/tests/unit/test_convert_megatron_moe_lora.py index 50cbc6a60..00435343f 100644 --- a/tests/unit/test_convert_megatron_moe_lora.py +++ b/tests/unit/test_convert_megatron_moe_lora.py @@ -1,8 +1,10 @@ import torch from art.utils.convert_megatron_moe_lora import ( + add_language_model_prefix_for_vllm, convert_megatron_moe_lora_to_peft_target_parameter, convert_peft_target_parameter_moe_lora_to_megatron, + strip_language_model_prefix_for_megatron, ) @@ -72,3 +74,21 @@ def test_convert_peft_target_parameter_moe_lora_uses_rank_major_b_layout() -> No megatron[f"{prefix}.1.down_proj.lora_B.weight"], down_b.reshape(3, 2, 2).permute(2, 0, 1)[1], ) + + +def test_qwen_language_model_prefix_rewrites_round_trip() -> None: + tensors = { + "base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight": torch.ones( + 1 + ), + "base_model.model.lm_head.weight": torch.ones(1), + } + + vllm_tensors = add_language_model_prefix_for_vllm(tensors) + + assert ( + "base_model.model.model.language_model.layers.0.self_attn.q_proj.lora_A.weight" + in vllm_tensors + ) + assert "base_model.model.lm_head.weight" in vllm_tensors + assert 
strip_language_model_prefix_for_megatron(vllm_tensors) == tensors diff --git a/tests/unit/test_megatron_dedicated.py b/tests/unit/test_megatron_dedicated.py index eb4d95e40..14e92b0a1 100644 --- a/tests/unit/test_megatron_dedicated.py +++ b/tests/unit/test_megatron_dedicated.py @@ -14,6 +14,7 @@ pytest.importorskip("vllm") from art import TrainableModel, types +from art.dev.get_model_config import default_target_modules from art.dev.model import InternalModelConfig from art.dev.validate import QWEN_DELTANET_MODELS from art.megatron.backend import MegatronBackend @@ -218,10 +219,13 @@ def fake_model_from_config(config: Any, **_kwargs: Any) -> FakeModel: create_identity_lora("Qwen/Qwen3.5-35B-A3B", str(tmp_path)) assert seen["config"] is top_level_config.text_config - assert ( - "model.layers.0.linear_attn.in_proj_qkv.weight" - in seen["lora_config"].target_parameters - ) + assert seen["lora_config"].target_modules == set(default_target_modules( + "Qwen/Qwen3.5-35B-A3B" + )) + assert seen["lora_config"].target_parameters == [ + "mlp.experts.gate_up_proj", + "mlp.experts.down_proj", + ] @pytest.mark.asyncio From 4571a73b293766a8e97accc70093c3d211900c26 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Fri, 1 May 2026 22:48:49 +0000 Subject: [PATCH 11/13] add dense support --- src/art/megatron/provider.py | 45 ++++++++++++------ src/art/megatron/service.py | 19 ++++++-- .../test_megatron_provider_support.py | 46 +++++++++++++++---- tests/unit/test_megatron_dedicated.py | 43 ++++++++++++++++- 4 files changed, 126 insertions(+), 27 deletions(-) diff --git a/src/art/megatron/provider.py b/src/art/megatron/provider.py index 4b23f02c6..b99ad0d5a 100644 --- a/src/art/megatron/provider.py +++ b/src/art/megatron/provider.py @@ -20,8 +20,12 @@ Qwen3VLSelfAttention, ) from megatron.bridge.models.qwen_vl.modelling_qwen3_vl.model import Qwen3VLModel -from megatron.bridge.models.qwen_vl.qwen35_vl_bridge import Qwen35VLMoEBridge +from megatron.bridge.models.qwen_vl.qwen35_vl_bridge import 
( + Qwen35VLBridge, + Qwen35VLMoEBridge, +) from megatron.bridge.models.qwen_vl.qwen35_vl_provider import ( + Qwen35VLModelProvider, Qwen35VLMoEModelProvider, _patch_standard_attention_specs, ) @@ -185,12 +189,18 @@ def _resolve_default_deepep_num_sms(provider: GPTModelProvider) -> int: return sm_count if sm_count >= 2 else 20 +def _is_moe_provider(provider: GPTModelProvider) -> bool: + return int(getattr(provider, "num_moe_experts", 0) or 0) > 0 + + def _apply_default_parallel_topology(provider: GPTModelProvider) -> None: visible_gpu_count = max(torch.cuda.device_count(), 1) provider.tensor_model_parallel_size = visible_gpu_count provider.context_parallel_size = 1 provider.pipeline_model_parallel_size = 1 - provider.expert_model_parallel_size = visible_gpu_count + provider.expert_model_parallel_size = ( + visible_gpu_count if _is_moe_provider(provider) else 1 + ) provider.expert_tensor_parallel_size = 1 @@ -229,7 +239,10 @@ def _maybe_print_finalized_env_settings(provider: GPTModelProvider) -> None: "recompute_modules": provider.recompute_modules, "moe_shared_expert_overlap": provider.moe_shared_expert_overlap, "moe_flex_dispatcher_backend": ( - "deepep" if _tp_ep_parallel_domain_size(provider) > 1 else None + "deepep" + if _is_moe_provider(provider) + and _tp_ep_parallel_domain_size(provider) > 1 + else None ), "sequence_parallel": provider.sequence_parallel, }, @@ -347,8 +360,11 @@ def get_provider_bundle( dtype=torch_dtype, trust_remote_code=True, ) - assert isinstance(bridge._model_bridge, (Qwen3MoEBridge, Qwen35VLMoEBridge)), ( - "Only Qwen3 and Qwen3.5 MoE models are supported" + assert isinstance( + bridge._model_bridge, + (Qwen3MoEBridge, Qwen35VLBridge, Qwen35VLMoEBridge), + ), ( + "Only Qwen3 MoE and Qwen3.5/3.6 dense or MoE models are supported" ) if torch_dtype != torch.bfloat16: model_name_or_path = bridge.hf_pretrained.model_name_or_path @@ -360,7 +376,7 @@ def get_provider_bundle( ) ) provider = bridge.to_megatron_provider() - if 
isinstance(provider, Qwen35VLMoEModelProvider): + if isinstance(provider, (Qwen35VLModelProvider, Qwen35VLMoEModelProvider)): from megatron.bridge.models.gpt_provider import mtp_block_spec def _patch_qwen35_block_spec(block_spec: TransformerBlockSubmodules) -> None: @@ -384,7 +400,7 @@ def _qwen35_layer_spec( provider.transformer_layer_spec = _qwen35_layer_spec def _provide_qwen35_with_flex_attention( - self: Qwen35VLMoEModelProvider, + self: Qwen35VLModelProvider | Qwen35VLMoEModelProvider, pre_process: bool | None = None, post_process: bool | None = None, vp_stage: int | None = None, @@ -446,18 +462,19 @@ def _flex_attention_layer_spec( provider.recompute_granularity = "full" provider.recompute_method = "uniform" provider.recompute_num_layers = 1 - provider.moe_shared_expert_overlap = True + provider.moe_shared_expert_overlap = _is_moe_provider(provider) _apply_default_parallel_topology(provider) _apply_runtime_env_overrides(provider) - if _tp_ep_parallel_domain_size(provider) > 1: + if _is_moe_provider(provider) and _tp_ep_parallel_domain_size(provider) > 1: # use DeepEP for MoE expert comm. 
comm can be the same amount of time as actual MLP # compute, so these are very beneficial apply_flex_dispatcher_backend(provider, moe_flex_dispatcher_backend="deepep") - provider.moe_permute_fusion = True - provider.moe_router_dtype = "fp32" - # params are disabled anyways, but should know about this if we switch to full FT - # because DP 'dummy' microbatches will unintentionally have loss for this - provider.moe_aux_loss_coeff = 0.0 + if _is_moe_provider(provider): + provider.moe_permute_fusion = True + provider.moe_router_dtype = "fp32" + # params are disabled anyways, but should know about this if we switch to full FT + # because DP 'dummy' microbatches will unintentionally have loss for this + provider.moe_aux_loss_coeff = 0.0 # effectively just a flag modifying finalize_model_grads behavior for DPxCP provider.calculate_per_token_loss = True provider.sequence_parallel = provider.tensor_model_parallel_size > 1 diff --git a/src/art/megatron/service.py b/src/art/megatron/service.py index 048c98e2c..6cdc8c02f 100644 --- a/src/art/megatron/service.py +++ b/src/art/megatron/service.py @@ -69,6 +69,13 @@ def _rewrite_identity_lora_for_vllm(base_model: str, lora_path: str) -> None: ) +def _is_moe_model_config(config: Any) -> bool: + text_config = getattr(config, "text_config", None) + return int( + getattr(text_config, "num_experts", getattr(config, "num_experts", 0)) or 0 + ) > 0 + + def create_identity_lora( base_model: str, lora_path: str, @@ -111,10 +118,14 @@ def create_identity_lora( if uses_qwen_language_model_prefix(base_model): target_modules = default_target_modules(base_model) - target_parameters = [ - "mlp.experts.gate_up_proj", - "mlp.experts.down_proj", - ] + target_parameters = ( + [ + "mlp.experts.gate_up_proj", + "mlp.experts.down_proj", + ] + if _is_moe_model_config(base_config) + else [] + ) task_type = "CAUSAL_LM" else: target_modules = [] diff --git a/tests/integration/test_megatron_provider_support.py 
b/tests/integration/test_megatron_provider_support.py index e6030c902..4d167592e 100644 --- a/tests/integration/test_megatron_provider_support.py +++ b/tests/integration/test_megatron_provider_support.py @@ -10,7 +10,10 @@ pytest.importorskip("megatron.bridge.models.qwen_vl.qwen35_vl_bridge") from megatron.bridge.models.qwen.qwen3_moe_bridge import Qwen3MoEBridge -from megatron.bridge.models.qwen_vl.qwen35_vl_bridge import Qwen35VLMoEBridge +from megatron.bridge.models.qwen_vl.qwen35_vl_bridge import ( + Qwen35VLBridge, + Qwen35VLMoEBridge, +) from megatron.core.transformer.enums import AttnBackend from art.megatron.flex_attention import FlexDotProductAttention @@ -21,6 +24,14 @@ class _FakeProvider: def __init__(self) -> None: self.transformer_layer_spec = self._base_layer_spec self.finalized = False + self.overlap_moe_expert_parallel_comm = False + self.delay_wgrad_compute = False + self.ep_overlap_early_attn_memory_release = False + self.moe_apply_probs_on_input = False + self.bias_activation_fusion = True + self.fine_grained_activation_offloading = False + self.offload_modules = [] + self.recompute_modules = None def _base_layer_spec( self, config: object, vp_stage: int | None = None @@ -67,12 +78,28 @@ def to_megatron_provider(self) -> _FakeProvider: return self._provider -@pytest.mark.parametrize("bridge_type", [Qwen3MoEBridge, Qwen35VLMoEBridge]) -def test_get_provider_accepts_supported_qwen_moe_bridges( +@pytest.mark.parametrize( + ( + "bridge_type", + "num_moe_experts", + "expected_expert_model_parallel_size", + "expected_moe_shared_expert_overlap", + ), + [ + (Qwen3MoEBridge, 8, 2, False), + (Qwen35VLBridge, 0, 1, False), + (Qwen35VLMoEBridge, 8, 2, False), + ], +) +def test_get_provider_accepts_supported_qwen_bridges( monkeypatch: pytest.MonkeyPatch, bridge_type: type[object], + num_moe_experts: int, + expected_expert_model_parallel_size: int, + expected_moe_shared_expert_overlap: bool, ) -> None: provider = _FakeProvider() + provider.num_moe_experts = 
num_moe_experts fake_bridge = _FakeBridge( model_bridge=object.__new__(bridge_type), provider=provider, @@ -95,12 +122,15 @@ def test_get_provider_accepts_supported_qwen_moe_bridges( assert resolved.tensor_model_parallel_size == 2 assert resolved.context_parallel_size == 1 assert resolved.pipeline_model_parallel_size == 1 - assert resolved.expert_model_parallel_size == 2 + assert ( + resolved.expert_model_parallel_size == expected_expert_model_parallel_size + ) assert resolved.expert_tensor_parallel_size == 1 assert resolved.sequence_parallel is True - assert resolved.moe_shared_expert_overlap is True - assert resolved.moe_router_dtype == "fp32" - assert resolved.moe_aux_loss_coeff == 0.0 + assert resolved.moe_shared_expert_overlap is expected_moe_shared_expert_overlap + if num_moe_experts: + assert resolved.moe_router_dtype == "fp32" + assert resolved.moe_aux_loss_coeff == 0.0 assert resolved.calculate_per_token_loss is True layer_spec = provider_module._resolve_layer_spec( @@ -127,7 +157,7 @@ def test_get_provider_rejects_unsupported_bridge( with pytest.raises( AssertionError, - match="Only Qwen3 and Qwen3.5 MoE models are supported", + match="Only Qwen3 MoE and Qwen3.5/3.6 dense or MoE models are supported", ): provider_module.get_provider("unsupported-model") diff --git a/tests/unit/test_megatron_dedicated.py b/tests/unit/test_megatron_dedicated.py index 14e92b0a1..24fd83b1c 100644 --- a/tests/unit/test_megatron_dedicated.py +++ b/tests/unit/test_megatron_dedicated.py @@ -174,7 +174,7 @@ def test_create_identity_lora_uses_nested_text_config_when_top_level_lacks_vocab monkeypatch: pytest.MonkeyPatch, ) -> None: top_level_config = SimpleNamespace( - text_config=SimpleNamespace(vocab_size=128), + text_config=SimpleNamespace(vocab_size=128, num_experts=256), ) seen: dict[str, Any] = {} @@ -228,6 +228,47 @@ def fake_model_from_config(config: Any, **_kwargs: Any) -> FakeModel: ] +def test_create_identity_lora_uses_dense_qwen_target_modules_only( + tmp_path: Path, + 
monkeypatch: pytest.MonkeyPatch, +) -> None: + top_level_config = SimpleNamespace( + text_config=SimpleNamespace(vocab_size=128), + ) + seen: dict[str, Any] = {} + + class FakeModel: + name_or_path = "" + + class FakePeftModel: + def save_pretrained(self, lora_path: str) -> None: + Path(lora_path).mkdir(parents=True, exist_ok=True) + + monkeypatch.setattr( + "transformers.AutoConfig.from_pretrained", + lambda *_args, **_kwargs: top_level_config, + ) + monkeypatch.setattr( + "transformers.AutoModelForCausalLM.from_config", + lambda config, **_kwargs: seen.setdefault("config", config) or FakeModel(), + ) + monkeypatch.setattr("accelerate.init_empty_weights", nullcontext) + monkeypatch.setattr( + "peft.get_peft_model", + lambda _model, lora_config, **_kwargs: ( + seen.setdefault("lora_config", lora_config) or FakePeftModel() + ), + ) + + create_identity_lora("Qwen/Qwen3.6-27B", str(tmp_path)) + + assert seen["config"] is top_level_config.text_config + assert seen["lora_config"].target_modules == set( + default_target_modules("Qwen/Qwen3.6-27B") + ) + assert seen["lora_config"].target_parameters == [] + + @pytest.mark.asyncio async def test_megatron_service_start_openai_server_dedicated_starts_subprocess( tmp_path: Path, From 473495e82ef6351c9ee92e98565c9e138e914224 Mon Sep 17 00:00:00 2001 From: Kovbo Date: Fri, 1 May 2026 23:49:00 +0000 Subject: [PATCH 12/13] fix dense qwen training --- src/art/megatron/lora.py | 12 ++++++++++-- .../test_megatron_qwen35_lora_wrapping.py | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/art/megatron/lora.py b/src/art/megatron/lora.py index 5795a1951..fe43f398f 100644 --- a/src/art/megatron/lora.py +++ b/src/art/megatron/lora.py @@ -847,6 +847,9 @@ def __init__( ) -> None: super().__init__() self.linear_fc1 = linear_fc1 + if isinstance(linear_fc1, TELayerNormColumnParallelLinear): + linear_fc1.return_layernorm_output = True + linear_fc1.return_layernorm_output_gathered = True self.gate_lora = 
self._build_fc1_lora( adapter_model_prefix=f"{adapter_model_prefix}.gate_proj", linear_fc1=linear_fc1, @@ -897,9 +900,14 @@ def _build_fc1_lora( ) def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor | None]: - base_out, bias_out = self.linear_fc1(x) + base_output, bias_out = self.linear_fc1(x) + if isinstance(base_output, tuple): + base_out, lora_input = base_output + else: + base_out = base_output + lora_input = x adapter_out = torch.cat( - [self.gate_lora(x), self.up_lora(x)], + [self.gate_lora(lora_input), self.up_lora(lora_input)], dim=-1, ) adapter_out = _match_sequence_parallel_output_shape( diff --git a/tests/integration/test_megatron_qwen35_lora_wrapping.py b/tests/integration/test_megatron_qwen35_lora_wrapping.py index a85719976..143143320 100644 --- a/tests/integration/test_megatron_qwen35_lora_wrapping.py +++ b/tests/integration/test_megatron_qwen35_lora_wrapping.py @@ -250,6 +250,8 @@ def test_apply_lora_adapters_accepts_layernorm_column_fc1_dense_path() -> None: assert isinstance(target_layer.mlp.linear_fc1, SharedExpertsLinearFC1LoRA) assert isinstance(target_layer.mlp.linear_fc2, SharedExpertsLinearFC2LoRA) + assert dense_fc1.return_layernorm_output is True + assert dense_fc1.return_layernorm_output_gathered is True @pytest.mark.skipif( From 4337d81548886759af9c218009b67454e5f37c76 Mon Sep 17 00:00:00 2001 From: Bohdan Date: Mon, 11 May 2026 11:41:45 -0700 Subject: [PATCH 13/13] update dependencies --- pyproject.toml | 31 +- scripts/ci/build_and_push_uv_cache.sh | 2 +- scripts/ci/compute_uv_fingerprint.py | 2 +- skypilot-config.yaml | 5 +- uv.lock | 752 +++++++++++++++----------- 5 files changed, 456 insertions(+), 336 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 97436d5e1..b4c5f6463 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ backend = [ "bitsandbytes>=0.45.5", "unsloth[colab-no-deps,huggingface] @ git+https://github.com/Kovbo/unsloth.git@140026a39cfce9ee8702c46140d4c2d4846d1eb6", 
"unsloth-zoo @ git+https://github.com/Kovbo/unsloth-zoo.git@68677b9814e90ccb647cbda91062e0d4001798bd", - "torch==2.10.0", + "torch==2.11.0", "torchao==0.17.0", "accelerate==1.13.0", "awscli>=1.38.1", @@ -40,21 +40,21 @@ backend = [ "gql<5", "nvidia-cudnn-frontend<1.21 ; sys_platform == 'linux'", "nvidia-resiliency-ext<0.5 ; sys_platform == 'linux'", - "vllm==0.19.1 ; sys_platform == 'linux'", + "vllm==0.20.0 ; sys_platform == 'linux'", ] megatron = [ "numpy<2", - "torch==2.10.0", + "torch==2.11.0", "apex @ git+https://github.com/NVIDIA/apex.git@25.09", - "transformer-engine==2.11.0", - "transformer-engine-cu12==2.11.0", - "transformer-engine-torch @ git+https://github.com/NVIDIA/TransformerEngine.git@v2.11#subdirectory=transformer_engine/pytorch", + "transformer-engine==2.14.1", + "transformer-engine-cu13==2.14.1", + "transformer-engine-torch==2.14.1", "megatron-core==0.16.0rc0", "pybind11>=2.13.6", "megatron-bridge @ git+https://github.com/NVIDIA-NeMo/Megatron-Bridge.git@e049cc00c24d03e2ae45d2608c7a44e2d2364e3d", "deep_ep @ git+https://github.com/deepseek-ai/DeepEP.git@v1.2.1 ; sys_platform == 'linux'", - "causal-conv1d @ https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.6.1.post4/causal_conv1d-1.6.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_full_version < '3.12'", - "mamba-ssm @ https://github.com/state-spaces/mamba/releases/download/v2.3.1/mamba_ssm-2.3.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_full_version < '3.12'", + "causal-conv1d==1.6.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_full_version < '3.12'", + "mamba-ssm==2.3.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_full_version < '3.12'", "nvidia-ml-py==13.580.82", "nvidia-resiliency-ext<0.5 ; sys_platform == 'linux'", "ml-dtypes>=0.5.0 ; python_full_version < '3.13'", @@ 
-73,7 +73,7 @@ tinker = [ "pyarrow>=15.0.0", "pydantic>=2.12.5", "tinker>=0.8.1", - "torch==2.10.0", + "torch==2.11.0", "transformers==5.6.2", "uvicorn>=0.35.0", "datrie>=0.8.3", @@ -133,15 +133,16 @@ markers = [ required-version = ">=0.11.7" override-dependencies = [ "datasets==4.8.5", - "flashinfer-python==0.6.6", + "flashinfer-python==0.6.8.post1", "numpy<2", "nvidia-resiliency-ext<0.5", - "quack-kernels>=0.2.7", + "quack-kernels>=0.3.3", + "torch==2.11.0", "transformers==5.6.2", - "transformer-engine==2.11.0", + "transformer-engine==2.14.1", ] exclude-dependencies = ["pynvml", "emerging-optimizers"] -no-build-isolation-package = ["apex", "transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "deep-ep", "nv-grouped-gemm"] +no-build-isolation-package = ["apex", "transformer-engine", "transformer-engine-cu13", "transformer-engine-torch", "megatron-core", "megatron-bridge", "deep-ep", "nv-grouped-gemm"] [tool.uv.extra-build-dependencies] apex = ["torch>=2.8.0"] @@ -166,7 +167,7 @@ requires-dist = [] [[tool.uv.dependency-metadata]] name = "transformer-engine-torch" -version = "2.11.0" +version = "2.14.1" requires-dist = [ "einops", "onnx", @@ -174,7 +175,7 @@ requires-dist = [ "packaging", "pydantic", "torch", - "transformer-engine-cu12", + "transformer-engine-cu13", ] [tool.ty.environment] diff --git a/scripts/ci/build_and_push_uv_cache.sh b/scripts/ci/build_and_push_uv_cache.sh index f4db6bcb4..50dd557e6 100755 --- a/scripts/ci/build_and_push_uv_cache.sh +++ b/scripts/ci/build_and_push_uv_cache.sh @@ -4,7 +4,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" -BASE_IMAGE="${BASE_IMAGE:-pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel}" +BASE_IMAGE="${BASE_IMAGE:-pytorch/pytorch:2.11.0-cuda13.0-cudnn9-devel}" PYTHON_MM="${PYTHON_MM:-3.11}" UV_CACHE_RELEASE_TAG="${UV_CACHE_RELEASE_TAG:-prek-uv-cache}" UV_CACHE_ASSET_PREFIX="${UV_CACHE_ASSET_PREFIX:-prek-uv-cache}" diff --git a/scripts/ci/compute_uv_fingerprint.py b/scripts/ci/compute_uv_fingerprint.py index 75e67305a..5fa3ee0d9 100755 --- a/scripts/ci/compute_uv_fingerprint.py +++ b/scripts/ci/compute_uv_fingerprint.py @@ -34,7 +34,7 @@ def _build_parser() -> argparse.ArgumentParser: ) parser.add_argument( "--base-image", - default="pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel", + default="pytorch/pytorch:2.11.0-cuda13.0-cudnn9-devel", help="Base image reference used for CI runtime/build cache compatibility", ) parser.add_argument( diff --git a/skypilot-config.yaml b/skypilot-config.yaml index 57eaf8d73..fd340cdc2 100644 --- a/skypilot-config.yaml +++ b/skypilot-config.yaml @@ -383,7 +383,7 @@ workdir: . resources: accelerators: ["H200:1", "H100-SXM:1", "H100:1", "A100-80GB:1"] - image_id: docker:pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel + image_id: docker:pytorch/pytorch:2.11.0-cuda13.0-cudnn9-devel ports: - 7999 # main ART server - 8000 # vLLM server @@ -399,7 +399,8 @@ config: kubernetes: pod_config: spec: - schedulerName: gpu-binpack + schedulerName: binpack-scheduler + activeDeadlineSeconds: 172800 # Work around uv 0.10.5 stripping execute bits from Ray binaries. 
containers: - name: ray-node diff --git a/uv.lock b/uv.lock index 8edeccaa8..37bb6c94a 100644 --- a/uv.lock +++ b/uv.lock @@ -23,11 +23,12 @@ resolution-markers = [ [manifest] overrides = [ { name = "datasets", specifier = "==4.8.5" }, - { name = "flashinfer-python", specifier = "==0.6.6" }, + { name = "flashinfer-python", specifier = "==0.6.8.post1" }, { name = "numpy", specifier = "<2" }, { name = "nvidia-resiliency-ext", specifier = "<0.5" }, - { name = "quack-kernels", specifier = ">=0.2.7" }, - { name = "transformer-engine", specifier = "==2.11.0" }, + { name = "quack-kernels", specifier = ">=0.3.3" }, + { name = "torch", specifier = "==2.11.0" }, + { name = "transformer-engine", specifier = "==2.14.1" }, { name = "transformers", specifier = "==5.6.2" }, ] excludes = [ @@ -46,8 +47,8 @@ version = "1.2.1+9af0e0d" [[manifest.dependency-metadata]] name = "transformer-engine-torch" -version = "2.11.0" -requires-dist = ["einops", "onnx", "onnxscript", "packaging", "pydantic", "torch", "transformer-engine-cu12"] +version = "2.14.1" +requires-dist = ["einops", "onnx", "onnxscript", "packaging", "pydantic", "torch", "transformer-engine-cu13"] [[package]] name = "abnf" @@ -1030,52 +1031,12 @@ wheels = [ name = "causal-conv1d" version = "1.6.1" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and 
sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version < '3.12' and sys_platform == 'win32'", - "python_full_version < '3.12' and sys_platform == 'emscripten'", - "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", -] dependencies = [ - { name = "ninja", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "packaging", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "torch", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/63/15/ec51d77a2df03ee93410f8ee97fceeb7181da213813c51243e9dd6d7e144/causal_conv1d-1.6.1.tar.gz", hash = "sha256:e4a697ec2db3906f012e675125569f8b510b4559bc53e3095143d91369e1221b", size = 29426, upload-time = "2026-03-10T08:56:35.305Z" } - -[[package]] -name = "causal-conv1d" -version = "1.6.1" -source = { url = "https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.6.1.post4/causal_conv1d-1.6.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" } -resolution-markers = [ - "python_full_version < '3.12' and sys_platform == 'linux'", -] -dependencies = [ - { name = "ninja", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, - { name = "packaging", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, - { name = "torch", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.6.1.post4/causal_conv1d-1.6.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl", hash = 
"sha256:fd2292d5488ac082ba15184e738e4462b27327693d0de9d0326df27bed5ae33e" }, -] - -[package.metadata] -requires-dist = [ { name = "ninja" }, { name = "packaging" }, { name = "torch" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/63/15/ec51d77a2df03ee93410f8ee97fceeb7181da213813c51243e9dd6d7e144/causal_conv1d-1.6.1.tar.gz", hash = "sha256:e4a697ec2db3906f012e675125569f8b510b4559bc53e3095143d91369e1221b", size = 29426, upload-time = "2026-03-10T08:56:35.305Z" } [[package]] name = "cbor2" @@ -1611,30 +1572,58 @@ wheels = [ name = "cuda-bindings" version = "12.9.4" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'emscripten'", + "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'emscripten'", + "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.12' and sys_platform == 'win32'", + "python_full_version < '3.12' and sys_platform == 'emscripten'", + "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", +] dependencies = [ - { name = "cuda-pathfinder" }, + { name = "cuda-pathfinder", marker = "sys_platform != 'linux'" }, ] wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a9/2b/ebcbb60aa6dba830474cd360c42e10282f7a343c0a1f58d24fbd3b7c2d77/cuda_bindings-12.9.4-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a6a429dc6c13148ff1e27c44f40a3dd23203823e637b87fd0854205195988306", size = 11840604, upload-time = "2025-10-21T14:51:34.565Z" }, - { url = "https://files.pythonhosted.org/packages/45/e7/b47792cc2d01c7e1d37c32402182524774dadd2d26339bd224e0e913832e/cuda_bindings-12.9.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c912a3d9e6b6651853eed8eed96d6800d69c08e94052c292fec3f282c5a817c9", size = 12210593, upload-time = "2025-10-21T14:51:36.574Z" }, { url = "https://files.pythonhosted.org/packages/dd/be/90d32049e06abcfba4b2e7df1dbcb5e16215c8852eef0cd8b25f38a66bd4/cuda_bindings-12.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:443b0875916879c2e4c3722941e25e42d5ab9bcbf34c9e83404fb100fa1f6913", size = 11490933, upload-time = "2025-10-21T14:51:38.792Z" }, - { url = "https://files.pythonhosted.org/packages/0c/c2/65bfd79292b8ff18be4dd7f7442cea37bcbc1a228c1886f1dea515c45b67/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:694ba35023846625ef471257e6b5a4bc8af690f961d197d77d34b1d1db393f56", size = 11760260, upload-time = "2025-10-21T14:51:40.79Z" }, - { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019, upload-time = "2025-10-21T14:51:43.167Z" }, { url = "https://files.pythonhosted.org/packages/df/6b/9c1b1a6c01392bfdd758e9486f52a1a72bc8f49e98f9355774ef98b5fb4e/cuda_bindings-12.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:696ca75d249ddf287d01b9a698b8e2d8a05046495a9c051ca15659dc52d17615", size = 11586961, upload-time = "2025-10-21T14:51:45.394Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/8b/b4b2d1c7775fa403b64333e720cfcfccef8dcb9cdeb99947061ca5a77628/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf8bfaedc238f3b115d957d1fd6562b7e8435ba57f6d0e2f87d0e7149ccb2da5", size = 11570071, upload-time = "2025-10-21T14:51:47.472Z" }, - { url = "https://files.pythonhosted.org/packages/63/56/e465c31dc9111be3441a9ba7df1941fe98f4aa6e71e8788a3fb4534ce24d/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32bdc5a76906be4c61eb98f546a6786c5773a881f3b166486449b5d141e4a39f", size = 11906628, upload-time = "2025-10-21T14:51:49.905Z" }, { url = "https://files.pythonhosted.org/packages/05/d0/d0e4e2e047d8e899f023fa15ad5e9894ce951253f4c894f1cd68490fdb14/cuda_bindings-12.9.4-cp313-cp313-win_amd64.whl", hash = "sha256:a2e82c8985948f953c2be51df45c3fe11c812a928fca525154fb9503190b3e64", size = 11556719, upload-time = "2025-10-21T14:51:52.248Z" }, - { url = "https://files.pythonhosted.org/packages/ec/07/6aff13bc1e977e35aaa6b22f52b172e2890c608c6db22438cf7ed2bf43a6/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3adf4958dcf68ae7801a59b73fb00a8b37f8d0595060d66ceae111b1002de38d", size = 11566797, upload-time = "2025-10-21T14:51:54.581Z" }, - { url = "https://files.pythonhosted.org/packages/a3/84/1e6be415e37478070aeeee5884c2022713c1ecc735e6d82d744de0252eee/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56e0043c457a99ac473ddc926fe0dc4046694d99caef633e92601ab52cbe17eb", size = 11925991, upload-time = "2025-10-21T14:51:56.535Z" }, { url = "https://files.pythonhosted.org/packages/4d/3c/972edfddb4ae8a9fccd3c3766ed47453b6f805b6026b32f10209dd4b8ad4/cuda_bindings-12.9.4-cp313-cp313t-win_amd64.whl", hash = "sha256:b32d8b685f0e66f5658bcf4601ef034e89fc2843582886f0a58784a4302da06c", size = 11894363, upload-time = "2025-10-21T14:51:58.633Z" }, - { url = 
"https://files.pythonhosted.org/packages/1e/b5/96a6696e20c4ffd2b327f54c7d0fde2259bdb998d045c25d5dedbbe30290/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f53a7f453d4b2643d8663d036bafe29b5ba89eb904c133180f295df6dc151e5", size = 11624530, upload-time = "2025-10-21T14:52:01.539Z" }, - { url = "https://files.pythonhosted.org/packages/d1/af/6dfd8f2ed90b1d4719bc053ff8940e494640fe4212dc3dd72f383e4992da/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8b72ee72a9cc1b531db31eebaaee5c69a8ec3500e32c6933f2d3b15297b53686", size = 11922703, upload-time = "2025-10-21T14:52:03.585Z" }, { url = "https://files.pythonhosted.org/packages/e6/87/652796522cc1a7af559460e1ce59b642e05c1468b9c08522a9a096b4cf04/cuda_bindings-12.9.4-cp314-cp314-win_amd64.whl", hash = "sha256:53a10c71fdbdb743e0268d07964e5a996dd00b4e43831cbfce9804515d97d575", size = 11517716, upload-time = "2025-10-21T14:52:06.013Z" }, - { url = "https://files.pythonhosted.org/packages/39/73/d2fc40c043bac699c3880bf88d3cebe9d88410cd043795382826c93a89f0/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:20f2699d61d724de3eb3f3369d57e2b245f93085cab44fd37c3bea036cea1a6f", size = 11565056, upload-time = "2025-10-21T14:52:08.338Z" }, - { url = "https://files.pythonhosted.org/packages/6c/19/90ac264acc00f6df8a49378eedec9fd2db3061bf9263bf9f39fd3d8377c3/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80bffc357df9988dca279734bc9674c3934a654cab10cadeed27ce17d8635ee", size = 11924658, upload-time = "2025-10-21T14:52:10.411Z" }, { url = "https://files.pythonhosted.org/packages/ab/52/a30f46e822bfa6b4a659d1e8de8c4a4adf908ea075dac568b55362541bd8/cuda_bindings-12.9.4-cp314-cp314t-win_amd64.whl", hash = "sha256:53e11991a92ff6f26a0c8a98554cd5d6721c308a6b7bfb08bebac9201e039e43", size = 12055608, upload-time = "2025-10-21T14:52:12.335Z" }, ] 
+[[package]] +name = "cuda-bindings" +version = "13.2.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version == '3.13.*' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and sys_platform == 'linux'", + "python_full_version < '3.12' and sys_platform == 'linux'", +] +dependencies = [ + { name = "cuda-pathfinder", marker = "sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/3a8241c6e19483ac1f1dcf5c10238205dcb8a6e9d0d4d4709240dff28ff4/cuda_bindings-13.2.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:721104c603f059780d287969be3d194a18d0cc3b713ed9049065a1107706759d", size = 5730273, upload-time = "2026-03-11T00:12:37.18Z" }, + { url = "https://files.pythonhosted.org/packages/e9/94/2748597f47bb1600cd466b20cab4159f1530a3a33fe7f70fee199b3abb9e/cuda_bindings-13.2.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1eba9504ac70667dd48313395fe05157518fd6371b532790e96fbb31bbb5a5e1", size = 6313924, upload-time = "2026-03-11T00:12:39.462Z" }, + { url = "https://files.pythonhosted.org/packages/52/c8/b2589d68acf7e3d63e2be330b84bc25712e97ed799affbca7edd7eae25d6/cuda_bindings-13.2.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e865447abfb83d6a98ad5130ed3c70b1fc295ae3eeee39fd07b4ddb0671b6788", size = 5722404, upload-time = "2026-03-11T00:12:44.041Z" }, + { url = "https://files.pythonhosted.org/packages/1f/92/f899f7bbb5617bb65ec52a6eac1e9a1447a86b916c4194f8a5001b8cde0c/cuda_bindings-13.2.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46d8776a55d6d5da9dd6e9858fba2efcda2abe6743871dee47dd06eb8cb6d955", size = 6320619, upload-time = "2026-03-11T00:12:45.939Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/93/eef988860a3ca985f82c4f3174fc0cdd94e07331ba9a92e8e064c260337f/cuda_bindings-13.2.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6629ca2df6f795b784752409bcaedbd22a7a651b74b56a165ebc0c9dcbd504d0", size = 5614610, upload-time = "2026-03-11T00:12:50.337Z" }, + { url = "https://files.pythonhosted.org/packages/18/23/6db3aba46864aee357ab2415135b3fe3da7e9f1fa0221fa2a86a5968099c/cuda_bindings-13.2.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7dca0da053d3b4cc4869eff49c61c03f3c5dbaa0bcd712317a358d5b8f3f385d", size = 6149914, upload-time = "2026-03-11T00:12:52.374Z" }, + { url = "https://files.pythonhosted.org/packages/c0/87/87a014f045b77c6de5c8527b0757fe644417b184e5367db977236a141602/cuda_bindings-13.2.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a6464b30f46692d6c7f65d4a0e0450d81dd29de3afc1bb515653973d01c2cd6e", size = 5685673, upload-time = "2026-03-11T00:12:56.371Z" }, + { url = "https://files.pythonhosted.org/packages/ee/5e/c0fe77a73aaefd3fff25ffaccaac69c5a63eafdf8b9a4c476626ef0ac703/cuda_bindings-13.2.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4af9f3e1be603fa12d5ad6cfca7844c9d230befa9792b5abdf7dd79979c3626", size = 6191386, upload-time = "2026-03-11T00:12:58.965Z" }, + { url = "https://files.pythonhosted.org/packages/5f/58/ed2c3b39c8dd5f96aa7a4abef0d47a73932c7a988e30f5fa428f00ed0da1/cuda_bindings-13.2.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df850a1ff8ce1b3385257b08e47b70e959932f5f432d0a4e46a355962b4e4771", size = 5507469, upload-time = "2026-03-11T00:13:04.063Z" }, + { url = "https://files.pythonhosted.org/packages/1f/01/0c941b112ceeb21439b05895eace78ca1aa2eaaf695c8521a068fd9b4c00/cuda_bindings-13.2.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8a16384c6494e5485f39314b0b4afb04bee48d49edb16d5d8593fd35bbd231b", 
size = 6059693, upload-time = "2026-03-11T00:13:06.003Z" }, +] + [[package]] name = "cuda-pathfinder" version = "1.4.4" @@ -1647,13 +1636,107 @@ wheels = [ name = "cuda-python" version = "12.9.4" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'emscripten'", + "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'emscripten'", + "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.12' and sys_platform == 'win32'", + "python_full_version < '3.12' and sys_platform == 'emscripten'", + "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", +] dependencies = [ - { name = "cuda-bindings" }, + { name = "cuda-bindings", version = "12.9.4", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/af/f3/6b032a554019cfb3447e671798c1bd3e79b5f1af20d10253f56cea269ef2/cuda_python-12.9.4-py3-none-any.whl", hash = "sha256:d2cacea882a69863f1e7d27ee71d75f0684f4c76910aff839067e4f89c902279", size = 7594, upload-time = "2025-10-21T14:55:12.846Z" }, ] +[[package]] +name = "cuda-python" +version = "13.2.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'linux'", + 
"python_full_version == '3.13.*' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and sys_platform == 'linux'", + "python_full_version < '3.12' and sys_platform == 'linux'", +] +dependencies = [ + { name = "cuda-bindings", version = "13.2.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, + { name = "cuda-pathfinder", marker = "sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/da/b4dbe129f941afe1c24a09ba53521b78875626763d96414798a74763282f/cuda_python-13.2.0-py3-none-any.whl", hash = "sha256:2f092b0ec13a860115fa595411889ee939ad203450ea4f91e9461b174ea7b084", size = 8145, upload-time = "2026-03-11T13:55:19.143Z" }, +] + +[[package]] +name = "cuda-tile" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/d6/753aecb3e8fcee80d20f9d32b4504276691c2f77fc10abbbd8e82197e24c/cuda_tile-1.3.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:59d9843fa723ceb4d680ec246e12e3ded857266e4c2bf5c5d21e530d6d765060", size = 245441, upload-time = "2026-04-20T15:51:06.618Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2d/8b416239413bf11d17d42ccee43258f3787da13bcea7b2e42e8bbf04b3da/cuda_tile-1.3.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:2888d6b89fae053a53ca7bb703c508a5cf90671d266934573c5b6c25978022c4", size = 246706, upload-time = "2026-04-20T15:51:03.467Z" }, + { url = "https://files.pythonhosted.org/packages/46/b0/68303196d577e497ddf3cef0fd92785d83f47f6239543a5b19dc4076e487/cuda_tile-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:791b363251fbc64db4402d92153ba3d14bc0aaa4d218cea66562af02a7a76bd9", size = 240640, upload-time = "2026-04-20T15:52:15.428Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/49/4592bc94ca05a07c7947ea114fd12734c8497f2daffee9faa79a03e39fb5/cuda_tile-1.3.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:375316b64c51ee7cfadb2f170a30c1547bc41eb39f1e233a6556713857d2e81f", size = 245744, upload-time = "2026-04-20T15:52:09.621Z" }, + { url = "https://files.pythonhosted.org/packages/40/76/84cb68be463c827bf79da9fa0aa5140838de6455ef6f438bbe0ffa75d378/cuda_tile-1.3.0-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:e4865acbff1172aaee304bf9c550586088d8b4545a384423597a590899386709", size = 247301, upload-time = "2026-04-20T15:51:04.042Z" }, + { url = "https://files.pythonhosted.org/packages/db/6f/d2fd16c2b0d878021dc703eea5f8fe09599d6b04bdc2531a36fc617751fd/cuda_tile-1.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:93e20ed31e46e5bf704fb31d13e1c08338d2177838798876f7ee9ec4384b75ba", size = 240923, upload-time = "2026-04-20T15:52:14.939Z" }, + { url = "https://files.pythonhosted.org/packages/9d/7d/ee943554f83d6a143d9e0a5cf27cd7f5f8f6ef447c7e8366d9ad6a5d1bf2/cuda_tile-1.3.0-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:8a9bd4dae193cddf438f55d617b6f25b4b0b0fcf4ac4acde7d2695898e396c30", size = 245750, upload-time = "2026-04-20T15:52:12.91Z" }, + { url = "https://files.pythonhosted.org/packages/35/20/e1daea2dc4e094290ba727750f8342095ae857ff3ba4f81c489f48688613/cuda_tile-1.3.0-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:a44a81e255fdb7bf8e1f7511fe3a019e6045024574509ea8548e0f71f25f8473", size = 247300, upload-time = "2026-04-20T15:51:03.072Z" }, + { url = "https://files.pythonhosted.org/packages/2b/77/c13afad1a06824c1c942afd0205e78ff17f0ee06fc1a943f6e2135cf4112/cuda_tile-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:efcb93c25563fe23d6aa083c22893fd703122eaf684b0d36874982d28a6dad0b", size = 240925, upload-time = "2026-04-20T15:52:21.283Z" }, +] + +[[package]] +name = "cuda-toolkit" +version = "13.0.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/57/b2/453099f5f3b698d7d0eab38916aac44c7f76229f451709e2eb9db6615dcd/cuda_toolkit-13.0.2-py2.py3-none-any.whl", hash = "sha256:b198824cf2f54003f50d64ada3a0f184b42ca0846c1c94192fa269ecd97a66eb", size = 2364, upload-time = "2025-12-19T23:24:07.328Z" }, +] + +[package.optional-dependencies] +cublas = [ + { name = "nvidia-cublas", marker = "sys_platform == 'linux'" }, +] +cudart = [ + { name = "nvidia-cuda-runtime", marker = "sys_platform == 'linux'" }, +] +cufft = [ + { name = "nvidia-cufft", marker = "sys_platform == 'linux'" }, +] +cufile = [ + { name = "nvidia-cufile", marker = "sys_platform == 'linux'" }, +] +cupti = [ + { name = "nvidia-cuda-cupti", marker = "sys_platform == 'linux'" }, +] +curand = [ + { name = "nvidia-curand", marker = "sys_platform == 'linux'" }, +] +cusolver = [ + { name = "nvidia-cusolver", marker = "sys_platform == 'linux'" }, +] +cusparse = [ + { name = "nvidia-cusparse", marker = "sys_platform == 'linux'" }, +] +nvjitlink = [ + { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux'" }, +] +nvrtc = [ + { name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux'" }, +] +nvtx = [ + { name = "nvidia-nvtx", marker = "sys_platform == 'linux'" }, +] + [[package]] name = "cudo-compute" version = "0.3.6" @@ -2245,6 +2328,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/a8/20d0723294217e47de6d9e2e40fd4a9d2f7c4b6ef974babd482a59743694/fastjsonschema-2.21.2-py3-none-any.whl", hash = "sha256:1c797122d0a86c5cace2e54bf4e819c36223b552017172f32c5c024a6b77e463", size = 24024, upload-time = "2025-08-14T18:49:34.776Z" }, ] +[[package]] +name = "fastsafetensors" +version = "0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typer", marker = "sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/98/053c622e61bb766d31327a88215082320a4ba8bd6a62c4c5435221844103/fastsafetensors-0.3.tar.gz", hash = 
"sha256:89f392569d2281d1a966d3b64f99a6386149116e37eef4f4890168c87a8c4f19", size = 57500, upload-time = "2026-04-22T07:16:30.84Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/fc/78ca177fe45fa5ea0020b5a570cbe5a59cb9b3b4ff49e011261c75711634/fastsafetensors-0.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7e875afdc7e76bc0ddf46fd4b32db9f232543a8dea383dc7eb9de8f1dcd9e090", size = 1828752, upload-time = "2026-04-22T07:16:23.697Z" }, + { url = "https://files.pythonhosted.org/packages/0a/06/bca80663bf8136f273643d149953dd29ca2c52aa4faac4b67506b871a5ec/fastsafetensors-0.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3ce38241c5afedf94ef37531b8b8703016b2ea39350cfd33e819e65d4d5305e0", size = 1855661, upload-time = "2026-04-22T07:16:25.833Z" }, + { url = "https://files.pythonhosted.org/packages/70/45/459a11e31aec2e9b803ea19cd796b3b678435086d688c91c29d3f880c996/fastsafetensors-0.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:881b8dd5ebc5a73691ef9695a2d224f05bc9c5a60a95e1329f13df784502ae24", size = 1855525, upload-time = "2026-04-22T07:16:29.253Z" }, +] + [[package]] name = "fastuuid" version = "0.14.0" @@ -2352,19 +2449,20 @@ wheels = [ [[package]] name = "flashinfer-cubin" -version = "0.6.6" +version = "0.6.8.post1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/12/e8/826f9452bc5f76b94d7eb025f03dcaf1b51b9ed7790386c0285191e69be4/flashinfer_cubin-0.6.6-py3-none-any.whl", hash = "sha256:36508dfc792eb5ecfb15d2c140a7702812e1fa1ab0fb03929b2ed55e3e8191f3", size = 267661457, upload-time = "2026-03-11T01:36:36.538Z" }, + { url = "https://files.pythonhosted.org/packages/11/b7/5e3b1a8c67031b421a8bd29c2bc29b900a550bb3392e8bda18bb15b5e476/flashinfer_cubin-0.6.8.post1-py3-none-any.whl", hash = "sha256:43636d4cd39e694a83d76a89f87fefcdf4cecb4c4f7dd22dac25ec368c1e901f", size = 295154113, upload-time = 
"2026-04-18T18:28:21.738Z" }, ] [[package]] name = "flashinfer-python" -version = "0.6.6" +version = "0.6.8.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "apache-tvm-ffi" }, { name = "click" }, + { name = "cuda-tile" }, { name = "einops" }, { name = "ninja" }, { name = "numpy" }, @@ -2377,9 +2475,9 @@ dependencies = [ { name = "torch" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/03/70/c5a235297351021f5d3d3233523a85f5a6468495587489ad2f257e8eafe2/flashinfer_python-0.6.6.tar.gz", hash = "sha256:0730ba7c7aad332961933bcebc5119762797161ede57d955f6fd199818ed1d92", size = 5344156, upload-time = "2026-03-11T01:36:21.434Z" } +sdist = { url = "https://files.pythonhosted.org/packages/53/1e/2760fef9e74abc4480961048e5790b4c9e955872fb4d7d97900cfddced5a/flashinfer_python-0.6.8.post1.tar.gz", hash = "sha256:b18e4121baf9b93fa9a9f368ba9b981a0342895f50ab9dddc224aeb964ed346f", size = 6675885, upload-time = "2026-04-18T18:28:13.299Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/61/385d06755f3ab66333018285657adf0daf8a90a129448231fd09e315bd2e/flashinfer_python-0.6.6-py3-none-any.whl", hash = "sha256:078f158636969eec1a0d3dea19c3ca90b426b66df89bbf7b7b8276ce2ec08148", size = 7817047, upload-time = "2026-03-11T01:36:19.198Z" }, + { url = "https://files.pythonhosted.org/packages/73/6d/1e8a8533913e33a50a486332ce0673f4fdb860f6eb9ed450327c5c1762cb/flashinfer_python-0.6.8.post1-py3-none-any.whl", hash = "sha256:818f9b8cc2fe66c42a1f6264be4841ac8821ada703685a02cfccb2b5124a710b", size = 9385316, upload-time = "2026-04-18T18:28:10.285Z" }, ] [[package]] @@ -4168,16 +4266,20 @@ wheels = [ [[package]] name = "llvmlite" -version = "0.44.0" +version = "0.47.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = 
"sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880, upload-time = "2025-01-20T11:14:41.342Z" } +sdist = { url = "https://files.pythonhosted.org/packages/01/88/a8952b6d5c21e74cbf158515b779666f692846502623e9e3c39d8e8ba25f/llvmlite-0.47.0.tar.gz", hash = "sha256:62031ce968ec74e95092184d4b0e857e444f8fdff0b8f9213707699570c33ccc", size = 193614, upload-time = "2026-03-31T18:29:53.497Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/99/fe/d030f1849ebb1f394bb3f7adad5e729b634fb100515594aca25c354ffc62/llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5d22c3bfc842668168a786af4205ec8e3ad29fb1bc03fd11fd48460d0df64c1", size = 42361858, upload-time = "2025-01-20T11:13:07.623Z" }, - { url = "https://files.pythonhosted.org/packages/d7/7a/ce6174664b9077fc673d172e4c888cb0b128e707e306bc33fff8c2035f0d/llvmlite-0.44.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f01a394e9c9b7b1d4e63c327b096d10f6f0ed149ef53d38a09b3749dcf8c9610", size = 41184200, upload-time = "2025-01-20T11:13:20.058Z" }, - { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901, upload-time = "2025-01-20T11:13:46.711Z" }, - { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247, upload-time = "2025-01-20T11:13:56.159Z" }, - { url = "https://files.pythonhosted.org/packages/d2/1b/656f5a357de7135a3777bd735cc7c9b8f23b4d37465505bd0eaf4be9befe/llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf", size = 42361904, upload-time = "2025-01-20T11:14:22.949Z" }, - { url = "https://files.pythonhosted.org/packages/d8/e1/12c5f20cb9168fb3464a34310411d5ad86e4163c8ff2d14a2b57e5cc6bac/llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc", size = 41184245, upload-time = "2025-01-20T11:14:31.731Z" }, + { url = "https://files.pythonhosted.org/packages/46/27/5799b020e4cdfb25a7c951c06a96397c135efcdc21b78d853bbd9c814c7d/llvmlite-0.47.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ca14f02e29134e837982497959a8e2193d6035235de1cb41a9cb2bd6da4eedbb", size = 56275177, upload-time = "2026-03-31T18:28:31.01Z" }, + { url = "https://files.pythonhosted.org/packages/7e/51/48a53fedf01cb1f3f43ef200be17ebf83c8d9a04018d3783c1a226c342c2/llvmlite-0.47.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:12a69d4bb05f402f30477e21eeabe81911e7c251cecb192bed82cd83c9db10d8", size = 55128631, upload-time = "2026-03-31T18:28:36.046Z" }, + { url = "https://files.pythonhosted.org/packages/e6/4b/e3f2cd17822cf772a4a51a0a8080b0032e6d37b2dbe8cfb724eac4e31c52/llvmlite-0.47.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5853bf26160857c0c2573415ff4efe01c4c651e59e2c55c2a088740acfee51cd", size = 56275178, upload-time = "2026-03-31T18:28:48.342Z" }, + { url = "https://files.pythonhosted.org/packages/b6/55/a3b4a543185305a9bdf3d9759d53646ed96e55e7dfd43f53e7a421b8fbae/llvmlite-0.47.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:003bcf7fa579e14db59c1a1e113f93ab8a06b56a4be31c7f08264d1d4072d077", size = 55128632, upload-time = "2026-03-31T18:28:52.901Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/b8/69f5565f1a280d032525878a86511eebed0645818492feeb169dfb20ae8e/llvmlite-0.47.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2699a74321189e812d476a43d6d7f652f51811e7b5aad9d9bba842a1c7927acb", size = 56275178, upload-time = "2026-03-31T18:29:05.748Z" }, + { url = "https://files.pythonhosted.org/packages/d6/da/b32cafcb926fb0ce2aa25553bf32cb8764af31438f40e2481df08884c947/llvmlite-0.47.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6c6951e2b29930227963e53ee152441f0e14be92e9d4231852102d986c761e40", size = 55128632, upload-time = "2026-03-31T18:29:11.235Z" }, + { url = "https://files.pythonhosted.org/packages/64/1d/a760e993e0c0ba6db38d46b9f48f6c7dceb8ac838824997fb9e25f97bc04/llvmlite-0.47.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ddbccff2aeaff8670368340a158abefc032fe9b3ccf7d9c496639263d00151aa", size = 56275176, upload-time = "2026-03-31T18:29:24.149Z" }, + { url = "https://files.pythonhosted.org/packages/84/3b/e679bc3b29127182a7f4aa2d2e9e5bea42adb93fb840484147d59c236299/llvmlite-0.47.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4a7b778a2e144fc64468fb9bf509ac1226c9813a00b4d7afea5d988c4e22fca", size = 55128631, upload-time = "2026-03-31T18:29:29.536Z" }, + { url = "https://files.pythonhosted.org/packages/11/03/16090dd6f74ba2b8b922276047f15962fbeea0a75d5601607edb301ba945/llvmlite-0.47.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa1cbd800edd3b20bc141521f7fd45a6185a5b84109aa6855134e81397ffe72b", size = 56275178, upload-time = "2026-03-31T18:29:42.58Z" }, + { url = "https://files.pythonhosted.org/packages/f5/cb/0abf1dd4c5286a95ffe0c1d8c67aec06b515894a0dd2ac97f5e27b82ab0b/llvmlite-0.47.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6725179b89f03b17dabe236ff3422cb8291b4c1bf40af152826dfd34e350ae8", size = 55128632, upload-time = 
"2026-03-31T18:29:46.939Z" }, ] [[package]] @@ -4220,67 +4322,16 @@ wheels = [ name = "mamba-ssm" version = "2.3.1" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", - "python_full_version == '3.13.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version < '3.12' and sys_platform == 'win32'", - "python_full_version < '3.12' and sys_platform == 'emscripten'", - "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", -] -dependencies = [ - { name = "einops", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "ninja", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "packaging", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "torch", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "transformers", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { 
name = "triton", marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/34/67/ec89aa703da194a813e35d2ea2de8f74a7ce6991a120a29f3a0c5e30d4b9/mamba_ssm-2.3.1.tar.gz", hash = "sha256:4d529477ad94753962216d583fc8f1c127c717b7d7c875d6bbb9376366d0d761", size = 121707, upload-time = "2026-03-10T09:27:34.798Z" } - -[[package]] -name = "mamba-ssm" -version = "2.3.1" -source = { url = "https://github.com/state-spaces/mamba/releases/download/v2.3.1/mamba_ssm-2.3.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" } -resolution-markers = [ - "python_full_version < '3.12' and sys_platform == 'linux'", -] dependencies = [ - { name = "einops", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, - { name = "ninja", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, - { name = "packaging", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, - { name = "torch", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, - { name = "transformers", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, - { name = "triton", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://github.com/state-spaces/mamba/releases/download/v2.3.1/mamba_ssm-2.3.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl", hash = "sha256:04ebab0968058c64592eb8bad43ea7a8a42ac9927b2d88679a60e7da6cf907c8" }, -] - -[package.metadata] -requires-dist = [ - { name = "causal-conv1d", marker = "extra == 'causal-conv1d'", specifier = ">=1.2.0" }, { name = "einops" }, { name = "ninja" }, { name = "packaging" }, - { name = "pytest", marker = "extra == 'dev'" }, - { name = "setuptools", specifier = ">=61.0.0" }, + { name = "setuptools" }, { name = "torch" }, { name = "transformers" }, { name = "triton" }, ] 
-provides-extras = ["causal-conv1d", "dev"] +sdist = { url = "https://files.pythonhosted.org/packages/34/67/ec89aa703da194a813e35d2ea2de8f74a7ce6991a120a29f3a0c5e30d4b9/mamba_ssm-2.3.1.tar.gz", hash = "sha256:4d529477ad94753962216d583fc8f1c127c717b7d7c875d6bbb9376366d0d761", size = 121707, upload-time = "2026-03-10T09:27:34.798Z" } [[package]] name = "markdown" @@ -4492,8 +4543,7 @@ version = "0.4.0rc0" source = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git?rev=e049cc00c24d03e2ae45d2608c7a44e2d2364e3d#e049cc00c24d03e2ae45d2608c7a44e2d2364e3d" } dependencies = [ { name = "accelerate" }, - { name = "causal-conv1d", version = "1.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "causal-conv1d", version = "1.6.1", source = { url = "https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.6.1.post4/causal_conv1d-1.6.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" }, marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, + { name = "causal-conv1d" }, { name = "comet-ml" }, { name = "datasets" }, { name = "diffusers" }, @@ -4502,8 +4552,7 @@ dependencies = [ { name = "hydra-core" }, { name = "imageio" }, { name = "imageio-ffmpeg" }, - { name = "mamba-ssm", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "mamba-ssm", version = "2.3.1", source = { url = "https://github.com/state-spaces/mamba/releases/download/v2.3.1/mamba_ssm-2.3.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" }, marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, + { name = "mamba-ssm" }, { name = "megatron-core", extra = ["dev", "mlm"] }, { name = "mlflow" }, { name = "nvidia-resiliency-ext" }, @@ -4538,16 +4587,14 @@ dependencies = [ [package.optional-dependencies] dev = [ { name = "av" }, - { name = "causal-conv1d", version = "1.6.1", 
source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "causal-conv1d", version = "1.6.1", source = { url = "https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.6.1.post4/causal_conv1d-1.6.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" }, marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, + { name = "causal-conv1d" }, { name = "datasets" }, { name = "einops" }, { name = "fastapi" }, { name = "flash-linear-attention" }, { name = "flashinfer-python" }, { name = "hypercorn" }, - { name = "mamba-ssm", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or sys_platform != 'linux'" }, - { name = "mamba-ssm", version = "2.3.1", source = { url = "https://github.com/state-spaces/mamba/releases/download/v2.3.1/mamba_ssm-2.3.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" }, marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, + { name = "mamba-ssm" }, { name = "megatron-energon", extra = ["av-decode"] }, { name = "multi-storage-client" }, { name = "nv-grouped-gemm" }, @@ -4606,7 +4653,7 @@ av-decode = [ [[package]] name = "mistral-common" -version = "1.10.0" +version = "1.11.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonschema", marker = "sys_platform == 'linux'" }, @@ -4618,9 +4665,9 @@ dependencies = [ { name = "tiktoken", marker = "sys_platform == 'linux'" }, { name = "typing-extensions", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/22/f798c1acc3f8cf32b6201b063d96867d79aa39d31dff12478739e1a78979/mistral_common-1.10.0.tar.gz", hash = "sha256:e456ff101edbdfc094039ec6c26f7d0f73356729798d628a6e6e96c3917147bc", size = 6351515, upload-time = "2026-03-13T10:13:46.683Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/7d/15/12076a58b9dde4ad486b6de4afd2dfe3e8226fd049ef44553892e62f2e92/mistral_common-1.11.1.tar.gz", hash = "sha256:b784e1f9141bbcb26ab1f61b724c709f08cd3543e81730cb7248721499491840", size = 6356869, upload-time = "2026-04-29T07:24:43.234Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/87/c6/1429a0a3ab40f8530492b62b52eb792266c261b22ed62aa7f25d61d531ae/mistral_common-1.10.0-py3-none-any.whl", hash = "sha256:c594d1a05202b61e8f0d867ec6064df4c5e5d492c2c2bdb6fd8fb4872c6afd8b", size = 6525284, upload-time = "2026-03-13T10:13:44.329Z" }, + { url = "https://files.pythonhosted.org/packages/c3/6d/ab384c8b772390426472b623b85135e9b5f159ab31a21d87a6d46819d386/mistral_common-1.11.1-py3-none-any.whl", hash = "sha256:797fded812139069d359fc08a1a66bf994555e10bb51b613941281b91ee07135", size = 6531421, upload-time = "2026-04-29T07:24:40.516Z" }, ] [package.optional-dependencies] @@ -5155,20 +5202,24 @@ wheels = [ [[package]] name = "numba" -version = "0.61.2" +version = "0.65.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "llvmlite", marker = "sys_platform == 'linux'" }, { name = "numpy", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615, upload-time = "2025-04-09T02:58:07.659Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/61/7299643b9c18d669e04be7c5bcb64d985070d07553274817b45b049e7bfe/numba-0.65.0.tar.gz", hash = "sha256:edad0d9f6682e93624c00125a471ae4df186175d71fd604c983c377cdc03e68b", size = 2764131, upload-time = "2026-04-01T03:52:01.946Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/97/c8/8740616c8436c86c1b9a62e72cb891177d2c34c2d24ddcde4c390371bf4c/numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:3945615cd73c2c7eba2a85ccc9c1730c21cd3958bfcf5a44302abae0fb07bb60", size = 3829227, upload-time = "2025-04-09T02:57:46.63Z" }, - { url = "https://files.pythonhosted.org/packages/fc/06/66e99ae06507c31d15ff3ecd1f108f2f59e18b6e08662cd5f8a5853fbd18/numba-0.61.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbfdf4eca202cebade0b7d43896978e146f39398909a42941c9303f82f403a18", size = 3523422, upload-time = "2025-04-09T02:57:48.222Z" }, - { url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928, upload-time = "2025-04-09T02:57:55.206Z" }, - { url = "https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115, upload-time = "2025-04-09T02:57:56.818Z" }, - { url = "https://files.pythonhosted.org/packages/0d/e0/5ea04e7ad2c39288c0f0f9e8d47638ad70f28e275d092733b5817cf243c9/numba-0.61.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab", size = 3893918, upload-time = "2025-04-09T02:58:02.933Z" }, - { url = "https://files.pythonhosted.org/packages/17/58/064f4dcb7d7e9412f16ecf80ed753f92297e39f399c905389688cf950b81/numba-0.61.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e", size = 3584056, upload-time = "2025-04-09T02:58:04.538Z" }, + { url = "https://files.pythonhosted.org/packages/c1/a7/11e2b24251d57cf41fc9ad83f378d890d61a890e3f8eb6338b39833f67a4/numba-0.65.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:032b0b8e879512cd424d79eed6d772a1399c6387ded184c2cf3cc22c08d750a6", size = 3744674, upload-time = "2026-04-01T03:51:27.311Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0b/7c63eb742859a6243f42288441f65ac9dac96ea59f409e43b713aafbe867/numba-0.65.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af143d823624033a128b5950c0aaf9ffc2386dfe954eb757119cf0432335534c", size = 3450620, upload-time = "2026-04-01T03:51:29.092Z" }, + { url = "https://files.pythonhosted.org/packages/73/36/88406bd58600cc696417b8e5dd6a056478da808f3eaf48d18e2421e0c2d9/numba-0.65.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a52d92ffd297c10364bce60cd1fcb88f99284ab5df085f2c6bcd1cb33b529a6f", size = 3801411, upload-time = "2026-04-01T03:51:34.321Z" }, + { url = "https://files.pythonhosted.org/packages/0c/61/ce753a1d7646dd477e16d15e89473703faebb8995d2f71d7ad69a540b565/numba-0.65.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da8e371e328c06d0010c3d8b44b21858652831b85bcfba78cb22c042e22dbd8e", size = 3501622, upload-time = "2026-04-01T03:51:36.348Z" }, + { url = "https://files.pythonhosted.org/packages/1b/8f/3d116e4b8e92f6abace431afa4b2b944f4d65bdee83af886f5c4b263df95/numba-0.65.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b8a9008411615c69d083d1dcf477f75a5aa727b30beb16e139799e2be945cdfd", size = 3809537, upload-time = "2026-04-01T03:51:41.42Z" }, + { url = "https://files.pythonhosted.org/packages/b5/2c/6a3ca4128e253cb67affe06deb47688f51ce968f5111e2a06d010e6f1fa6/numba-0.65.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af96c0cba53664efcb361528b8c75e011a6556c859c7e08424c2715201c6cf7a", size = 3508615, upload-time = "2026-04-01T03:51:43.444Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/8d/e12d6ff4b9119db3cbf7b2db1ce257576441bd3c76388c786dea74f20b02/numba-0.65.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:05c0a9fdf75d85f57dee47b719e8d6415707b80aae45d75f63f9dc1b935c29f7", size = 3778456, upload-time = "2026-04-01T03:51:48.552Z" }, + { url = "https://files.pythonhosted.org/packages/17/89/abcd83e76f6a773276fe76244140671bcc5bf820f6e2ae1a15362ae4c8c9/numba-0.65.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:583680e0e8faf124d362df23b4b593f3221a8996341a63d1b664c122401bec2f", size = 3478464, upload-time = "2026-04-01T03:51:50.527Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e5/8267b0adb0c01b52b553df5062fbbb42c30ed5362d08b85cc913a36f838f/numba-0.65.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7fa502960f7a2f3f5cb025bc7bff888a3551277b92431bfdc5ba2f11a375749", size = 3816373, upload-time = "2026-04-01T03:51:56.18Z" }, + { url = "https://files.pythonhosted.org/packages/b0/f5/b8397ca360971669a93706b9274592b6864e4367a37d498fbbcb62aa2d48/numba-0.65.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5046c63f783ca3eb6195f826a50797465e7c4ce811daa17c9bea47e310c9b964", size = 3532782, upload-time = "2026-04-01T03:51:58.387Z" }, ] [[package]] @@ -5207,46 +5258,51 @@ dependencies = [ sdist = { url = "https://files.pythonhosted.org/packages/02/ad/046a097b63a96c1ba1d85f0031dbe7fcbdb33e6c445dfbaba2ffaefdd497/nv_grouped_gemm-1.1.4.post8.tar.gz", hash = "sha256:ab321693f0292cfd8a26dc7b6f14decd9eb00e209494de7218e4fad36191275d", size = 20821209, upload-time = "2025-12-17T02:22:38.432Z" } [[package]] -name = "nvidia-cublas-cu12" -version = "12.8.4.1" +name = "nvidia-cublas" +version = "13.1.0.3" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, + { url = "https://files.pythonhosted.org/packages/e1/a5/fce49e2ae977e0ccc084e5adafceb4f0ac0c8333cb6863501618a7277f67/nvidia_cublas-13.1.0.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c86fc7f7ae36d7528288c5d88098edcb7b02c633d262e7ddbb86b0ad91be5df2", size = 542851226, upload-time = "2025-10-09T08:59:04.818Z" }, + { url = "https://files.pythonhosted.org/packages/e7/44/423ac00af4dd95a5aeb27207e2c0d9b7118702149bf4704c3ddb55bb7429/nvidia_cublas-13.1.0.3-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:ee8722c1f0145ab246bccb9e452153b5e0515fd094c3678df50b2a0888b8b171", size = 423133236, upload-time = "2025-10-09T08:59:32.536Z" }, ] [[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" +name = "nvidia-cuda-cupti" +version = "13.0.85" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, + { url = "https://files.pythonhosted.org/packages/2a/2a/80353b103fc20ce05ef51e928daed4b6015db4aaa9162ed0997090fe2250/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_aarch64.whl", hash = "sha256:796bd679890ee55fb14a94629b698b6db54bcfd833d391d5e94017dd9d7d3151", size = 10310827, upload-time = "2025-09-04T08:26:42.012Z" }, + { url = "https://files.pythonhosted.org/packages/33/6d/737d164b4837a9bbd202f5ae3078975f0525a55730fe871d8ed4e3b952b0/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_x86_64.whl", 
hash = "sha256:4eb01c08e859bf924d222250d2e8f8b8ff6d3db4721288cf35d14252a4d933c8", size = 10715597, upload-time = "2025-09-04T08:26:51.312Z" }, ] [[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" +name = "nvidia-cuda-nvrtc" +version = "13.0.88" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = "https://files.pythonhosted.org/packages/c3/68/483a78f5e8f31b08fb1bb671559968c0ca3a065ac7acabfc7cee55214fd6/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:ad9b6d2ead2435f11cbb6868809d2adeeee302e9bb94bcf0539c7a40d80e8575", size = 90215200, upload-time = "2025-09-04T08:28:44.204Z" }, + { url = "https://files.pythonhosted.org/packages/b7/dc/6bb80850e0b7edd6588d560758f17e0550893a1feaf436807d64d2da040f/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d27f20a0ca67a4bb34268a5e951033496c5b74870b868bacd046b1b8e0c3267b", size = 43015449, upload-time = "2025-09-04T08:28:20.239Z" }, ] [[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" +name = "nvidia-cuda-runtime" +version = "13.0.96" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/4f/17d7b9b8e285199c58ce28e31b5c5bbaa4d8271af06a89b6405258245de2/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ef9bcbe90493a2b9d810e43d249adb3d02e98dd30200d86607d8d02687c43f55", size = 2261060, upload-time = "2025-10-09T08:55:15.78Z" }, + { url = "https://files.pythonhosted.org/packages/2e/24/d1558f3b68b1d26e706813b1d10aa1d785e4698c425af8db8edc3dced472/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f82250d7782aa23b6cfe765ecc7db554bd3c2870c43f3d1821f1d18aebf0548", size = 2243632, upload-time = "2025-10-09T08:55:36.117Z" }, ] [[package]] -name = "nvidia-cudnn-cu12" -version = "9.10.2.21" +name = "nvidia-cudnn-cu13" +version = "9.19.0.56" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cublas", marker = "sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, + { url = "https://files.pythonhosted.org/packages/f1/84/26025437c1e6b61a707442184fa0c03d083b661adf3a3eecfd6d21677740/nvidia_cudnn_cu13-9.19.0.56-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:6ed29ffaee1176c612daf442e4dd6cfeb6a0caa43ddcbeb59da94953030b1be4", size = 433781201, upload-time = "2026-02-03T20:40:53.805Z" }, + { url = "https://files.pythonhosted.org/packages/a3/22/0b4b932655d17a6da1b92fa92ab12844b053bb2ac2475e179ba6f043da1e/nvidia_cudnn_cu13-9.19.0.56-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:d20e1734305e9d68889a96e3f35094d733ff1f83932ebe462753973e53a572bf", size = 366066321, upload-time = "2026-02-03T20:44:52.837Z" }, ] 
[[package]] @@ -5269,62 +5325,68 @@ wheels = [ ] [[package]] -name = "nvidia-cufft-cu12" -version = "11.3.3.83" +name = "nvidia-cufft" +version = "12.0.0.61" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, + { url = "https://files.pythonhosted.org/packages/8b/ae/f417a75c0259e85c1d2f83ca4e960289a5f814ed0cea74d18c353d3e989d/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2708c852ef8cd89d1d2068bdbece0aa188813a0c934db3779b9b1faa8442e5f5", size = 214053554, upload-time = "2025-09-04T08:31:38.196Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2f/7b57e29836ea8714f81e9898409196f47d772d5ddedddf1592eadb8ab743/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6c44f692dce8fd5ffd3e3df134b6cdb9c2f72d99cf40b62c32dde45eea9ddad3", size = 214085489, upload-time = "2025-09-04T08:31:56.044Z" }, ] [[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" +name = "nvidia-cufile" +version = "1.15.1.6" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/70/4f193de89a48b71714e74602ee14d04e4019ad36a5a9f20c425776e72cd6/nvidia_cufile-1.15.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08a3ecefae5a01c7f5117351c64f17c7c62efa5fffdbe24fc7d298da19cd0b44", size = 1223672, upload-time = "2025-09-04T08:32:22.779Z" }, + { url = "https://files.pythonhosted.org/packages/ab/73/cc4a14c9813a8a0d509417cf5f4bdaba76e924d58beb9864f5a7baceefbf/nvidia_cufile-1.15.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:bdc0deedc61f548bddf7733bdc216456c2fdb101d020e1ab4b88d232d5e2f6d1", size = 1136992, upload-time = "2025-09-04T08:32:14.119Z" }, ] [[package]] -name = "nvidia-curand-cu12" -version = "10.3.9.90" +name = "nvidia-curand" +version = "10.4.0.35" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, + { url = "https://files.pythonhosted.org/packages/1e/72/7c2ae24fb6b63a32e6ae5d241cc65263ea18d08802aaae087d9f013335a2/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:133df5a7509c3e292aaa2b477afd0194f06ce4ea24d714d616ff36439cee349a", size = 61962106, upload-time = "2025-08-04T10:21:41.128Z" }, + { url = "https://files.pythonhosted.org/packages/a5/9f/be0a41ca4a4917abf5cb9ae0daff1a6060cc5de950aec0396de9f3b52bc5/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:1aee33a5da6e1db083fe2b90082def8915f30f3248d5896bcec36a579d941bfc", size = 59544258, upload-time = "2025-08-04T10:22:03.992Z" }, ] [[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.3.90" +name = "nvidia-cusolver" +version = "12.0.4.66" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = 
"nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cublas", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparse", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c3/b30c9e935fc01e3da443ec0116ed1b2a009bb867f5324d3f2d7e533e776b/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:02c2457eaa9e39de20f880f4bd8820e6a1cfb9f9a34f820eb12a155aa5bc92d2", size = 223467760, upload-time = "2025-09-04T08:33:04.222Z" }, + { url = "https://files.pythonhosted.org/packages/5f/67/cba3777620cdacb99102da4042883709c41c709f4b6323c10781a9c3aa34/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:0a759da5dea5c0ea10fd307de75cdeb59e7ea4fcb8add0924859b944babf1112", size = 200941980, upload-time = "2025-09-04T08:33:22.767Z" }, ] [[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.8.93" +name = "nvidia-cusparse" +version = "12.6.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, + { url = "https://files.pythonhosted.org/packages/f8/94/5c26f33738ae35276672f12615a64bd008ed5be6d1ebcb23579285d960a9/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:80bcc4662f23f1054ee334a15c72b8940402975e0eab63178fc7e670aa59472c", size = 162155568, upload-time = "2025-09-04T08:33:42.864Z" }, + { url = "https://files.pythonhosted.org/packages/fa/18/623c77619c31d62efd55302939756966f3ecc8d724a14dab2b75f1508850/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b3c89c88d01ee0e477cb7f82ef60a11a4bcd57b6b87c33f789350b59759360b", size = 145942937, upload-time = "2025-09-04T08:33:58.029Z" }, ] [[package]] -name = "nvidia-cusparselt-cu12" -version = "0.7.1" +name = "nvidia-cusparselt-cu13" +version = "0.8.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, + { url = "https://files.pythonhosted.org/packages/46/10/8dcd1175260706a2fc92a16a52e306b71d4c1ea0b0cc4a9484183399818a/nvidia_cusparselt_cu13-0.8.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:400c6ed1cf6780fc6efedd64ec9f1345871767e6a1a0a552a1ea0578117ea77c", size = 220791277, upload-time = "2025-08-13T19:22:40.982Z" }, + { url = "https://files.pythonhosted.org/packages/fd/53/43b0d71f4e702fa9733f8b4571fdca50a8813f1e450b656c239beff12315/nvidia_cusparselt_cu13-0.8.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:25e30a8a7323935d4ad0340b95a0b69926eee755767e8e0b1cf8dd85b197d3fd", size = 169884119, upload-time = "2025-08-13T19:23:41.967Z" }, ] 
[[package]] @@ -5343,7 +5405,8 @@ name = "nvidia-cutlass-dsl-libs-base" version = "4.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-python" }, + { name = "cuda-python", version = "12.9.4", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, + { name = "cuda-python", version = "13.2.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, { name = "numpy" }, { name = "typing-extensions" }, ] @@ -5390,35 +5453,39 @@ wheels = [ ] [[package]] -name = "nvidia-nccl-cu12" -version = "2.27.5" +name = "nvidia-nccl-cu13" +version = "2.28.9" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, + { url = "https://files.pythonhosted.org/packages/39/55/1920646a2e43ffd4fc958536b276197ed740e9e0c54105b4bb3521591fc7/nvidia_nccl_cu13-2.28.9-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:01c873ba1626b54caa12272ed228dc5b2781545e0ae8ba3f432a8ef1c6d78643", size = 196561677, upload-time = "2025-11-18T05:49:03.45Z" }, + { url = "https://files.pythonhosted.org/packages/b0/b4/878fefaad5b2bcc6fcf8d474a25e3e3774bc5133e4b58adff4d0bca238bc/nvidia_nccl_cu13-2.28.9-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:e4553a30f34195f3fa1da02a6da3d6337d28f2003943aa0a3d247bbc25fefc42", size = 196493177, upload-time = "2025-11-18T05:49:17.677Z" }, ] [[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.8.93" +name = "nvidia-nvjitlink" +version = "13.0.88" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/56/7a/123e033aaff487c77107195fa5a2b8686795ca537935a24efae476c41f05/nvidia_nvjitlink-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:13a74f429e23b921c1109976abefacc69835f2f433ebd323d3946e11d804e47b", size = 40713933, upload-time = "2025-09-04T08:35:43.553Z" }, + { url = "https://files.pythonhosted.org/packages/ab/2c/93c5250e64df4f894f1cbb397c6fd71f79813f9fd79d7cd61de3f97b3c2d/nvidia_nvjitlink-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e931536ccc7d467a98ba1d8b89ff7fa7f1fa3b13f2b0069118cd7f47bff07d0c", size = 38768748, upload-time = "2025-09-04T08:35:20.008Z" }, ] [[package]] -name = "nvidia-nvshmem-cu12" +name = "nvidia-nvshmem-cu13" version = "3.4.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, + { url = "https://files.pythonhosted.org/packages/dc/0f/05cc9c720236dcd2db9c1ab97fff629e96821be2e63103569da0c9b72f19/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dc2a197f38e5d0376ad52cd1a2a3617d3cdc150fd5966f4aee9bcebb1d68fe9", size = 60215947, upload-time = "2025-09-06T00:32:20.022Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/35/a9bf80a609e74e3b000fef598933235c908fcefcef9026042b8e6dfde2a9/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:290f0a2ee94c9f3687a02502f3b9299a9f9fe826e6d0287ee18482e78d495b80", size = 60412546, upload-time = "2025-09-06T00:32:41.564Z" }, ] [[package]] -name = "nvidia-nvtx-cu12" -version = "12.8.90" +name = "nvidia-nvtx" +version = "13.0.85" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, + { url = "https://files.pythonhosted.org/packages/c2/f3/d86c845465a2723ad7e1e5c36dcd75ddb82898b3f53be47ebd429fb2fa5d/nvidia_nvtx-13.0.85-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4936d1d6780fbe68db454f5e72a42ff64d1fd6397df9f363ae786930fd5c1cd4", size = 148047, upload-time = "2025-09-04T08:29:01.761Z" }, + { url = "https://files.pythonhosted.org/packages/a8/64/3708a90d1ebe202ffdeb7185f878a3c84d15c2b2c31858da2ce0583e2def/nvidia_nvtx-13.0.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb7780edb6b14107373c835bf8b72e7a178bac7367e23da7acb108f973f157a6", size = 148878, upload-time = "2025-09-04T08:28:53.627Z" }, ] [[package]] @@ -5563,7 +5630,8 @@ dependencies = [ { name = "safetensors" }, { name = "timm" }, { name = "torch" }, - { name = "torchvision" }, + { name = "torchvision", version = "0.25.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, + { name = "torchvision", version = "0.26.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, { name = "tqdm" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/4a/1f/2bc9795047fa2c1ad2567ef78ce6dfc9a7b763fa534acee09a94da2a5b8f/open_clip_torch-3.3.0.tar.gz", hash = "sha256:904b1a9f909df8281bb3de60ab95491cd2994a509177ea4f9d6292a84fe24d6d", size = 1503380, upload-time = "2026-02-27T00:32:46.74Z" } @@ -5677,9 +5745,9 @@ langgraph = [ ] megatron = [ { name = "apex" }, - { name = "causal-conv1d", version = "1.6.1", source = { url = "https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.6.1.post4/causal_conv1d-1.6.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" }, marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "causal-conv1d", marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "deep-ep", marker = "sys_platform == 'linux'" }, - { name = "mamba-ssm", version = "2.3.1", source = { url = "https://github.com/state-spaces/mamba/releases/download/v2.3.1/mamba_ssm-2.3.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" }, marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "mamba-ssm", marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "megatron-bridge" }, { name = "megatron-core" }, { name = "ml-dtypes", marker = "python_full_version < '3.13'" }, @@ -5689,7 +5757,7 @@ megatron = [ { name = "pybind11" }, { name = "torch" }, { name = "transformer-engine" }, - { name = "transformer-engine-cu12" }, + { name = "transformer-engine-cu13" }, { name = "transformer-engine-torch" }, ] plotting = [ @@ -5735,7 +5803,7 @@ requires-dist = [ { name = "apex", marker = "extra == 'megatron'", git = "https://github.com/NVIDIA/apex.git?rev=25.09" }, { name = "awscli", marker = "extra == 'backend'", specifier = ">=1.38.1" }, { name = "bitsandbytes", marker = "extra == 'backend'", specifier = ">=0.45.5" }, - { name = "causal-conv1d", 
marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'megatron'", url = "https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.6.1.post4/causal_conv1d-1.6.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" }, + { name = "causal-conv1d", marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'megatron'", specifier = "==1.6.1" }, { name = "datrie", marker = "extra == 'tinker'", specifier = ">=0.8.3" }, { name = "deep-ep", marker = "sys_platform == 'linux' and extra == 'megatron'", git = "https://github.com/deepseek-ai/DeepEP.git?rev=v1.2.1" }, { name = "duckdb", marker = "extra == 'backend'", specifier = ">=1.0.0" }, @@ -5747,7 +5815,7 @@ requires-dist = [ { name = "langchain-openai", marker = "extra == 'langgraph'", specifier = ">=0.3.27" }, { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.2" }, { name = "litellm", specifier = ">=1.71.1,<=1.82.0" }, - { name = "mamba-ssm", marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'megatron'", url = "https://github.com/state-spaces/mamba/releases/download/v2.3.1/mamba_ssm-2.3.1%2Bcu12torch2.10cxx11abiTRUE-cp311-cp311-linux_x86_64.whl" }, + { name = "mamba-ssm", marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'megatron'", specifier = "==2.3.1" }, { name = "matplotlib", marker = "extra == 'plotting'", specifier = ">=3.10.1" }, { name = "megatron-bridge", marker = "extra == 'megatron'", git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git?rev=e049cc00c24d03e2ae45d2608c7a44e2d2364e3d" }, { name = "megatron-core", marker = "extra == 'megatron'", specifier = "==0.16.0rc0" }, @@ -5775,13 +5843,13 @@ requires-dist = [ { name = "setuptools", marker = "extra == 'backend'", specifier = ">=78.1.0" }, { name = "tblib", specifier = ">=3.0.0" }, { 
name = "tinker", marker = "extra == 'tinker'", specifier = ">=0.8.1" }, - { name = "torch", marker = "extra == 'backend'", specifier = "==2.10.0" }, - { name = "torch", marker = "extra == 'megatron'", specifier = "==2.10.0" }, - { name = "torch", marker = "extra == 'tinker'", specifier = "==2.10.0" }, + { name = "torch", marker = "extra == 'backend'", specifier = "==2.11.0" }, + { name = "torch", marker = "extra == 'megatron'", specifier = "==2.11.0" }, + { name = "torch", marker = "extra == 'tinker'", specifier = "==2.11.0" }, { name = "torchao", marker = "extra == 'backend'", specifier = "==0.17.0" }, - { name = "transformer-engine", marker = "extra == 'megatron'", specifier = "==2.11.0" }, - { name = "transformer-engine-cu12", marker = "extra == 'megatron'", specifier = "==2.11.0" }, - { name = "transformer-engine-torch", marker = "extra == 'megatron'", git = "https://github.com/NVIDIA/TransformerEngine.git?subdirectory=transformer_engine%2Fpytorch&rev=v2.11" }, + { name = "transformer-engine", marker = "extra == 'megatron'", specifier = "==2.14.1" }, + { name = "transformer-engine-cu13", marker = "extra == 'megatron'", specifier = "==2.14.1" }, + { name = "transformer-engine-torch", marker = "extra == 'megatron'", specifier = "==2.14.1" }, { name = "transformers", marker = "extra == 'backend'", specifier = "==5.6.2" }, { name = "transformers", marker = "extra == 'tinker'", specifier = "==5.6.2" }, { name = "trl", marker = "extra == 'backend'", specifier = "==1.3.0" }, @@ -5789,7 +5857,7 @@ requires-dist = [ { name = "unsloth", extras = ["colab-no-deps", "huggingface"], marker = "extra == 'backend'", git = "https://github.com/Kovbo/unsloth.git?rev=140026a39cfce9ee8702c46140d4c2d4846d1eb6" }, { name = "unsloth-zoo", marker = "extra == 'backend'", git = "https://github.com/Kovbo/unsloth-zoo.git?rev=68677b9814e90ccb647cbda91062e0d4001798bd" }, { name = "uvicorn", marker = "extra == 'tinker'", specifier = ">=0.35.0" }, - { name = "vllm", marker = "sys_platform == 
'linux' and extra == 'backend'", specifier = "==0.19.1" }, + { name = "vllm", marker = "sys_platform == 'linux' and extra == 'backend'", specifier = "==0.20.0" }, { name = "wandb", marker = "extra == 'backend'", specifier = "==0.25.0" }, { name = "weave", specifier = ">=0.52.24" }, ] @@ -6054,16 +6122,18 @@ wheels = [ [[package]] name = "outlines-core" -version = "0.2.11" +version = "0.2.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1a/d3/e04e9145f8f806723dec9b9e5227ad695a3efcd3ced7794cf7c22b15df5e/outlines_core-0.2.11.tar.gz", hash = "sha256:dfce56f717ff5083e54cbcfdb66cad243365437fccbb5509adaa7e31e030f1d8", size = 197263, upload-time = "2025-05-19T10:12:51.719Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/04/4a0812eb27c086cfd2e66e7ec9150f33e105912a9b7f8b335e3479f03a06/outlines_core-0.2.14.tar.gz", hash = "sha256:64808deed1591ca3029ff64346ceb974cd5d780c916ea82504951fe83523039e", size = 191539, upload-time = "2026-01-09T15:59:10.016Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/db/32c6e1170f139420e948fdd18a09a6175244bc0760dcf4dc2470e18411b9/outlines_core-0.2.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:132605b8dd1e3d1369da6a851992dd357f6376068292f6bd47caa7a28b794d19", size = 2289078, upload-time = "2025-05-19T10:12:12.118Z" }, - { url = "https://files.pythonhosted.org/packages/25/c3/b6e6f4e08fa84d2424f82705a6dc47fee33cb91989010fa678736957dcf6/outlines_core-0.2.11-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b31d5fc83b78aad282dd667b8d6e684614481fe08a7609ce0ce45dee64cd2991", size = 2115075, upload-time = "2025-05-19T10:12:13.761Z" }, - { url = "https://files.pythonhosted.org/packages/92/c7/a65d1fddf49830ebc41422294eacde35286d9f68994a8aa905cb14f5aade/outlines_core-0.2.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86df9740368866295077346440d911df4972da2b3f1f54b8125e6f329e8a8891", size = 
2287677, upload-time = "2025-05-19T10:12:24.24Z" }, - { url = "https://files.pythonhosted.org/packages/23/79/8795aed8be9b77dd69d78e7cfbfcf28c179e6b08da6e56bbbf48a09fe55f/outlines_core-0.2.11-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:96ce4dd78f106799be4a0a5795cefd1352806162973756a4b6fce4bb6eddd7e4", size = 2113000, upload-time = "2025-05-19T10:12:25.446Z" }, - { url = "https://files.pythonhosted.org/packages/87/96/7dcdc5198844145ab35528f9f93a58c3d47b87e54d0f79357c631d7b7a9a/outlines_core-0.2.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:daef6eaaf8c3403455ab5cbf265cb5c6838df571eb7c4b23cddac19cfc701726", size = 2287320, upload-time = "2025-05-19T10:12:35.515Z" }, - { url = "https://files.pythonhosted.org/packages/4d/68/b420b6a3beaadbf8e9f2a82132120027efd6424634013fbeca8c2fed7467/outlines_core-0.2.11-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:76b2512417c68863f8f227a080e87f755682dfd895e23b021121318be11da579", size = 2112861, upload-time = "2025-05-19T10:12:36.742Z" }, + { url = "https://files.pythonhosted.org/packages/f7/69/e0be45d4c8ad7d301cdc9917d22ff39211da1e830f92fb07b29c9221b5c4/outlines_core-0.2.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:615566bf8257d2bba8ac192cdfc29d1c4357f57b53672fbd622e821215e4f1bd", size = 2338968, upload-time = "2026-01-09T15:58:23.317Z" }, + { url = "https://files.pythonhosted.org/packages/f2/67/9dab90313460eb250f926e7985d62cebfc33c7580197be8a496de6e9f7c4/outlines_core-0.2.14-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:81d01cfae29de5671bc5013fd6b2008621157bec3d8be284da7da2dc0672745c", size = 2236169, upload-time = "2026-01-09T15:58:24.575Z" }, + { url = "https://files.pythonhosted.org/packages/29/29/3a04944407207a5d214879ca5ca33c2bd3e65199a4e927051c1bdaaa4d50/outlines_core-0.2.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bb2060c240c4507f334965a8948dbeeb22007560d797f6debd92346c0b620cb", size = 2341426, 
upload-time = "2026-01-09T15:58:33.553Z" }, + { url = "https://files.pythonhosted.org/packages/b2/a7/a77f746272504bac3f628047d56ea1731b61549a3e1d9bbfd226f2968246/outlines_core-0.2.14-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1de34681c7e0e7e1551fc9036e4fa3c57986336c905a10536591ceb6d869c258", size = 2236941, upload-time = "2026-01-09T15:58:35.118Z" }, + { url = "https://files.pythonhosted.org/packages/c1/9a/4b62903de006d991b58674ff033c1b6fb92be5767360376fc961f6771bdb/outlines_core-0.2.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6453e23f01d98ec48e3a4141d7112792ce77001dfb28d91d6fd89f47009f91ef", size = 2341051, upload-time = "2026-01-09T15:58:44.415Z" }, + { url = "https://files.pythonhosted.org/packages/50/36/1532f7d9ab16c676812d94528e89964aa0d15f12adcb285e6ed86f86f2fe/outlines_core-0.2.14-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:7deef6df74cb247f2a3a62f03438ba967456504b0555ec7029f8db834e054448", size = 2236778, upload-time = "2026-01-09T15:58:45.437Z" }, + { url = "https://files.pythonhosted.org/packages/d5/63/dfa000239e46f17b47e6dc9bec3aab8a8136fe400312f1916320e02c8f38/outlines_core-0.2.14-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1776ae984574461f249fe590314a439992eb9b883f4091b8fa7fc56f29f3717", size = 2343210, upload-time = "2026-01-09T15:58:54.282Z" }, + { url = "https://files.pythonhosted.org/packages/36/4f/0e63da06c6054f154ef22b5ef3c6b9030cb22da9c03d2d2dd82836a1e795/outlines_core-0.2.14-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:7eba2b41dac03d6e6e8d5ea0aecbbc03dacb4c57de3b1fc944d0bafb022941f7", size = 2238206, upload-time = "2026-01-09T15:58:55.705Z" }, ] [[package]] @@ -9122,6 +9192,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, 
upload-time = "2025-10-06T20:22:34.444Z" }, ] +[[package]] +name = "tilelang" +version = "0.1.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "apache-tvm-ffi", marker = "sys_platform == 'linux'" }, + { name = "cloudpickle", marker = "sys_platform == 'linux'" }, + { name = "ml-dtypes", marker = "sys_platform == 'linux'" }, + { name = "numpy", marker = "sys_platform == 'linux'" }, + { name = "psutil", marker = "sys_platform == 'linux'" }, + { name = "torch", marker = "sys_platform == 'linux'" }, + { name = "torch-c-dlpack-ext", marker = "python_full_version < '3.14' and sys_platform == 'linux'" }, + { name = "tqdm", marker = "sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform == 'linux'" }, + { name = "z3-solver", marker = "sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/70/5051f65821baa30a3d61fc48f8ba10c776490315e8c90f82559b92089756/tilelang-0.1.9.tar.gz", hash = "sha256:287f727c913bb648fcf6c1968809ba3390e55eeed257a5c6bb9a80bc05966af4", size = 93395292, upload-time = "2026-04-22T09:19:11.988Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/8a/1cbeee79d62abaa02441c2d00621554e41aa62dbf3b94a4feb3867184b01/tilelang-0.1.9-cp38-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bbccfe9035aed775ffafb6dc25a5994504b24e2c5d95d0f39643edfafa7bf12", size = 45419374, upload-time = "2026-04-22T09:15:56.014Z" }, + { url = "https://files.pythonhosted.org/packages/c6/a7/f4bfb86f87e107703146e703204cec2c0eae2492b633e0052b0ace3febb6/tilelang-0.1.9-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:77ab0ee2f40f66ea015b6b21426d482751e28cbc635ef9d1198cbd6502454a7c", size = 42110365, upload-time = "2026-04-22T09:17:18.292Z" }, +] + [[package]] name = "timm" version = "1.0.26" @@ -9131,7 +9223,8 @@ dependencies = [ { name = "pyyaml" }, { name = "safetensors" }, { name = "torch" }, - { name = "torchvision" }, + { name = "torchvision", 
version = "0.25.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, + { name = "torchvision", version = "0.26.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/7b/1e/e924b3b2326a856aaf68586f9c52a5fc81ef45715eca408393b68c597e0e/timm-1.0.26.tar.gz", hash = "sha256:f66f082f2f381cf68431c22714c8b70f723837fa2a185b155961eab90f2d5b10", size = 2419859, upload-time = "2026-03-23T18:12:10.272Z" } wheels = [ @@ -9259,68 +9352,49 @@ wheels = [ [[package]] name = "torch" -version = "2.10.0" +version = "2.11.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-bindings", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "cuda-bindings", version = "13.2.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, + { name = "cuda-toolkit", extra = ["cublas", "cudart", "cufft", "cufile", "cupti", "curand", "cusolver", "cusparse", "nvjitlink", "nvrtc", "nvtx"], marker = "sys_platform == 'linux'" }, { name = "filelock" }, { name = "fsspec" }, { name = "jinja2" }, { name = "networkx" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", 
marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, + { name = "nvidia-cudnn-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu13", marker = "sys_platform == 'linux'" }, + { name = "setuptools" }, { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = "sys_platform == 'linux'" }, { name = "typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/8b/4b61d6e13f7108f36910df9ab4b58fd389cc2520d54d81b88660804aad99/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:418997cb02d0a0f1497cf6a09f63166f9f5df9f3e16c8a716ab76a72127c714f", size = 79423467, upload-time = "2026-02-10T21:44:48.711Z" }, - { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = 
"2026-02-10T21:44:52.603Z" }, - { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload-time = "2026-02-10T21:44:44.095Z" }, - { url = "https://files.pythonhosted.org/packages/36/ab/7b562f1808d3f65414cd80a4f7d4bb00979d9355616c034c171249e1a303/torch-2.10.0-3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ac5bdcbb074384c66fa160c15b1ead77839e3fe7ed117d667249afce0acabfac", size = 915518691, upload-time = "2026-03-11T14:15:43.147Z" }, - { url = "https://files.pythonhosted.org/packages/b3/7a/abada41517ce0011775f0f4eacc79659bc9bc6c361e6bfe6f7052a6b9363/torch-2.10.0-3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:98c01b8bb5e3240426dcde1446eed6f40c778091c8544767ef1168fc663a05a6", size = 915622781, upload-time = "2026-03-11T14:17:11.354Z" }, - { url = "https://files.pythonhosted.org/packages/ab/c6/4dfe238342ffdcec5aef1c96c457548762d33c40b45a1ab7033bb26d2ff2/torch-2.10.0-3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80b1b5bfe38eb0e9f5ff09f206dcac0a87aadd084230d4a36eea5ec5232c115b", size = 915627275, upload-time = "2026-03-11T14:16:11.325Z" }, - { url = "https://files.pythonhosted.org/packages/d8/f0/72bf18847f58f877a6a8acf60614b14935e2f156d942483af1ffc081aea0/torch-2.10.0-3-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:46b3574d93a2a8134b3f5475cfb98e2eb46771794c57015f6ad1fb795ec25e49", size = 915523474, upload-time = "2026-03-11T14:17:44.422Z" }, - { url = "https://files.pythonhosted.org/packages/f4/39/590742415c3030551944edc2ddc273ea1fdfe8ffb2780992e824f1ebee98/torch-2.10.0-3-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b1d5e2aba4eb7f8e87fbe04f86442887f9167a35f092afe4c237dfcaaef6e328", size = 915632474, upload-time = "2026-03-11T14:15:13.666Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/8e/34949484f764dde5b222b7fe3fede43e4a6f0da9d7f8c370bb617d629ee2/torch-2.10.0-3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0228d20b06701c05a8f978357f657817a4a63984b0c90745def81c18aedfa591", size = 915523882, upload-time = "2026-03-11T14:14:46.311Z" }, - { url = "https://files.pythonhosted.org/packages/78/89/f5554b13ebd71e05c0b002f95148033e730d3f7067f67423026cc9c69410/torch-2.10.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:3282d9febd1e4e476630a099692b44fdc214ee9bf8ee5377732d9d9dfe5712e4", size = 145992610, upload-time = "2026-01-21T16:25:26.327Z" }, - { url = "https://files.pythonhosted.org/packages/ae/30/a3a2120621bf9c17779b169fc17e3dc29b230c29d0f8222f499f5e159aa8/torch-2.10.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a2f9edd8dbc99f62bc4dfb78af7bf89499bca3d753423ac1b4e06592e467b763", size = 915607863, upload-time = "2026-01-21T16:25:06.696Z" }, - { url = "https://files.pythonhosted.org/packages/6f/3d/c87b33c5f260a2a8ad68da7147e105f05868c281c63d65ed85aa4da98c66/torch-2.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:29b7009dba4b7a1c960260fc8ac85022c784250af43af9fb0ebafc9883782ebd", size = 113723116, upload-time = "2026-01-21T16:25:21.916Z" }, - { url = "https://files.pythonhosted.org/packages/61/d8/15b9d9d3a6b0c01b883787bd056acbe5cc321090d4b216d3ea89a8fcfdf3/torch-2.10.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:b7bd80f3477b830dd166c707c5b0b82a898e7b16f59a7d9d42778dd058272e8b", size = 79423461, upload-time = "2026-01-21T16:24:50.266Z" }, - { url = "https://files.pythonhosted.org/packages/cc/af/758e242e9102e9988969b5e621d41f36b8f258bb4a099109b7a4b4b50ea4/torch-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5fd4117d89ffd47e3dcc71e71a22efac24828ad781c7e46aaaf56bf7f2796acf", size = 145996088, upload-time = "2026-01-21T16:24:44.171Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/8e/3c74db5e53bff7ed9e34c8123e6a8bfef718b2450c35eefab85bb4a7e270/torch-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:787124e7db3b379d4f1ed54dd12ae7c741c16a4d29b49c0226a89bea50923ffb", size = 915711952, upload-time = "2026-01-21T16:23:53.503Z" }, - { url = "https://files.pythonhosted.org/packages/6e/01/624c4324ca01f66ae4c7cd1b74eb16fb52596dce66dbe51eff95ef9e7a4c/torch-2.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:2c66c61f44c5f903046cc696d088e21062644cbe541c7f1c4eaae88b2ad23547", size = 113757972, upload-time = "2026-01-21T16:24:39.516Z" }, - { url = "https://files.pythonhosted.org/packages/c9/5c/dee910b87c4d5c0fcb41b50839ae04df87c1cfc663cf1b5fca7ea565eeaa/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6d3707a61863d1c4d6ebba7be4ca320f42b869ee657e9b2c21c736bf17000294", size = 79498198, upload-time = "2026-01-21T16:24:34.704Z" }, - { url = "https://files.pythonhosted.org/packages/c9/6f/f2e91e34e3fcba2e3fc8d8f74e7d6c22e74e480bbd1db7bc8900fdf3e95c/torch-2.10.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5c4d217b14741e40776dd7074d9006fd28b8a97ef5654db959d8635b2fe5f29b", size = 146004247, upload-time = "2026-01-21T16:24:29.335Z" }, - { url = "https://files.pythonhosted.org/packages/98/fb/5160261aeb5e1ee12ee95fe599d0541f7c976c3701d607d8fc29e623229f/torch-2.10.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6b71486353fce0f9714ca0c9ef1c850a2ae766b409808acd58e9678a3edb7738", size = 915716445, upload-time = "2026-01-21T16:22:45.353Z" }, - { url = "https://files.pythonhosted.org/packages/6a/16/502fb1b41e6d868e8deb5b0e3ae926bbb36dab8ceb0d1b769b266ad7b0c3/torch-2.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2ee399c644dc92ef7bc0d4f7e74b5360c37cdbe7c5ba11318dda49ffac2bc57", size = 113757050, upload-time = "2026-01-21T16:24:19.204Z" }, - { url = 
"https://files.pythonhosted.org/packages/1a/0b/39929b148f4824bc3ad6f9f72a29d4ad865bcf7ebfc2fa67584773e083d2/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:3202429f58309b9fa96a614885eace4b7995729f44beb54d3e4a47773649d382", size = 79851305, upload-time = "2026-01-21T16:24:09.209Z" }, - { url = "https://files.pythonhosted.org/packages/d8/14/21fbce63bc452381ba5f74a2c0a959fdf5ad5803ccc0c654e752e0dbe91a/torch-2.10.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:aae1b29cd68e50a9397f5ee897b9c24742e9e306f88a807a27d617f07adb3bd8", size = 146005472, upload-time = "2026-01-21T16:22:29.022Z" }, - { url = "https://files.pythonhosted.org/packages/54/fd/b207d1c525cb570ef47f3e9f836b154685011fce11a2f444ba8a4084d042/torch-2.10.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6021db85958db2f07ec94e1bc77212721ba4920c12a18dc552d2ae36a3eb163f", size = 915612644, upload-time = "2026-01-21T16:21:47.019Z" }, - { url = "https://files.pythonhosted.org/packages/36/53/0197f868c75f1050b199fe58f9bf3bf3aecac9b4e85cc9c964383d745403/torch-2.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff43db38af76fda183156153983c9a096fc4c78d0cd1e07b14a2314c7f01c2c8", size = 113997015, upload-time = "2026-01-21T16:23:00.767Z" }, - { url = "https://files.pythonhosted.org/packages/0e/13/e76b4d9c160e89fff48bf16b449ea324bda84745d2ab30294c37c2434c0d/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:cdf2a523d699b70d613243211ecaac14fe9c5df8a0b0a9c02add60fb2a413e0f", size = 79498248, upload-time = "2026-01-21T16:23:09.315Z" }, - { url = "https://files.pythonhosted.org/packages/4f/93/716b5ac0155f1be70ed81bacc21269c3ece8dba0c249b9994094110bfc51/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:bf0d9ff448b0218e0433aeb198805192346c4fd659c852370d5cc245f602a06a", size = 79464992, upload-time = "2026-01-21T16:23:05.162Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/2b/51e663ff190c9d16d4a8271203b71bc73a16aa7619b9f271a69b9d4a936b/torch-2.10.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:233aed0659a2503b831d8a67e9da66a62c996204c0bba4f4c442ccc0c68a3f60", size = 146018567, upload-time = "2026-01-21T16:22:23.393Z" }, - { url = "https://files.pythonhosted.org/packages/5e/cd/4b95ef7f293b927c283db0b136c42be91c8ec6845c44de0238c8c23bdc80/torch-2.10.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:682497e16bdfa6efeec8cde66531bc8d1fbbbb4d8788ec6173c089ed3cc2bfe5", size = 915721646, upload-time = "2026-01-21T16:21:16.983Z" }, - { url = "https://files.pythonhosted.org/packages/56/97/078a007208f8056d88ae43198833469e61a0a355abc0b070edd2c085eb9a/torch-2.10.0-cp314-cp314-win_amd64.whl", hash = "sha256:6528f13d2a8593a1a412ea07a99812495bec07e9224c28b2a25c0a30c7da025c", size = 113752373, upload-time = "2026-01-21T16:22:13.471Z" }, - { url = "https://files.pythonhosted.org/packages/d8/94/71994e7d0d5238393df9732fdab607e37e2b56d26a746cb59fdb415f8966/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f5ab4ba32383061be0fb74bda772d470140a12c1c3b58a0cfbf3dae94d164c28", size = 79850324, upload-time = "2026-01-21T16:22:09.494Z" }, - { url = "https://files.pythonhosted.org/packages/e2/65/1a05346b418ea8ccd10360eef4b3e0ce688fba544e76edec26913a8d0ee0/torch-2.10.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:716b01a176c2a5659c98f6b01bf868244abdd896526f1c692712ab36dbaf9b63", size = 146006482, upload-time = "2026-01-21T16:22:18.42Z" }, - { url = "https://files.pythonhosted.org/packages/1d/b9/5f6f9d9e859fc3235f60578fa64f52c9c6e9b4327f0fe0defb6de5c0de31/torch-2.10.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:d8f5912ba938233f86361e891789595ff35ca4b4e2ac8fe3670895e5976731d6", size = 915613050, upload-time = "2026-01-21T16:20:49.035Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/4d/35352043ee0eaffdeff154fad67cd4a31dbed7ff8e3be1cc4549717d6d51/torch-2.10.0-cp314-cp314t-win_amd64.whl", hash = "sha256:71283a373f0ee2c89e0f0d5f446039bdabe8dbc3c9ccf35f0f784908b0acd185", size = 113995816, upload-time = "2026-01-21T16:22:05.312Z" }, + { url = "https://files.pythonhosted.org/packages/ae/0d/98b410492609e34a155fa8b121b55c7dca229f39636851c3a9ec20edea21/torch-2.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7b6a60d48062809f58595509c524b88e6ddec3ebe25833d6462eeab81e5f2ce4", size = 80529712, upload-time = "2026-03-23T18:12:02.608Z" }, + { url = "https://files.pythonhosted.org/packages/84/03/acea680005f098f79fd70c1d9d5ccc0cb4296ec2af539a0450108232fc0c/torch-2.11.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d91aac77f24082809d2c5a93f52a5f085032740a1ebc9252a7b052ef5a4fddc6", size = 419718178, upload-time = "2026-03-23T18:10:46.675Z" }, + { url = "https://files.pythonhosted.org/packages/8c/8b/d7be22fbec9ffee6cff31a39f8750d4b3a65d349a286cf4aec74c2375662/torch-2.11.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:7aa2f9bbc6d4595ba72138026b2074be1233186150e9292865e04b7a63b8c67a", size = 530604548, upload-time = "2026-03-23T18:10:03.569Z" }, + { url = "https://files.pythonhosted.org/packages/d1/bd/9912d30b68845256aabbb4a40aeefeef3c3b20db5211ccda653544ada4b6/torch-2.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:73e24aaf8f36ab90d95cd1761208b2eb70841c2a9ca1a3f9061b39fc5331b708", size = 114519675, upload-time = "2026-03-23T18:11:52.995Z" }, + { url = "https://files.pythonhosted.org/packages/6f/8b/69e3008d78e5cee2b30183340cc425081b78afc5eff3d080daab0adda9aa/torch-2.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4b5866312ee6e52ea625cd211dcb97d6a2cdc1131a5f15cc0d87eec948f6dd34", size = 80606338, upload-time = "2026-03-23T18:11:34.781Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/16/42e5915ebe4868caa6bac83a8ed59db57f12e9a61b7d749d584776ed53d5/torch-2.11.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f99924682ef0aa6a4ab3b1b76f40dc6e273fca09f367d15a524266db100a723f", size = 419731115, upload-time = "2026-03-23T18:11:06.944Z" }, + { url = "https://files.pythonhosted.org/packages/1a/c9/82638ef24d7877510f83baf821f5619a61b45568ce21c0a87a91576510aa/torch-2.11.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:0f68f4ac6d95d12e896c3b7a912b5871619542ec54d3649cf48cc1edd4dd2756", size = 530712279, upload-time = "2026-03-23T18:10:31.481Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ff/6756f1c7ee302f6d202120e0f4f05b432b839908f9071157302cedfc5232/torch-2.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:fbf39280699d1b869f55eac536deceaa1b60bd6788ba74f399cc67e60a5fab10", size = 114556047, upload-time = "2026-03-23T18:10:55.931Z" }, + { url = "https://files.pythonhosted.org/packages/87/89/5ea6722763acee56b045435fb84258db7375c48165ec8be7880ab2b281c5/torch-2.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1e6debd97ccd3205bbb37eb806a9d8219e1139d15419982c09e23ef7d4369d18", size = 80606801, upload-time = "2026-03-23T18:10:18.649Z" }, + { url = "https://files.pythonhosted.org/packages/32/d1/8ed2173589cbfe744ed54e5a73efc107c0085ba5777ee93a5f4c1ab90553/torch-2.11.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:63a68fa59de8f87acc7e85a5478bb2dddbb3392b7593ec3e78827c793c4b73fd", size = 419732382, upload-time = "2026-03-23T18:08:30.835Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e1/b73f7c575a4b8f87a5928f50a1e35416b5e27295d8be9397d5293e7e8d4c/torch-2.11.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:cc89b9b173d9adfab59fd227f0ab5e5516d9a52b658ae41d64e59d2e55a418db", size = 530711509, upload-time = "2026-03-23T18:08:47.213Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/82/3e3fcdd388fbe54e29fd3f991f36846ff4ac90b0d0181e9c8f7236565f82/torch-2.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:4dda3b3f52d121063a731ddb835f010dc137b920d7fec2778e52f60d8e4bf0cd", size = 114555842, upload-time = "2026-03-23T18:09:52.111Z" }, + { url = "https://files.pythonhosted.org/packages/db/38/8ac78069621b8c2b4979c2f96dc8409ef5e9c4189f6aac629189a78677ca/torch-2.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8b394322f49af4362d4f80e424bcaca7efcd049619af03a4cf4501520bdf0fb4", size = 80959574, upload-time = "2026-03-23T18:10:14.214Z" }, + { url = "https://files.pythonhosted.org/packages/6d/6c/56bfb37073e7136e6dd86bfc6af7339946dd684e0ecf2155ac0eee687ae1/torch-2.11.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2658f34ce7e2dabf4ec73b45e2ca68aedad7a5be87ea756ad656eaf32bf1e1ea", size = 419732324, upload-time = "2026-03-23T18:09:36.604Z" }, + { url = "https://files.pythonhosted.org/packages/07/f4/1b666b6d61d3394cca306ea543ed03a64aad0a201b6cd159f1d41010aeb1/torch-2.11.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:98bb213c3084cfe176302949bdc360074b18a9da7ab59ef2edc9d9f742504778", size = 530596026, upload-time = "2026-03-23T18:09:20.842Z" }, + { url = "https://files.pythonhosted.org/packages/48/6b/30d1459fa7e4b67e9e3fe1685ca1d8bb4ce7c62ef436c3a615963c6c866c/torch-2.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a97b94bbf62992949b4730c6cd2cc9aee7b335921ee8dc207d930f2ed09ae2db", size = 114793702, upload-time = "2026-03-23T18:09:47.304Z" }, + { url = "https://files.pythonhosted.org/packages/26/0d/8603382f61abd0db35841148ddc1ffd607bf3100b11c6e1dab6d2fc44e72/torch-2.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:01018087326984a33b64e04c8cb5c2795f9120e0d775ada1f6638840227b04d7", size = 80573442, upload-time = "2026-03-23T18:09:10.117Z" }, + { url = 
"https://files.pythonhosted.org/packages/c7/86/7cd7c66cb9cec6be330fff36db5bd0eef386d80c031b581ec81be1d4b26c/torch-2.11.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:2bb3cc54bd0dea126b0060bb1ec9de0f9c7f7342d93d436646516b0330cd5be7", size = 419749385, upload-time = "2026-03-23T18:07:33.77Z" }, + { url = "https://files.pythonhosted.org/packages/47/e8/b98ca2d39b2e0e4730c0ee52537e488e7008025bc77ca89552ff91021f7c/torch-2.11.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4dc8b3809469b6c30b411bb8c4cad3828efd26236153d9beb6a3ec500f211a60", size = 530716756, upload-time = "2026-03-23T18:07:50.02Z" }, + { url = "https://files.pythonhosted.org/packages/78/88/d4a4cda8362f8a30d1ed428564878c3cafb0d87971fbd3947d4c84552095/torch-2.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:2b4e811728bd0cc58fb2b0948fe939a1ee2bf1422f6025be2fca4c7bd9d79718", size = 114552300, upload-time = "2026-03-23T18:09:05.617Z" }, + { url = "https://files.pythonhosted.org/packages/bf/46/4419098ed6d801750f26567b478fc185c3432e11e2cad712bc6b4c2ab0d0/torch-2.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8245477871c3700d4370352ffec94b103cfcb737229445cf9946cddb7b2ca7cd", size = 80959460, upload-time = "2026-03-23T18:09:00.818Z" }, + { url = "https://files.pythonhosted.org/packages/fd/66/54a56a4a6ceaffb567231994a9745821d3af922a854ed33b0b3a278e0a99/torch-2.11.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:ab9a8482f475f9ba20e12db84b0e55e2f58784bdca43a854a6ccd3fd4b9f75e6", size = 419735835, upload-time = "2026-03-23T18:07:18.974Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e7/0b6665f533aa9e337662dc190425abc0af1fe3234088f4454c52393ded61/torch-2.11.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:563ed3d25542d7e7bbc5b235ccfacfeb97fb470c7fee257eae599adb8005c8a2", size = 530613405, upload-time = "2026-03-23T18:08:07.014Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/bf/c8d12a2c86dbfd7f40fb2f56fbf5a505ccf2d9ce131eb559dfc7c51e1a04/torch-2.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b2a43985ff5ef6ddd923bbcf99943e5f58059805787c5c9a2622bf05ca2965b0", size = 114792991, upload-time = "2026-03-23T18:08:19.216Z" }, ] [[package]] @@ -9353,62 +9427,91 @@ wheels = [ [[package]] name = "torchaudio" -version = "2.10.0" +version = "2.11.0" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "torch", marker = "sys_platform == 'linux'" }, -] wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/b7/c66dc34a27441d78997e20d0ffe2f5ad73db9f7b1267511be255bb94ac9b/torchaudio-2.10.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:87c841a21e82703ebd4a29170c4e60c25a2b47312dc212930087ad58965ac0c8", size = 391843, upload-time = "2026-01-21T16:28:43.093Z" }, - { url = "https://files.pythonhosted.org/packages/13/ae/a2a34a64947c4fa4a61b4c86d8f36fbcb4ebfec30fdde140267db260f96c/torchaudio-2.10.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b2c77fb9114dd463dc805560bf55a1ac2a52e219794cc32b7b32cf2aeffd2826", size = 1894140, upload-time = "2026-01-21T16:28:35.892Z" }, - { url = "https://files.pythonhosted.org/packages/ea/3f/df620439a76ece170472d41438d11a1545d5db5dc9f1eaeab8c6e055a328/torchaudio-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:42b148a0921a3721abd1f6ae098b1ec9f89703e555c4f7a0d44da87b8decbcb9", size = 391973, upload-time = "2026-01-21T16:28:39.732Z" }, - { url = "https://files.pythonhosted.org/packages/98/25/e55a30d7138f8fe56ed006df25b0a3c27681f0ec7bc9989e1778e6d559c3/torchaudio-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:0e77b2956448d63790a99beed0b74ac8b8cd3a94dcdd9ad01974411078f46278", size = 1895234, upload-time = "2026-01-21T16:28:37.034Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/fd/831c2595c81b17141180ca11ab3c0836cc544ef13e15aa0e7b2cb619e582/torchaudio-2.10.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5bc39ff3ea341097ce1ab023dd88c9dd8ca5f96ebf48821e7d23766137bb55d7", size = 392757, upload-time = "2026-01-21T16:28:33.631Z" }, - { url = "https://files.pythonhosted.org/packages/8e/d8/405c80c57dc68ca5855bddfaae57c3d84ea7397bf1eb2aa5d59c9fa1d3a9/torchaudio-2.10.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3057c4286db5673d266124a2a10ca54e19f516772e9057f44573a7da5b85e328", size = 1897099, upload-time = "2026-01-21T16:28:24.793Z" }, - { url = "https://files.pythonhosted.org/packages/43/8c/653e7f67855424bf3b7cbb48335f8316f7fb02bb01a6cab38f6bf9555676/torchaudio-2.10.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:b41b254d958632dc00dc7768431cadda516c91641d798775cbb19bcd4f0d2be4", size = 393430, upload-time = "2026-01-21T16:28:34.855Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1f/f91fcb9dd47a19b720fb48042a2f6f023651948e73726e98fff60d5ed5c7/torchaudio-2.10.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:da1081d1018a1e95f5a13947402aeb037cf5ac8861219a6164df004898a96bb1", size = 1897271, upload-time = "2026-01-21T16:28:23.519Z" }, - { url = "https://files.pythonhosted.org/packages/57/a1/ef5571406858f4ea89c18d6ad844d21cb9858708149e6bbd9a789ee30ea5/torchaudio-2.10.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:b2d5e11a2bec08f02a4f5fb7d1902ff82d48c533a27ceedc21e6ade650cf65b3", size = 393061, upload-time = "2026-01-21T16:28:25.802Z" }, - { url = "https://files.pythonhosted.org/packages/9d/0f/a0cf0ebc6f71b1868ea056dd4cd4f1a2244b8da8bc38372a1adc984a7c1f/torchaudio-2.10.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:77f6cf11a3b61af1b0967cd642368ecd30a86d70f622b22410ae6cb42d980b72", size = 1897137, upload-time = "2026-01-21T16:28:15.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/53/8a/946aa07393845b918d318b5e34b3bd0359fd27fc9fac10a85fae2bb86382/torchaudio-2.10.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:ed912de8ec1b400e17a5172badcfcddc601a9cd4e02d200f3a9504fc8e54961c", size = 393434, upload-time = "2026-01-21T16:28:18.668Z" }, - { url = "https://files.pythonhosted.org/packages/e1/68/e37e8fbbae986afa80f8851e08fc017eb8ae5f7b398ee28ed92303da163e/torchaudio-2.10.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:f7aa33a8198e87949896e16ea245ea731906445becdf10130e8823c68494a94a", size = 1897289, upload-time = "2026-01-21T16:28:17.059Z" }, + { url = "https://files.pythonhosted.org/packages/b3/f9/6f7ebe071b44592c85269762b55b63ab0a091b5f479f73544738f7564a1e/torchaudio-2.11.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:73dab4841f94d888bc7c2aed7b5547c643edc974306919fe1adfb65d57cccf4b", size = 1626527, upload-time = "2026-03-23T18:13:39.011Z" }, + { url = "https://files.pythonhosted.org/packages/ac/70/17408e0d154d0c894537a88dcbadc48e8ad3b6e1ef4a1dabda5d40245ee0/torchaudio-2.11.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1a07ec72fd6f26a588c39b5f029e0130d16bb40bc4221635580bf8fb18fcbc80", size = 1771930, upload-time = "2026-03-23T18:13:37.963Z" }, + { url = "https://files.pythonhosted.org/packages/78/28/c7adc053039f286c2aca0038b766cbe3294e66fec6b29a820e95128f9ede/torchaudio-2.11.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:bc653defca1c16154398517a1adc98d0fb7f1dd08e58ced217558d213c2c6e29", size = 1626670, upload-time = "2026-03-23T18:13:42.162Z" }, + { url = "https://files.pythonhosted.org/packages/88/d8/d6d0f896e064aa67377484efef4911cdcc07bce2929474e1417cc0af18c2/torchaudio-2.11.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6503c0bdb29daf2e6281bb70ea2dfe2c3553b782b619eb5d73bdadd8a3f7cecf", size = 1771992, upload-time = "2026-03-23T18:13:33.188Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/70/249c1498ebdad3e7752866635ec0855fc0dcf898beccda5a9d2b9df8e4d0/torchaudio-2.11.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:b034d7672f1c415434f48ef17807f2cce47f29e8795338c751d4e596c9fbe8b5", size = 1618523, upload-time = "2026-03-23T18:13:15.703Z" }, + { url = "https://files.pythonhosted.org/packages/4f/98/be13fe35d9aa5c26381c0e453c828a789d15c007f8f7d08c95341d19974d/torchaudio-2.11.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1c1101c1243ef0e4063ec63298977e2d3655c15cf88d9eb0a1bd4fe2db9f47ea", size = 1771992, upload-time = "2026-03-23T18:13:35.343Z" }, + { url = "https://files.pythonhosted.org/packages/06/95/1ad1507482e7263e556709a3f5f87fecd375a0742cdaf238806c8e72eaad/torchaudio-2.11.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:9fe3083c62e035646483a14e180d33561bdc2eed436c9ab1259c137fb7120b4a", size = 1618546, upload-time = "2026-03-23T18:13:29.686Z" }, + { url = "https://files.pythonhosted.org/packages/98/4c/480328ba07487eb9890406720304d0d460dd7a6a64098614f5aa53b662ca/torchaudio-2.11.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:13cff988697ccbad539987599f9dc672f40c417bed67570b365e4e5002bbd096", size = 1771991, upload-time = "2026-03-23T18:13:30.843Z" }, + { url = "https://files.pythonhosted.org/packages/5c/54/f414d7b92dd0b3094a2409c95a97bd6c49aa0620da722a0e55462f9bd9cb/torchaudio-2.11.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:79fb3cb99169fd41bd9719647261402a164da0d105a4d81f42a3260844ec5e79", size = 1618527, upload-time = "2026-03-23T18:13:26.68Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a8/bf2e1f6ce24c990192400ae49b4acc1a0d0295b6c6a06bceecdc46ce08de/torchaudio-2.11.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:00e9f71ab9c656f0abdb40c515bd65d4658ab0ad380dee27a2efd7d51dabd3d6", size = 1771995, upload-time = "2026-03-23T18:13:23.373Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/a0/62a5842062f739239691f2e57523e0570dd06704ad987755f7644a3afa23/torchaudio-2.11.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:1be3767064364ae82705bdf2b15c1e8b41fea82c4cd04d47428a8684b634b6ed", size = 1618552, upload-time = "2026-03-23T18:13:21.09Z" }, + { url = "https://files.pythonhosted.org/packages/6d/89/c293d818f9f899db93bf291b42401c05ae29acfb2e53d5341c30ea703e62/torchaudio-2.11.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:67f6edac29ed004652c11db5c19d9debb5d835695930574f564efc8bdd061bba", size = 1771986, upload-time = "2026-03-23T18:13:22.153Z" }, ] [[package]] name = "torchvision" version = "0.25.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'emscripten'", + "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'emscripten'", + "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.12' and sys_platform == 'win32'", + "python_full_version < '3.12' and sys_platform == 'emscripten'", + "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", +] dependencies = [ - { name = "numpy" }, - { name = "pillow" }, - { name = "torch" }, + { name = "numpy", marker = "sys_platform != 'linux'" }, + { name = "pillow", marker = "sys_platform != 'linux'" }, + { name = 
"torch", marker = "sys_platform != 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/3e/be/c704bceaf11c4f6b19d64337a34a877fcdfe3bd68160a8c9ae9bea4a35a3/torchvision-0.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db74a551946b75d19f9996c419a799ffdf6a223ecf17c656f90da011f1d75b20", size = 1874923, upload-time = "2026-01-21T16:27:46.574Z" }, - { url = "https://files.pythonhosted.org/packages/ae/e9/f143cd71232430de1f547ceab840f68c55e127d72558b1061a71d0b193cd/torchvision-0.25.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f49964f96644dbac2506dffe1a0a7ec0f2bf8cf7a588c3319fed26e6329ffdf3", size = 2344808, upload-time = "2026-01-21T16:27:43.191Z" }, - { url = "https://files.pythonhosted.org/packages/43/ae/ad5d6165797de234c9658752acb4fce65b78a6a18d82efdf8367c940d8da/torchvision-0.25.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:153c0d2cbc34b7cf2da19d73450f24ba36d2b75ec9211b9962b5022fb9e4ecee", size = 8070752, upload-time = "2026-01-21T16:27:33.748Z" }, { url = "https://files.pythonhosted.org/packages/23/19/55b28aecdc7f38df57b8eb55eb0b14a62b470ed8efeb22cdc74224df1d6a/torchvision-0.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:ea580ffd6094cc01914ad32f8c8118174f18974629af905cea08cb6d5d48c7b7", size = 4038722, upload-time = "2026-01-21T16:27:41.355Z" }, { url = "https://files.pythonhosted.org/packages/56/3a/6ea0d73f49a9bef38a1b3a92e8dd455cea58470985d25635beab93841748/torchvision-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2abe430c90b1d5e552680037d68da4eb80a5852ebb1c811b2b89d299b10573b", size = 1874920, upload-time = "2026-01-21T16:27:45.348Z" }, - { url = "https://files.pythonhosted.org/packages/51/f8/c0e1ef27c66e15406fece94930e7d6feee4cb6374bbc02d945a630d6426e/torchvision-0.25.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b75deafa2dfea3e2c2a525559b04783515e3463f6e830cb71de0fb7ea36fe233", size = 2344556, upload-time = "2026-01-21T16:27:40.125Z" }, - { url = 
"https://files.pythonhosted.org/packages/68/2f/f24b039169db474e8688f649377de082a965fbf85daf4e46c44412f1d15a/torchvision-0.25.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f25aa9e380865b11ea6e9d99d84df86b9cc959f1a007cd966fc6f1ab2ed0e248", size = 8072351, upload-time = "2026-01-21T16:27:21.074Z" }, { url = "https://files.pythonhosted.org/packages/ad/16/8f650c2e288977cf0f8f85184b90ee56ed170a4919347fc74ee99286ed6f/torchvision-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9c55ae8d673ab493325d1267cbd285bb94d56f99626c00ac4644de32a59ede3", size = 4303059, upload-time = "2026-01-21T16:27:11.08Z" }, { url = "https://files.pythonhosted.org/packages/f5/5b/1562a04a6a5a4cf8cf40016a0cdeda91ede75d6962cff7f809a85ae966a5/torchvision-0.25.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:24e11199e4d84ba9c5ee7825ebdf1cd37ce8deec225117f10243cae984ced3ec", size = 1874918, upload-time = "2026-01-21T16:27:39.02Z" }, - { url = "https://files.pythonhosted.org/packages/36/b1/3d6c42f62c272ce34fcce609bb8939bdf873dab5f1b798fd4e880255f129/torchvision-0.25.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f271136d2d2c0b7a24c5671795c6e4fd8da4e0ea98aeb1041f62bc04c4370ef", size = 2309106, upload-time = "2026-01-21T16:27:30.624Z" }, - { url = "https://files.pythonhosted.org/packages/c7/60/59bb9c8b67cce356daeed4cb96a717caa4f69c9822f72e223a0eae7a9bd9/torchvision-0.25.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:855c0dc6d37f462482da7531c6788518baedca1e0847f3df42a911713acdfe52", size = 8071522, upload-time = "2026-01-21T16:27:29.392Z" }, { url = "https://files.pythonhosted.org/packages/32/a5/9a9b1de0720f884ea50dbf9acb22cbe5312e51d7b8c4ac6ba9b51efd9bba/torchvision-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:cef0196be31be421f6f462d1e9da1101be7332d91984caa6f8022e6c78a5877f", size = 4321911, upload-time = "2026-01-21T16:27:35.195Z" }, { url = 
"https://files.pythonhosted.org/packages/52/99/dca81ed21ebaeff2b67cc9f815a20fdaa418b69f5f9ea4c6ed71721470db/torchvision-0.25.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a8f8061284395ce31bcd460f2169013382ccf411148ceb2ee38e718e9860f5a7", size = 1896209, upload-time = "2026-01-21T16:27:32.159Z" }, - { url = "https://files.pythonhosted.org/packages/28/cc/2103149761fdb4eaed58a53e8437b2d716d48f05174fab1d9fcf1e2a2244/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:146d02c9876858420adf41f3189fe90e3d6a409cbfa65454c09f25fb33bf7266", size = 2310735, upload-time = "2026-01-21T16:27:22.327Z" }, - { url = "https://files.pythonhosted.org/packages/76/ad/f4c985ad52ddd3b22711c588501be1b330adaeaf6850317f66751711b78c/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c4d395cb2c4a2712f6eb93a34476cdf7aae74bb6ea2ea1917f858e96344b00aa", size = 8089557, upload-time = "2026-01-21T16:27:27.666Z" }, { url = "https://files.pythonhosted.org/packages/63/cc/0ea68b5802e5e3c31f44b307e74947bad5a38cc655231d845534ed50ddb8/torchvision-0.25.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5e6b449e9fa7d642142c0e27c41e5a43b508d57ed8e79b7c0a0c28652da8678c", size = 4344260, upload-time = "2026-01-21T16:27:17.018Z" }, { url = "https://files.pythonhosted.org/packages/9e/1f/fa839532660e2602b7e704d65010787c5bb296258b44fa8b9c1cd6175e7d/torchvision-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:620a236288d594dcec7634c754484542dc0a5c1b0e0b83a34bda5e91e9b7c3a1", size = 1896193, upload-time = "2026-01-21T16:27:24.785Z" }, - { url = "https://files.pythonhosted.org/packages/80/ed/d51889da7ceaf5ff7a0574fb28f9b6b223df19667265395891f81b364ab3/torchvision-0.25.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b5e7f50002a8145a98c5694a018e738c50e2972608310c7e88e1bd4c058f6ce", size = 2309331, upload-time = "2026-01-21T16:27:19.97Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/a5/f93fcffaddd8f12f9e812256830ec9c9ca65abbf1bc369379f9c364d1ff4/torchvision-0.25.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:632db02300e83793812eee4f61ae6a2686dab10b4cfd628b620dc47747aa9d03", size = 8088713, upload-time = "2026-01-21T16:27:15.281Z" }, { url = "https://files.pythonhosted.org/packages/1f/eb/d0096eed5690d962853213f2ee00d91478dfcb586b62dbbb449fb8abc3a6/torchvision-0.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:d1abd5ed030c708f5dbf4812ad5f6fbe9384b63c40d6bd79f8df41a4a759a917", size = 4325058, upload-time = "2026-01-21T16:27:26.165Z" }, { url = "https://files.pythonhosted.org/packages/97/36/96374a4c7ab50dea9787ce987815614ccfe988a42e10ac1a2e3e5b60319a/torchvision-0.25.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ad9a8a5877782944d99186e4502a614770fe906626d76e9cd32446a0ac3075f2", size = 1896207, upload-time = "2026-01-21T16:27:23.383Z" }, - { url = "https://files.pythonhosted.org/packages/b5/e2/7abb10a867db79b226b41da419b63b69c0bd5b82438c4a4ed50e084c552f/torchvision-0.25.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:40a122c3cf4d14b651f095e0f672b688dde78632783fc5cd3d4d5e4f6a828563", size = 2310741, upload-time = "2026-01-21T16:27:18.712Z" }, - { url = "https://files.pythonhosted.org/packages/08/e6/0927784e6ffc340b6676befde1c60260bd51641c9c574b9298d791a9cda4/torchvision-0.25.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:846890161b825b38aa85fc37fb3ba5eea74e7091ff28bab378287111483b6443", size = 8089772, upload-time = "2026-01-21T16:27:14.048Z" }, { url = "https://files.pythonhosted.org/packages/b6/37/e7ca4ec820d434c0f23f824eb29f0676a0c3e7a118f1514f5b949c3356da/torchvision-0.25.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f07f01d27375ad89d72aa2b3f2180f07da95dd9d2e4c758e015c0acb2da72977", size = 4425879, upload-time = "2026-01-21T16:27:12.579Z" }, ] +[[package]] +name = "torchvision" +version = "0.26.0" +source = { registry = "https://pypi.org/simple" } 
+resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version == '3.13.*' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and sys_platform == 'linux'", + "python_full_version < '3.12' and sys_platform == 'linux'", +] +dependencies = [ + { name = "numpy", marker = "sys_platform == 'linux'" }, + { name = "pillow", marker = "sys_platform == 'linux'" }, + { name = "torch", marker = "sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/bf/21b899792b08cae7a298551c68398a79e333697479ed311b3b067aab4bdc/torchvision-0.26.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:1c55dc8affbcc0eb2060fbabbe996ae9e5839b24bb6419777f17848945a411b1", size = 7767527, upload-time = "2026-03-23T18:12:44.348Z" }, + { url = "https://files.pythonhosted.org/packages/9a/45/57bbf9e216850d065e66dd31a50f57424b607f1d878ab8956e56a1f4e36b/torchvision-0.26.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:fd10b5f994c210f4f6d6761cf686f82d748554adf486cb0979770c3252868c8f", size = 7519925, upload-time = "2026-03-23T18:12:53.283Z" }, + { url = "https://files.pythonhosted.org/packages/f4/ec/5c31c92c08b65662fe9604a4067ae8232582805949f11ddc042cebe818ed/torchvision-0.26.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:406557718e62fdf10f5706e88d8a5ec000f872da913bf629aab9297622585547", size = 7767944, upload-time = "2026-03-23T18:12:42.805Z" }, + { url = "https://files.pythonhosted.org/packages/f5/d8/cb6ccda1a1f35a6597645818641701207b3e8e13553e75fce5d86bac74b2/torchvision-0.26.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d61a5abb6b42a0c0c311996c2ac4b83a94418a97182c83b055a2a4ae985e05aa", size = 7522205, upload-time = "2026-03-23T18:12:54.654Z" }, + { url = "https://files.pythonhosted.org/packages/e6/81/0b3e58d1478c660a5af4268713486b2df7203f35abd9195fea87348a5178/torchvision-0.26.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = 
"sha256:a39c7a26538c41fda453f9a9692b5ff9b35a5437db1d94f3027f6f509c160eac", size = 7727494, upload-time = "2026-03-23T18:12:46.062Z" }, + { url = "https://files.pythonhosted.org/packages/b6/dc/d9ab5d29115aa05e12e30f1397a3eeae1d88a511241dc3bce48dc4342675/torchvision-0.26.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b7e6213620bbf97742e5f79832f9e9d769e6cf0f744c5b53dad80b76db633691", size = 7521747, upload-time = "2026-03-23T18:12:36.815Z" }, + { url = "https://files.pythonhosted.org/packages/71/e2/7a89096e6cf2f3336353b5338ba925e0addf9d8601920340e6bdf47e8eb3/torchvision-0.26.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:3daf9cc149cf3cdcbd4df9c59dae69ffca86c6823250442c3bbfd63fc2e26c61", size = 7728679, upload-time = "2026-03-23T18:12:26.196Z" }, + { url = "https://files.pythonhosted.org/packages/69/1d/4e1eebc17d18ce080a11dcf3df3f8f717f0efdfa00983f06e8ba79259f61/torchvision-0.26.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:82c3965eca27e86a316e31e4c3e5a16d353e0bcbe0ef8efa2e66502c54493c4b", size = 7609138, upload-time = "2026-03-23T18:12:35.327Z" }, + { url = "https://files.pythonhosted.org/packages/7b/ac/48f28ffd227991f2e14f4392dde7e8dc14352bb9428c1ef4a4bbf5f7ed85/torchvision-0.26.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:9a904f2131cbfadab4df828088a9f66291ad33f49ff853872aed1f86848ef776", size = 7727777, upload-time = "2026-03-23T18:12:22.549Z" }, + { url = "https://files.pythonhosted.org/packages/a4/21/a2266f7f1b0e58e624ff15fd6f01041f59182c49551ece0db9a183071329/torchvision-0.26.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:0f3e572efe62ad645017ea847e0b5e4f2f638d4e39f05bc011d1eb9ac68d4806", size = 7522174, upload-time = "2026-03-23T18:12:29.565Z" }, + { url = "https://files.pythonhosted.org/packages/d2/6a/18a582fe3c5ee26f49b5c9fb21ad8016b4d1c06d10178894a58653946fda/torchvision-0.26.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:7058c5878262937e876f20c25867b33724586aa4499e2853b2d52b99a5e51953", size 
= 7729089, upload-time = "2026-03-23T18:12:31.394Z" }, + { url = "https://files.pythonhosted.org/packages/c5/9b/f7e119b59499edc00c55c03adc9ec3bd96144d9b81c46852c431f9c64a9a/torchvision-0.26.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:8008474855623c6ba52876589dc52df0aa66e518c25eca841445348e5f79844c", size = 7522704, upload-time = "2026-03-23T18:12:20.301Z" }, +] + [[package]] name = "tornado" version = "6.5.5" @@ -9461,15 +9564,15 @@ wheels = [ [[package]] name = "transformer-engine" -version = "2.11.0" +version = "2.14.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/33/44571ec584c88e1715f4c2afefc0ddd45064c7065ac1c6ffc8e832bc3ba3/transformer_engine-2.11.0-py3-none-any.whl", hash = "sha256:7ee1eae8fa6b0cb471c6066aa3555304fda8537174e5019929dc0c8655071df3", size = 723110, upload-time = "2026-01-02T09:58:23.245Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e3/d54ab51ad6d9be35582fc8cd0bcf851f4c7d0f75d465ae7d706fba4fc40e/transformer_engine-2.14.1-py3-none-any.whl", hash = "sha256:ad0e5e3c11b90bc98f7dd843c7af06d8a321361ac0df8c6c35326c9b437bdfec", size = 820028, upload-time = "2026-04-29T17:11:33.922Z" }, ] [[package]] -name = "transformer-engine-cu12" -version = "2.11.0" +name = "transformer-engine-cu13" +version = "2.14.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "importlib-metadata" }, @@ -9477,14 +9580,14 @@ dependencies = [ { name = "pydantic" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/05/27/5c4c27cb245a3513e5ad7ccef50e2e9688996e2cc558edbbb575dfcca276/transformer_engine_cu12-2.11.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:ed5fda0925cb304d6864b451d8d012c579d5bd097bfefefca769b2704b06381a", size = 287630565, upload-time = "2026-01-02T09:56:43.645Z" }, - { url = 
"https://files.pythonhosted.org/packages/fa/a2/1439bbb6bc7d4d6045bad7d213884f7be92301c0982f009e3bbafa40e4ff/transformer_engine_cu12-2.11.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:6e5c0707583b2a90b2570da6f57409c6802653e069dfec38cf07a3b77ba9b12d", size = 288159349, upload-time = "2026-01-02T09:57:56.435Z" }, + { url = "https://files.pythonhosted.org/packages/d3/2e/0b7e77ba111f07bd5e750b5f93155b5765bc45c7f3cd63a7d8790e965e53/transformer_engine_cu13-2.14.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:0268b0273e918be12abfc5f6fb791d1cddec21a49c0cb0cc9df70797baa622e4", size = 258189641, upload-time = "2026-04-29T17:11:58.091Z" }, + { url = "https://files.pythonhosted.org/packages/54/f7/40a56f7477fb74ae6b62c8e06a14b7eeaf179c1e08a97f08e0ec9f0dae77/transformer_engine_cu13-2.14.1-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:20506b4846fab8beed420178717adee5ebc6b33700a1f975d17b6a98016df730", size = 259287859, upload-time = "2026-04-29T17:11:46.524Z" }, ] [[package]] name = "transformer-engine-torch" -version = "2.11.0" -source = { git = "https://github.com/NVIDIA/TransformerEngine.git?subdirectory=transformer_engine%2Fpytorch&rev=v2.11#c188b533cc3721ca9c6bbfd26148f5cf60108c25" } +version = "2.14.1" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "einops" }, { name = "onnx" }, @@ -9492,8 +9595,9 @@ dependencies = [ { name = "packaging" }, { name = "pydantic" }, { name = "torch" }, - { name = "transformer-engine-cu12" }, + { name = "transformer-engine-cu13" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/7d/b5/d04c164cac677ddf88c7412acd3f14a4e4fe563f417a4319c0a4635405ac/transformer_engine_torch-2.14.1.tar.gz", hash = "sha256:8a2f1f3232184f86395929505a011fbaa0b8224584417ee8d5fc7018e8533e4d", size = 303709, upload-time = "2026-04-29T17:11:35.046Z" } [[package]] name = "transformers" @@ -9710,7 +9814,8 @@ huggingface = [ { name = "psutil" }, { name = "sentence-transformers" }, { name = "sentencepiece" }, - { name 
= "torchvision" }, + { name = "torchvision", version = "0.25.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, + { name = "torchvision", version = "0.26.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, { name = "tqdm" }, { name = "transformers" }, { name = "triton", marker = "'linux' in sys_platform" }, @@ -9916,11 +10021,12 @@ wheels = [ [[package]] name = "vllm" -version = "0.19.1" +version = "0.20.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp", marker = "sys_platform == 'linux'" }, { name = "anthropic", marker = "sys_platform == 'linux'" }, + { name = "apache-tvm-ffi", marker = "sys_platform == 'linux'" }, { name = "blake3", marker = "sys_platform == 'linux'" }, { name = "cachetools", marker = "sys_platform == 'linux'" }, { name = "cbor2", marker = "sys_platform == 'linux'" }, @@ -9930,13 +10036,14 @@ dependencies = [ { name = "diskcache", marker = "sys_platform == 'linux'" }, { name = "einops", marker = "sys_platform == 'linux'" }, { name = "fastapi", extra = ["standard"], marker = "sys_platform == 'linux'" }, + { name = "fastsafetensors", marker = "sys_platform == 'linux'" }, { name = "filelock", marker = "sys_platform == 'linux'" }, { name = "flashinfer-cubin", marker = "sys_platform == 'linux'" }, { name = "flashinfer-python", marker = "sys_platform == 'linux'" }, { name = "gguf", marker = "sys_platform == 'linux'" }, { name = "ijson", marker = "sys_platform == 'linux'" }, { name = "lark", marker = "sys_platform == 'linux'" }, - { name = "llguidance", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'ppc64le' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "llguidance", marker = "(platform_machine == 'aarch64' 
and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'ppc64le' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "lm-format-enforcer", marker = "sys_platform == 'linux'" }, { name = "mcp", marker = "sys_platform == 'linux'" }, { name = "mistral-common", extra = ["image"], marker = "sys_platform == 'linux'" }, @@ -9975,20 +10082,21 @@ dependencies = [ { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform == 'linux'" }, { name = "six", marker = "python_full_version >= '3.12' and sys_platform == 'linux'" }, { name = "tiktoken", marker = "sys_platform == 'linux'" }, + { name = "tilelang", marker = "sys_platform == 'linux'" }, { name = "tokenizers", marker = "sys_platform == 'linux'" }, { name = "torch", marker = "sys_platform == 'linux'" }, { name = "torchaudio", marker = "sys_platform == 'linux'" }, - { name = "torchvision", marker = "sys_platform == 'linux'" }, + { name = "torchvision", version = "0.26.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, { name = "tqdm", marker = "sys_platform == 'linux'" }, { name = "transformers", marker = "sys_platform == 'linux'" }, { name = "typing-extensions", marker = "sys_platform == 'linux'" }, { name = "watchfiles", marker = "sys_platform == 'linux'" }, { name = "xgrammar", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'ppc64le' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a8/49/60a2a962ecbf780c8fbfd0d5548b208d654d5c4267df94d8d93883641431/vllm-0.19.1.tar.gz", hash = "sha256:9fb88ce6b50991eba41d183584f65f51d7f6015d86a42cdabf79c1c8bd5d66fa", size = 31105401, upload-time = 
"2026-04-18T05:50:15.143Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/80/9798ce5e16af5754183ef33a63dc27017e2b51c87f51cc741832ce47a2d5/vllm-0.20.0.tar.gz", hash = "sha256:a6d50152936ee292455af3ffbe359f7a284ac43bf3b68caccf29f368e196cc72", size = 33508260, upload-time = "2026-04-27T11:08:04.666Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/4c/26c426103c58ac8d98435fe63c7758a2f289b5481a08be19e9c9fe29a4c2/vllm-0.19.1-cp38-abi3-manylinux_2_31_aarch64.whl", hash = "sha256:c8dde3c9af20f00a644e64a50ebe43948f2921bab3ffd5407d634c15836cb181", size = 385252556, upload-time = "2026-04-18T05:49:16.101Z" }, - { url = "https://files.pythonhosted.org/packages/78/20/f41216b79c87372a9d03175f36fa1411ee61059ce8c557d2691722ea4aae/vllm-0.19.1-cp38-abi3-manylinux_2_31_x86_64.whl", hash = "sha256:71a87f46cafab4489c69a5c5c83b870d0235e5694d8222303d460576293dc719", size = 433132101, upload-time = "2026-04-18T05:49:54.202Z" }, + { url = "https://files.pythonhosted.org/packages/63/5b/26379d3c522379373e50b9f77adf55eb94f4a0f62a6c8e3e7fe3f0bf0d39/vllm-0.20.0-cp38-abi3-manylinux_2_35_aarch64.whl", hash = "sha256:29a135ca0d70650f057f15c7c0b560d24659524c771f70fbddc24597c861c118", size = 235776358, upload-time = "2026-04-27T11:07:22.058Z" }, + { url = "https://files.pythonhosted.org/packages/47/bb/cb02d1e9679fce892a674f86caee25acc9ddd64d7dafa4cfe29e899993a8/vllm-0.20.0-cp38-abi3-manylinux_2_35_x86_64.whl", hash = "sha256:24d28892e210200f6e1bd13f699c42a74cd2bb7364c11248e2348f677c7f6dfb", size = 244415937, upload-time = "2026-04-27T11:07:48.135Z" }, ] [[package]] @@ -10712,6 +10820,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" }, ] +[[package]] +name = "z3-solver" +version = "4.15.4.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/8e/0c8f17309549d2e5cde9a3ccefa6365437f1e7bafe71878eaf9478e47b18/z3_solver-4.15.4.0.tar.gz", hash = "sha256:928c29b58c4eb62106da51c1914f6a4a55d0441f8f48a81b9da07950434a8946", size = 5018600, upload-time = "2025-10-29T18:12:03.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/c9/bb51a96af0091324c81b803f16c49f719f9f6ea0b0bb52200f5c97ec4892/z3_solver-4.15.4.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e103a6f203f505b8b8b8e5c931cc407c95b61556512d4921c1ddc0b3f41b08e", size = 29268352, upload-time = "2025-10-29T18:11:53.032Z" }, + { url = "https://files.pythonhosted.org/packages/bf/2e/0b49f7e4e53817cfb09a0f6585012b782dfe0b666e8abefcb4fac0570606/z3_solver-4.15.4.0-py3-none-manylinux_2_34_aarch64.whl", hash = "sha256:62c7e9cbdd711932301f29919ad9158de9b2f58b4d281dd259bbcd0a2f408ba1", size = 27226534, upload-time = "2025-10-29T18:11:55.59Z" }, +] + [[package]] name = "zipp" version = "3.23.0"