From 14699c17ff70d6fb8a122b54843883d0c7deeda5 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Mon, 18 May 2026 12:22:04 +0800 Subject: [PATCH 01/17] first version of sim_ready --- .../toolkits/simready_pipeline/cli/start.py | 85 + .../simready_pipeline/configs/gen_config.json | 25 + .../toolkits/simready_pipeline/core/asset.py | 90 + .../simready_pipeline/io/json_store.py | 64 + .../toolkits/simready_pipeline/parser/base.py | 97 + .../simready_pipeline/parser/geometry.py | 132 ++ .../simready_pipeline/parser/inspector.py | 91 + .../simready_pipeline/parser/internal.py | 126 ++ .../simready_pipeline/parser/physics.py | 472 +++++ .../toolkits/simready_pipeline/parser/usd.py | 146 ++ .../simready_pipeline/pipeline/ingest.py | 154 ++ .../simready_pipeline/utils/geometry_utils.py | 194 ++ .../simready_pipeline/utils/ingest_utils.py | 447 +++++ .../simready_pipeline/utils/simready_utils.py | 1722 +++++++++++++++++ .../simready_pipeline/utils/texture_utils.py | 297 +++ .../simready_pipeline/utils/usd_utils.py | 412 ++++ 16 files changed, 4554 insertions(+) create mode 100644 embodichain/toolkits/simready_pipeline/cli/start.py create mode 100644 embodichain/toolkits/simready_pipeline/configs/gen_config.json create mode 100644 embodichain/toolkits/simready_pipeline/core/asset.py create mode 100644 embodichain/toolkits/simready_pipeline/io/json_store.py create mode 100644 embodichain/toolkits/simready_pipeline/parser/base.py create mode 100644 embodichain/toolkits/simready_pipeline/parser/geometry.py create mode 100644 embodichain/toolkits/simready_pipeline/parser/inspector.py create mode 100644 embodichain/toolkits/simready_pipeline/parser/internal.py create mode 100644 embodichain/toolkits/simready_pipeline/parser/physics.py create mode 100644 embodichain/toolkits/simready_pipeline/parser/usd.py create mode 100644 embodichain/toolkits/simready_pipeline/pipeline/ingest.py create mode 100644 embodichain/toolkits/simready_pipeline/utils/geometry_utils.py create mode 100644 
embodichain/toolkits/simready_pipeline/utils/ingest_utils.py create mode 100644 embodichain/toolkits/simready_pipeline/utils/simready_utils.py create mode 100644 embodichain/toolkits/simready_pipeline/utils/texture_utils.py create mode 100644 embodichain/toolkits/simready_pipeline/utils/usd_utils.py diff --git a/embodichain/toolkits/simready_pipeline/cli/start.py b/embodichain/toolkits/simready_pipeline/cli/start.py new file mode 100644 index 00000000..c2910db1 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/cli/start.py @@ -0,0 +1,85 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def cli_ingest_single(
    input_dir: str, output_dir: str, category: str, simple_ingest: bool
):
    """Ingest one asset directory into the output tree.

    Args:
        input_dir: Directory holding the single asset to ingest.
        output_dir: Root directory for the results; created (including
            parents) when it does not exist yet.
        category: Category label forwarded to the ingest step.
        simple_ingest: Forwarded to ``ingest_one_asset``; the CLI documents
            it as "trimesh only, skip Blender".

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)

    # Validate before creating anything so a typo does not leave empty dirs.
    if not input_path.exists():
        raise FileNotFoundError(f"Input directory not found: {input_path}")

    output_path.mkdir(parents=True, exist_ok=True)
    store = JsonStore(output_path)
    manager = ParserManager()

    print(f"Processing Single Asset: {input_path.name} (Category: {category})")

    asset = ingest_one_asset(
        asset_dir=input_path,
        category=category,
        output_root=output_path,
        store=store,
        manager=manager,
        simple_ingest=simple_ingest,
    )

    if asset:
        print("Successfully Processed")
    else:
        print("no asset returned (might be direct_copy mode)")


def main(argv=None):
    """Parse command-line arguments and ingest a single asset.

    Args:
        argv: Optional argument list; ``None`` (the default) makes argparse
            read ``sys.argv[1:]`` exactly as before.

    Raises:
        SystemExit: Raised by argparse when a required option is missing.
    """
    parser = argparse.ArgumentParser(
        description="embodichain.toolkits.simready_pipeline Asset Ingestion Pipeline"
    )

    # Both paths are mandatory: without them Path(None) would raise an
    # unhelpful TypeError deep inside cli_ingest_single.
    parser.add_argument(
        "--input_dir",
        type=str,
        required=True,
        help="Path to the single asset directory",
    )
    parser.add_argument(
        "--output_root",
        type=str,
        required=True,
        help="Path to the output directory",
    )
    parser.add_argument(
        "--category",
        type=str,
        required=True,
        help="Specify the category for this asset (e.g., 'cup', 'chair')",
    )
    parser.add_argument(
        "--simple", action="store_true", help="trimesh only, skip Blender"
    )

    args = parser.parse_args(argv)

    cli_ingest_single(
        args.input_dir, args.output_root, args.category, simple_ingest=args.simple
    )


if __name__ == "__main__":
    main()
b/embodichain/toolkits/simready_pipeline/configs/gen_config.json new file mode 100644 index 00000000..85d97390 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/configs/gen_config.json @@ -0,0 +1,25 @@ +{ + "ingest": { + "canonical_asset_name": "asset.obj", + "canonical_texture_name": "", + "unprocessed_formats": [".urdf", ".usd"], + "parseable_mesh_formats": [".glb", ".gltf", ".obj", ".ply", ".stl"], + "blender_texture_size": 2048, + "blender_texture_name": "surface_texture.png" + }, + "geometry_cleanup": { + "ratio": 0.5, + "merge_dist": 0.00001, + "remove_non_manifold": true, + "triangulate": false, + "fill_hole_sides": 8 + }, + "llm": { + "azure_openai": { + "api_key": "", + "model": "gpt-4o", + "base_url": "", + "api_version": "2024-02-15-preview" + } + } +} diff --git a/embodichain/toolkits/simready_pipeline/core/asset.py b/embodichain/toolkits/simready_pipeline/core/asset.py new file mode 100644 index 00000000..59aec900 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/core/asset.py @@ -0,0 +1,90 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@dataclass
class Asset:
    """In-memory record of one asset and the metadata gathered about it.

    Every section is a plain dictionary so the record can be serialized to
    JSON via :meth:`to_dict` and rebuilt via :meth:`from_dict`.
    """

    asset_id: str

    identity: Dict[str, Any] = field(default_factory=dict)
    asset_data: Dict[str, Any] = field(default_factory=dict)

    # Visual, geometry, topology and similar information produced while
    # parsing or ingesting the asset.
    parsed: Dict[str, Any] = field(default_factory=dict)
    semantics: Dict[str, Any] = field(default_factory=dict)
    physics: Dict[str, Any] = field(default_factory=dict)
    simulation: Dict[str, Any] = field(default_factory=dict)
    affordance: Dict[str, Any] = field(default_factory=dict)
    usd: Dict[str, Any] = field(default_factory=dict)

    provenance: Dict[str, Any] = field(default_factory=dict)
    quality: Dict[str, Any] = field(default_factory=dict)
    status: Dict[str, Any] = field(default_factory=dict)
    internal: Dict[str, Any] = field(default_factory=dict)

    # Temporary ingest-related information; not written by to_dict().
    ingest_info: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Fill simulation defaults and stamp the record as just updated."""
        self._init_simulation_defaults()
        self.touch()

    def _init_simulation_defaults(self) -> None:
        """Ensure ``simulation`` has ``articulation`` and ``sim_ready`` keys."""
        self.simulation.setdefault("articulation", None)
        self.simulation.setdefault("sim_ready", {})

    def touch(self) -> None:
        """Store the current local time (ISO 8601) in ``status['last_updated']``."""
        self.status["last_updated"] = datetime.now().isoformat()

    def to_dict(self) -> Dict[str, Any]:
        """Return the serializable sections of the asset.

        ``ingest_info`` is left out. The returned dictionary references the
        asset's own section dictionaries (no copy is made).
        """
        return {
            "asset_id": self.asset_id,
            "identity": self.identity,
            "asset_data": self.asset_data,
            "parsed": self.parsed,
            "quality": self.quality,
            "semantics": self.semantics,
            "physics": self.physics,
            "simulation": self.simulation,
            "usd": self.usd,
            "provenance": self.provenance,
            "status": self.status,
            "internal": self.internal,
            "affordance": self.affordance,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Asset":
        """Build an asset from a dictionary shaped like :meth:`to_dict` output.

        Args:
            data: Mapping with a mandatory ``asset_id``; any missing section
                defaults to an empty dictionary.

        Raises:
            KeyError: If ``asset_id`` is missing.
        """
        return cls(
            asset_id=data["asset_id"],
            identity=data.get("identity", {}),
            # Must default to a dict: parsers call asset_data.get(...).
            asset_data=data.get("asset_data", {}),
            parsed=data.get("parsed", {}),
            quality=data.get("quality", {}),
            semantics=data.get("semantics", {}),
            physics=data.get("physics", {}),
            simulation=data.get("simulation", {}),
            usd=data.get("usd", {}),
            provenance=data.get("provenance", {}),
            status=data.get("status", {}),
            internal=data.get("internal", {}),
            affordance=data.get("affordance", {}),
        )
class JsonStore:
    """Simple JSON-based store for assets plus a global registry.

    Layout under ``root``: one ``<asset_id>/asset.json`` file per asset and a
    single registry file mapping asset ids to their directory and category.
    """

    # File name of the registry kept directly under the store root.
    REGISTRY_FILENAME = "registry.json"

    def __init__(self, root_dir: str):
        """Remember ``root_dir`` (a string or path-like) as the store root."""
        self.root = Path(root_dir)

    @property
    def registry_path(self) -> Path:
        """Location of the registry file inside the store root."""
        return self.root / self.REGISTRY_FILENAME

    def load_registry(self) -> dict:
        """Return the registry, or an empty one when no file exists yet.

        The result always contains an ``"assets"`` mapping.
        """
        if not self.registry_path.exists():
            return {"assets": {}}
        registry = json.loads(self.registry_path.read_text())
        registry.setdefault("assets", {})
        return registry

    def _get_asset_json_path(self, asset_id: str) -> Path:
        """Return the path of ``asset.json`` for ``asset_id``."""
        return self.root / asset_id / "asset.json"

    def save_asset(self, asset: "Asset") -> None:
        """Write ``asset.to_dict()`` to the asset's JSON file."""
        asset_path = self._get_asset_json_path(asset.asset_id)
        asset_path.parent.mkdir(parents=True, exist_ok=True)
        asset_path.write_text(json.dumps(asset.to_dict(), indent=2))

    def load_asset(self, asset_id: str) -> Optional["Asset"]:
        """Load an asset by id; return ``None`` when its JSON file is absent."""
        asset_path = self._get_asset_json_path(asset_id)
        if not asset_path.exists():
            return None
        data = json.loads(asset_path.read_text())
        return Asset.from_dict(data)

    def write_asset(self, asset_id: str, asset_json: dict) -> None:
        """Write a raw asset dictionary and record it in the registry.

        Args:
            asset_id: Identifier used as the asset directory name.
            asset_json: Serialized asset; ``identity.category`` is copied
                into the registry entry (``None`` when absent).
        """
        asset_root = self.root / asset_id
        # parents=True so a fresh store root does not make this fail.
        asset_root.mkdir(parents=True, exist_ok=True)

        asset_path = asset_root / "asset.json"
        asset_path.write_text(json.dumps(asset_json, indent=2))

        registry = self.load_registry()
        registry["assets"][asset_id] = {
            "path": str(asset_root),
            "category": asset_json.get("identity", {}).get("category"),
        }

        self.registry_path.write_text(json.dumps(registry, indent=2))

    def list_asset_ids(self) -> list[str]:
        """Return the ids currently recorded in the registry."""
        registry = self.load_registry()
        return list(registry.get("assets", {}).keys())
class AssetParser(ABC):
    """Interface of a single parsing capability; carries no orchestration logic."""

    # Identifier of the parser; concrete subclasses assign it.
    name: str

    @abstractmethod
    def parse(self, asset: Asset, asset_root: Path) -> None:
        """Update ``asset`` in place.

        Implementations must be idempotent.
        """
        raise NotImplementedError
class ParserManager:
    """Central parser dispatcher and owner of the default pipeline order."""

    # Stage names run, in this order, when parse() gets no explicit pipeline.
    DEFAULT_PIPELINE: List[str] = [
        "inspector",
        "geometry",
        "physics",
        "usd",
        "internal",
    ]

    def __init__(self):
        """Instantiate and register the built-in parsers."""
        self._parsers: Dict[str, object] = {}

        self._register(
            AssetInspector(),
            GeometryParser(),
            PhysicsParser(),
            UsdParser(),
            InternalParser(),
        )

    def _register(self, *parsers):
        """Add parsers to the registry, keyed by their ``name``.

        Raises:
            ValueError: If a parser has no name or the name is already taken.
        """
        for p in parsers:
            parser_name = getattr(p, "name", None)
            if not parser_name:
                raise ValueError(f"Parser missing name: {p}")
            if parser_name in self._parsers:
                raise ValueError(f"Duplicate parser: {p.name}")
            self._parsers[parser_name] = p

    def parse(
        self,
        asset: Asset,
        asset_root: Path,
        pipeline: Optional[List[str]] = None,
    ) -> None:
        """Run the given stages (or the default pipeline) on ``asset``.

        Args:
            asset: Asset mutated in place by each stage.
            asset_root: Directory of the asset on disk.
            pipeline: Stage names to run; ``None`` or an empty list selects
                ``DEFAULT_PIPELINE``.

        Raises:
            KeyError: If a stage name is not registered.
        """
        pipeline = pipeline or self.DEFAULT_PIPELINE

        for stage in pipeline:
            self._run(stage, asset, asset_root)

        # Only flag the asset once every stage has finished without raising.
        asset.status["parsed"] = True

    def parse_one(self, name: str, asset: Asset, asset_root: Path) -> None:
        """Run a single registered parser on ``asset``."""
        self._run(name, asset, asset_root)

    def _run(self, name: str, asset: Asset, asset_root: Path):
        """Look up ``name`` and invoke its ``parse`` method.

        Raises:
            KeyError: If no parser is registered under ``name``.
        """
        parser = self._parsers.get(name)
        # Identity check: a registered parser must never be skipped merely
        # because it happens to evaluate as falsy.
        if parser is None:
            raise KeyError(f"Parser not registered: {name}")
        parser.parse(asset, asset_root)
def _load_geometry_cleanup_config() -> dict:
    """Return the ``geometry_cleanup`` section of ``configs/gen_config.json``.

    The config file is resolved relative to this module's parent package; an
    empty dict is returned when the section is missing.
    """
    config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json"
    with config_path.open("r", encoding="utf-8") as f:
        return json.load(f).get("geometry_cleanup", {})


# Loaded once at import time; GeometryParser.parse reads its options from it.
GEOMETRY_CLEANUP_CONFIG = _load_geometry_cleanup_config()


class GeometryParser(AssetParser):
    """Clean the asset mesh in place and record geometry/topology statistics."""

    name = "geometry"

    def __init__(self):
        super().__init__()

    def _topology_stats(self, mesh: trimesh.Trimesh) -> dict[str, Any]:
        """Collect best-effort topology statistics for ``mesh``.

        Each statistic is computed independently; one that cannot be computed
        stays ``None`` instead of aborting the whole report.
        ``face_component_count`` and ``edge_incidence_hist`` are reported as
        ``None`` (they are not computed here).
        """
        stats: dict[str, Any] = {
            "is_empty": bool(mesh.is_empty),
            "is_watertight": bool(mesh.is_watertight),
            "is_winding_consistent": bool(mesh.is_winding_consistent),
            "is_volume": bool(mesh.is_volume),
            "euler_number": None,
            "body_count": int(mesh.body_count) if hasattr(mesh, "body_count") else None,
            "face_component_count": None,
            "broken_face_count": None,
            "boundary_edge_count": None,
            "manifold_edge_count": None,
            "nonmanifold_edge_count": None,
            "edge_incidence_hist": None,
        }

        if mesh.is_empty:
            return stats

        # Prefer the Euler number of a copy without unreferenced vertices and
        # fall back to the raw mesh when that copy cannot be produced.
        try:
            tmp = mesh.copy(include_visual=False)
            tmp.remove_unreferenced_vertices()
            stats["euler_number"] = int(tmp.euler_number)
        except Exception:
            try:
                stats["euler_number"] = int(mesh.euler_number)
            except Exception:
                stats["euler_number"] = None

        try:
            broken = trimesh.repair.broken_faces(mesh)
            stats["broken_face_count"] = int(len(broken))
        except Exception:
            stats["broken_face_count"] = None

        try:
            edges = mesh.edges_unique
            if len(edges) > 0:
                # Occurrences per unique edge: 1 = boundary, 2 = manifold,
                # more than 2 = non-manifold.
                counts = np.bincount(mesh.edges_unique_inverse)
                stats["boundary_edge_count"] = int(np.sum(counts == 1))
                stats["manifold_edge_count"] = int(np.sum(counts == 2))
                stats["nonmanifold_edge_count"] = int(np.sum(counts > 2))
        except Exception:
            pass

        return stats

    def parse(self, asset: Asset, asset_root: Path) -> None:
        """Clean the mesh file in place and store statistics in ``asset.parsed``.

        Non-mesh assets are marked as skipped. A mesh asset without a path, or
        one whose cleaned file cannot be loaded, gets an ``{"error": ...}``
        entry. Exceptions raised by ``process_obj`` itself propagate.
        """
        asset.parsed.setdefault("geometry", {})

        if asset.asset_data.get("type") != "mesh":
            asset.parsed["geometry"] = {"asset dont have a mesh": "skipped"}
            return

        rel_path = asset.asset_data.get("path")
        if not rel_path:
            # Without this guard `asset_root / None` raises a bare TypeError.
            asset.parsed["geometry"] = {"error": "asset_data has no mesh path"}
            return

        mesh_path = asset_root / rel_path
        # The cleaned mesh overwrites the input file.
        process_obj(
            input_path=str(mesh_path),
            output_path=str(mesh_path),
            ratio=GEOMETRY_CLEANUP_CONFIG.get("ratio", 0.5),
            weld_distance=GEOMETRY_CLEANUP_CONFIG.get("weld_distance", 0.0001),
            merge_dist=GEOMETRY_CLEANUP_CONFIG.get("merge_dist", 1e-5),
            remove_non_manifold=GEOMETRY_CLEANUP_CONFIG.get(
                "remove_non_manifold", True
            ),
            triangulate=GEOMETRY_CLEANUP_CONFIG.get("triangulate", False),
            fill_hole_sides=GEOMETRY_CLEANUP_CONFIG.get("fill_hole_sides", 8),
        )

        try:
            mesh = trimesh.load(
                mesh_path, force="mesh", skip_materials=True, process=False
            )

            geom_info = {
                "vertices": int(len(mesh.vertices)),
                "faces": int(len(mesh.faces)),
                "bounds": mesh.bounds.tolist() if mesh.bounds is not None else None,
                "extents": mesh.extents.tolist() if mesh.extents is not None else None,
                "area": float(mesh.area),
            }

            geom_info.update(self._topology_stats(mesh))
            asset.parsed["geometry"] = geom_info

        except Exception as e:
            print(f"[GEOMETRY PARSER FAILED] {mesh_path}: {str(e)}")
            asset.parsed["geometry"] = {"error": str(e)}
class AssetInspector(AssetParser):
    """Detect which kind of asset lives under ``<asset_root>/asset_source``."""

    name = "inspector"

    def _find_first_file(self, root: Path, suffixes: tuple[str, ...]) -> Path | None:
        """Return the first match below ``root``, or ``None``.

        Suffixes are tried in the given order; within one suffix the matches
        are sorted and the smallest path wins.
        """
        for ext in suffixes:
            matches = sorted(root.rglob(f"*{ext}"))
            if matches:
                return matches[0]
        return None

    def parse(self, asset: Asset, asset_root: Path) -> None:
        """Populate ``asset.asset_data`` with the primary file of the asset.

        Priority: URDF articulation, then the canonical ``asset.obj``, then
        any other mesh file, then a USD scene. ``asset.asset_data`` is emptied
        first and stays empty when nothing suitable is found.
        """
        source_dir = asset_root / "asset_source"

        asset.asset_data.clear()
        asset.simulation.setdefault("articulation", {})

        if not source_dir.exists():
            print(f"Warning: asset_source not found: {source_dir}")
            return

        def describe(kind: str, fmt: str, file: Path) -> dict:
            # Paths are stored relative to the asset root.
            return {
                "id": asset.asset_id,
                "type": kind,
                "format": fmt,
                "path": str(file.relative_to(asset_root)),
            }

        urdf = self._find_first_file(source_dir, (".urdf",))
        if urdf is not None:
            asset.simulation["articulation"] = {
                "type": "articulation",
                "format": "urdf",
                "file_path": str(urdf.relative_to(asset_root)),
            }
            asset.asset_data = describe("articulation", "urdf", urdf)
            return

        canonical = source_dir / "asset.obj"
        if canonical.exists():
            asset.asset_data = describe("mesh", "obj", canonical)
            return

        mesh = self._find_first_file(
            source_dir, (".obj", ".gltf", ".glb", ".ply", ".stl")
        )
        if mesh is not None:
            asset.asset_data = describe(
                "mesh", mesh.suffix.lstrip(".").lower(), mesh
            )
            return

        usd = self._find_first_file(source_dir, (".usd",))
        if usd is not None:
            asset.asset_data = describe("scene", "usd", usd)
            return

        print(f"Warning: No supported files found in {source_dir}")
class InternalParser(AssetParser):
    """Render a thumbnail of the asset mesh and record it in ``asset.internal``."""

    name = "internal"

    @staticmethod
    def _render_thumbnail(mesh: trimesh.Trimesh, output_path: Path) -> None:
        """Render ``mesh`` off-screen to a 512x512 image at ``output_path``.

        The camera sits on the positive X side of the bounding-box center,
        looks at that center and uses Z as the up direction. Its distance is
        chosen so 1.5x the larger of the Y/Z extents fits the field of view.
        """
        bounds = mesh.bounds
        model_center = (bounds[0] + bounds[1]) / 2.0
        size = bounds[1] - bounds[0]

        target_frustum_size = max(size[1], size[2]) * 1.5
        yfov = np.pi / 4.0
        img_width, img_height = 512, 512
        camera_distance = (target_frustum_size / 2.0) / np.tan(yfov / 2.0)

        eye = model_center + np.array([camera_distance, 0.0, 0.0])
        target = model_center  # Look at the mesh center, not the origin.
        up = np.array([0.0, 0.0, 1.0])  # Z-up

        # Orthonormal camera frame; the third axis points from the target
        # back to the eye.
        # NOTE(review): presumably matches pyrender's convention of cameras
        # looking along their local -Z axis - confirm.
        forward = eye - target
        forward = forward / np.linalg.norm(forward)

        right = np.cross(up, forward)
        right = right / np.linalg.norm(right)

        corrected_up = np.cross(forward, right)

        camera_pose = np.eye(4)
        camera_pose[:3, 0] = right
        camera_pose[:3, 1] = corrected_up
        camera_pose[:3, 2] = forward
        camera_pose[:3, 3] = eye

        scene = pyrender.Scene(bg_color=[1.0, 1.0, 1.0, 1.0])
        pyrender_mesh = pyrender.Mesh.from_trimesh(mesh, smooth=False)
        scene.add(pyrender_mesh)

        camera = pyrender.PerspectiveCamera(
            yfov=yfov, aspectRatio=img_width / img_height
        )
        scene.add(camera, pose=camera_pose)

        key_light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=3.0)
        key_pose = np.eye(4)
        key_pose[:3, 3] = eye + np.array([0, camera_distance, camera_distance])
        scene.add(key_light, pose=key_pose)

        fill_light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=1.0)
        fill_pose = np.eye(4)
        fill_pose[:3, 3] = eye + np.array([0, -camera_distance, 0.5 * camera_distance])
        scene.add(fill_light, pose=fill_pose)

        renderer = pyrender.OffscreenRenderer(
            viewport_width=img_width, viewport_height=img_height
        )
        try:
            color, _ = renderer.render(scene)
        finally:
            # Release the off-screen context even when rendering fails.
            renderer.delete()

        Image.fromarray(color).save(output_path)

    def parse(self, asset: Asset, asset_root: Path) -> None:
        """Render ``<asset_root>/<asset_id>.png`` and update ``asset.internal``.

        The simready mesh (``asset_simready/asset_simready.obj``) is preferred
        over the original mesh named in ``asset.asset_data['path']``. Failures
        are recorded in ``asset.internal['error']`` instead of being raised.
        """
        asset.internal.setdefault("thumbnail_path", "")
        asset.internal.setdefault("rendered", False)
        asset.internal.setdefault("error", None)

        # asset_data may lack "path"; `asset_root / None` would raise a
        # TypeError outside the try block below.
        rel_path = asset.asset_data.get("path")
        mesh_path_ori = asset_root / rel_path if rel_path else None
        mesh_path_sr = asset_root / "asset_simready" / "asset_simready.obj"

        if mesh_path_sr.exists():
            mesh_path = mesh_path_sr
        elif mesh_path_ori is not None and mesh_path_ori.exists():
            mesh_path = mesh_path_ori
        else:
            asset.internal["error"] = (
                "No mesh file found (neither simready nor original)"
            )
            return

        try:
            mesh = trimesh.load(str(mesh_path), force="mesh")
            output_filename = f"{asset.asset_id}.png"
            output_path = asset_root / output_filename
            self._render_thumbnail(mesh, output_path)

            asset.internal.update(
                {
                    "thumbnail_path": f"{asset.asset_id}/{asset.asset_id}.png",
                    "rendered": True,
                    "error": None,
                }
            )

        except Exception as e:
            asset.internal.update({"rendered": False, "error": f"Exception: {str(e)}"})

        return
# Baseline parameter set for rigid bodies. The keys are the same ones
# PHYSICS_SYSTEM_PROMPT lists as mandatory for "rigid" mode; that prompt
# describes mass in kg and density in kg/m^3.
# NOTE(review): how these defaults are consumed is not visible in this part
# of the file - presumably to fill in missing model output; confirm.
DEFAULT_RIGID_PHYSICS: Dict[str, Any] = {
    "mass": 1.0,
    "density": 1000.0,
    "linear_damping": 0.7,
    "angular_damping": 0.7,
    "enable_collision": True,
    "enable_ccd": False,
    "contact_offset": 0.002,
    "rest_offset": 0.001,
    "dynamic_friction": 0.5,
    "static_friction": 0.5,
    "restitution": 0.0,
    "max_linear_velocity": 1.0e2,
    "max_angular_velocity": 1.0e2,
    "max_depenetration_velocity": 10.0,
    "solver_min_position_iters": 4,
    "solver_min_velocity_iters": 1,
    "sleep_threshold": 0.001,
}

# Baseline parameter set for soft bodies; the keys match the "softbody" key
# list in PHYSICS_SYSTEM_PROMPT.
# NOTE(review): mass = -1.0 looks like a "derive from density" sentinel -
# confirm against the simulator API.
DEFAULT_SOFTBODY_PHYSICS: Dict[str, Any] = {
    "triangle_remesh_resolution": 8,
    "triangle_simplify_target": 0,
    "maximal_edge_length": 0.0,
    "simulation_mesh_resolution": 8,
    "simulation_mesh_output_obj": False,
    "mass": -1.0,
    "density": 1000.0,
    "youngs_modulus": 1.0e6,
    "poissons_ratio": 0.45,
    "material_model": "CO_ROTATIONAL",
    "elasticity_damping": 0.0,
    "vertex_velocity_damping": 0.005,
    "linear_damping": 0.0,
    "enable_ccd": False,
    "enable_self_collision": False,
    "self_collision_stress_tolerance": 0.9,
    "collision_mesh_simplification": True,
    "self_collision_filter_distance": 0.1,
    "has_gravity": True,
    "max_velocity": 100.0,
    "max_depenetration_velocity": 1.0e6,
    "sleep_threshold": 0.05,
    "settling_threshold": 0.1,
    "settling_damping": 10.0,
    "solver_min_position_iters": 4,
    "solver_min_velocity_iters": 1,
}

# Physics modes named by the prompt's "mode" field.
ALLOWED_MODES = {"rigid", "softbody", "articulation"}
# Key lists in the insertion order of the default tables above.
RIGID_KEYS = list(DEFAULT_RIGID_PHYSICS.keys())
SOFT_KEYS = list(DEFAULT_SOFTBODY_PHYSICS.keys())
+ +You must return EXACTLY one JSON object with this structure: +{ + "mode": "rigid" | "softbody" | "articulation", + "confidence": 0.0-1.0, + "properties": { + "mass": , + "density": , + "linear_damping": , + "angular_damping": , + "enable_collision": True, + "enable_ccd": , + "contact_offset": , + "rest_offset": , + "dynamic_friction": , + "static_friction": , + "restitution": , + "max_linear_velocity": , + "max_angular_velocity": , + "max_depenetration_velocity": , + "solver_min_position_iters": 4, + "solver_min_velocity_iters": 1, + "sleep_threshold": 0.001, } +} + +Important: +- If the object is clearly deformable, cloth-like, flesh-like, cable-like, or highly elastic, choose "softbody". +- If it is a mechanically jointed object with distinct links and joints, choose "articulation". +- Otherwise choose "rigid". +- Confidence must reflect how much the description supports the decision. +- The properties object must match the selected mode exactly. +- The properties object must include ALL required keys for the selected mode, no exceptions. + +For rigid mode: +Return ONLY these keys, exactly once each: +mass, density, linear_damping, angular_damping, enable_collision, enable_ccd, +contact_offset, rest_offset, dynamic_friction, static_friction, restitution, +max_linear_velocity, max_angular_velocity, max_depenetration_velocity, +solver_min_position_iters, solver_min_velocity_iters, sleep_threshold + +Rigid mode completeness rules: +- Every key listed above is mandatory. +- No key may be missing. +- No extra keys may appear. +- If uncertain, choose a conservative physically plausible value for every field. +- You must always provide a value for mass, density, damping, collision flags, contact offsets, friction, restitution, velocity limits, solver iterations, and sleep threshold. 
+ +Guidance: +- mass: estimate in kg from size/material/use case; if unknown use a conservative default near 1.0 +- density: use realistic density in kg/m^3 based on material; metals high, wood mid, foam low, plastic medium, stone high +- linear_damping / angular_damping: higher for unstable / floating / draggy objects, lower for rigid stable objects +- enable_collision: usually true for physical objects +- enable_ccd: true only if fast motion or small/thin geometry would cause tunneling +- contact_offset must be > rest_offset +- friction: rubber/rough surfaces higher, metal/plastic smoother lower +- restitution: bouncing materials higher, dead materials near 0 +- sleep_threshold: smaller for stable heavy objects, larger for tiny or soft objects + +For softbody mode: +Return ONLY these keys, exactly once each: +triangle_remesh_resolution, triangle_simplify_target, maximal_edge_length, +simulation_mesh_resolution, simulation_mesh_output_obj, +mass, density, youngs_modulus, poissons_ratio, material_model, elasticity_damping, +vertex_velocity_damping, linear_damping, +enable_ccd, enable_self_collision, self_collision_stress_tolerance, +collision_mesh_simplification, self_collision_filter_distance, +has_gravity, max_velocity, max_depenetration_velocity, +sleep_threshold, settling_threshold, settling_damping, +solver_min_position_iters, solver_min_velocity_iters + +Softbody mode completeness rules: +- Every key listed above is mandatory. +- No key may be missing. +- No extra keys may appear. +- If uncertain, choose a conservative physically plausible value for every field. +- You must always provide a value for mesh resolution parameters, mass, density, elasticity parameters, collision parameters, gravity flags, damping terms, thresholds, and solver iterations. 
def extract_json(text: str) -> Dict[str, Any]:
    """Extract the first JSON object embedded in an LLM response.

    Markdown code fences are stripped first. Decoding then starts at the first
    ``{`` and stops at the end of that JSON value, so explanatory text after
    the object (even text containing braces) no longer breaks parsing.

    Args:
        text: Raw response text.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the text contains no ``{`` at all.
        json.JSONDecodeError: If the text starting at the first ``{`` is not
            valid JSON (a subclass of ``ValueError``).
    """
    cleaned = re.sub(r"```json|```", "", text).strip()
    start = cleaned.find("{")
    if start == -1:
        raise ValueError("No JSON object found in response:\n" + cleaned)
    # raw_decode parses exactly one JSON value and ignores whatever follows it,
    # unlike a greedy "{.*}" regex that runs to the last closing brace.
    parsed, _end = json.JSONDecoder().raw_decode(cleaned, start)
    return parsed
class PhysicsParser(AssetParser):
    """Physics inference & completion parser.

    ``parse`` runs the sim-ready mesh processing, asks the LLM for a physics
    mode and properties when the asset has no mode yet, falls back to the
    module-level defaults when inference fails, and finally records blockers
    in ``asset.simulation``.
    """

    name = "physics"

    # Coercion tables used by ``_validate_and_sanitize_properties``.
    _RIGID_FLOAT_KEYS = (
        "mass",
        "density",
        "linear_damping",
        "angular_damping",
        "dynamic_friction",
        "static_friction",
        "restitution",
        "max_linear_velocity",
        "max_angular_velocity",
        "max_depenetration_velocity",
        "sleep_threshold",
    )
    _RIGID_INT_KEYS = ("solver_min_position_iters", "solver_min_velocity_iters")
    _RIGID_BOOL_KEYS = ("enable_collision", "enable_ccd")

    _SOFT_INT_KEYS = (
        "triangle_remesh_resolution",
        "triangle_simplify_target",
        "simulation_mesh_resolution",
        "solver_min_position_iters",
        "solver_min_velocity_iters",
    )
    _SOFT_FLOAT_KEYS = (
        "maximal_edge_length",
        "mass",
        "density",
        "youngs_modulus",
        "poissons_ratio",
        "elasticity_damping",
        "vertex_velocity_damping",
        "linear_damping",
        "self_collision_stress_tolerance",
        "self_collision_filter_distance",
        "max_velocity",
        "max_depenetration_velocity",
        "sleep_threshold",
        "settling_threshold",
        "settling_damping",
    )
    _SOFT_BOOL_KEYS = (
        "simulation_mesh_output_obj",
        "enable_ccd",
        "enable_self_collision",
        "collision_mesh_simplification",
        "has_gravity",
    )

    def __init__(self):
        super().__init__()

    def parse(self, asset: Asset, asset_root: Path) -> None:
        """Run the full physics stage on ``asset`` (mutates it in place)."""
        self._ensure_sections(asset)
        self._simready_process(asset, asset_root)
        self._infer_physics(asset)
        self._ensure_properties(asset)
        self._update_simulation_status(asset)

    def _ensure_sections(self, asset: Asset) -> None:
        """Make sure every key the later steps read or write exists."""
        asset.physics.setdefault("mode", None)
        asset.physics.setdefault("properties", {})
        asset.physics.setdefault("source", None)
        asset.physics.setdefault("confidence", None)

        # Create the nested section first; indexing it directly raised
        # KeyError when the asset did not carry a "sim_ready" entry yet.
        sim_ready = asset.simulation.setdefault("sim_ready", {})
        sim_ready.setdefault("is_sim_ready", False)
        sim_ready.setdefault("sim_ready_path", None)
        asset.simulation.setdefault("blockers", [])

    def _simready_process(self, asset: Asset, asset_root: Path) -> None:
        """Run ``process_mesh`` and store its semantics and output path.

        Raises:
            ValueError: If ``asset.asset_data`` has no ``"path"`` entry.
        """
        rel_mesh_path = asset.asset_data.get("path")
        if not rel_mesh_path:
            raise ValueError("asset.asset_data['path'] is required for sim-ready processing")

        mesh_path = asset_root / rel_mesh_path
        out_path = asset_root / "asset_simready"
        extra_info = asset.ingest_info.get("extra_info") or {}

        result = process_mesh(
            mesh_path,
            "asset",
            extra_text=str(extra_info.get("simready_info", "")),
            out_dir=out_path,
        )
        print(result)

        semantics_result = result["semantics_result"]
        asset.semantics.update(
            {
                "object_name_generated": semantics_result["object_name"],
                "semantic_tag_generated": semantics_result["semantic_tag"],
                "description_generated": semantics_result["description"],
                "primary_materials_generated": semantics_result["primary_materials"],
            }
        )
        delete_rendered_pngs(out_path)

        sim_ready_path = out_path / "asset_simready.obj"
        sim_ready = asset.simulation.setdefault("sim_ready", {})
        sim_ready["is_sim_ready"] = True
        sim_ready["sim_ready_path"] = str(sim_ready_path.relative_to(asset_root))

    def _infer_physics(self, asset: Asset) -> None:
        """Fill ``asset.physics`` from the LLM, or from defaults on failure.

        Does nothing when a mode is already set. Inference is best-effort: any
        failure (missing description, API error, invalid answer) is reported
        and replaced by the default properties with confidence 0.0.
        """
        if asset.physics.get("mode"):
            return

        description = (
            asset.semantics.get("description")
            or asset.semantics.get("description_generated")
            or ""
        ).strip()

        try:
            result = self._call_LLM(description)

            mode = result["mode"]
            if mode not in ALLOWED_MODES:
                raise ValueError(f"Invalid mode returned by LLM: {mode}")

            properties = result.get("properties")
            if not isinstance(properties, dict):
                raise ValueError("LLM returned non-dict properties")

            properties = self._validate_and_sanitize_properties(mode, properties)
            confidence = float(result.get("confidence", 0.0))
        except Exception as err:
            # Deliberate catch-all: a failed inference must not abort ingestion,
            # but the reason is reported instead of being dropped silently.
            print(
                f"[WARN] Physics inference failed ({type(err).__name__}: {err}); "
                "falling back to default physics properties"
            )
            mode = self._fallback_mode(asset)
            asset.physics["mode"] = mode
            asset.physics["properties"] = {
                "mode": mode,
                "data": self._default_properties(mode),
            }
            asset.physics["source"] = "default"
            asset.physics["confidence"] = 0.0
            return

        asset.physics["mode"] = mode
        asset.physics["properties"] = {
            "mode": mode,
            "data": properties,
        }
        asset.physics["source"] = "generative"
        asset.physics["confidence"] = confidence

    def _call_LLM(self, description: str) -> Dict[str, Any]:
        """Query the chat-completions client and return the decoded JSON answer.

        Raises:
            ValueError: If ``description`` is empty or the answer holds no JSON.
        """
        if not description:
            raise ValueError("Missing semantics description for physics inference")

        user_prompt = f"""
        Asset description:
        {description}

        Infer the most plausible physics mode and physical properties for this asset.

        Hard constraints:
        - Output EXACTLY one JSON object.
        - Do not include markdown, comments, or any extra text.
        - Do not invent fields.
        - The returned properties object must match the selected mode exactly.
        - Use real-world physical intuition.
        - Prefer conservative, physically plausible values over aggressive or extreme values.
        - If evidence for articulation is weak, prefer rigid.
        """

        resp = client.chat.completions.create(
            model=DEPLOYMENT,
            temperature=0.0,
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": PHYSICS_SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
        )

        content = resp.choices[0].message.content or ""
        return extract_json(content)

    def _fallback_mode(self, asset: Asset) -> str:
        """Return "articulation" for articulation assets, otherwise "rigid"."""
        if asset.asset_data.get("type") == "articulation":
            return "articulation"
        return "rigid"

    def _default_properties(self, mode: str) -> Dict[str, Any]:
        """Return a private copy of the default properties for ``mode``."""
        if mode == "rigid":
            return deepcopy(DEFAULT_RIGID_PHYSICS)
        if mode == "softbody":
            return deepcopy(DEFAULT_SOFTBODY_PHYSICS)
        return {}

    @staticmethod
    def _as_bool(value: Any) -> bool:
        """Interpret ``value`` as a boolean, parsing common textual spellings.

        ``bool("false")`` is ``True``, so strings are parsed explicitly.

        Raises:
            ValueError: If a string is not a recognised boolean spelling.
        """
        if isinstance(value, str):
            text = value.strip().lower()
            if text in ("true", "1", "yes", "on"):
                return True
            if text in ("false", "0", "no", "off", ""):
                return False
            raise ValueError(f"Cannot interpret {value!r} as a boolean")
        return bool(value)

    def _validate_and_sanitize_properties(
        self, mode: str, properties: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Check the key set for ``mode`` and coerce every value to its type.

        Rigid mode ignores unexpected keys with a warning; softbody mode
        requires an exact key match. Articulation properties are returned
        unchanged.

        Raises:
            ValueError: On missing keys (rigid), any key mismatch (softbody),
                unparsable values, or non-dict articulation properties.
        """
        if mode == "rigid":
            expected = set(RIGID_KEYS)
            got = set(properties.keys())
            missing = expected - got
            if missing:
                # Previously this only printed and then failed with a bare
                # KeyError on the first missing key.
                raise ValueError(
                    f"Rigid properties missing required keys: {sorted(missing)}"
                )
            extra = got - expected
            if extra:
                print(f"[WARN] Ignoring unexpected rigid property keys: {sorted(extra)}")

            out = deepcopy(DEFAULT_RIGID_PHYSICS)
            for k in expected:
                out[k] = properties[k]

            out["contact_offset"] = float(out["contact_offset"])
            out["rest_offset"] = float(out["rest_offset"])
            # The contact offset must stay strictly above the rest offset.
            if out["contact_offset"] <= out["rest_offset"]:
                out["contact_offset"] = max(out["rest_offset"] + 1e-4, 1e-4)

            for k in self._RIGID_FLOAT_KEYS:
                out[k] = float(out[k])
            for k in self._RIGID_INT_KEYS:
                out[k] = int(out[k])
            for k in self._RIGID_BOOL_KEYS:
                # Only coerced when present: the rigid key set comes from
                # DEFAULT_RIGID_PHYSICS, which is defined elsewhere.
                if k in out:
                    out[k] = self._as_bool(out[k])

            return out

        if mode == "softbody":
            expected = set(SOFT_KEYS)
            got = set(properties.keys())
            if got != expected:
                raise ValueError(
                    f"Softbody properties keys mismatch.\nExpected: {expected}\nGot: {got}"
                )

            out = deepcopy(DEFAULT_SOFTBODY_PHYSICS)
            for k in expected:
                out[k] = properties[k]

            for k in self._SOFT_INT_KEYS:
                out[k] = int(out[k])
            for k in self._SOFT_FLOAT_KEYS:
                out[k] = float(out[k])
            for k in self._SOFT_BOOL_KEYS:
                out[k] = self._as_bool(out[k])
            out["material_model"] = str(out["material_model"])
            # Clamp Poisson's ratio into [0.0, 0.49].
            out["poissons_ratio"] = min(max(out["poissons_ratio"], 0.0), 0.49)

            return out

        if properties and not isinstance(properties, dict):
            raise ValueError("Articulation properties must be a dict")
        return properties or {}

    def _ensure_properties(self, asset: Asset) -> None:
        """Install default properties when none were produced for the mode."""
        props = asset.physics.get("properties", {})
        if not props or not props.get("data"):
            mode = asset.physics.get("mode")
            asset.physics["properties"] = {
                "mode": mode,
                "data": self._default_properties(mode),
            }
            asset.physics["source"] = "default"

    def _update_simulation_status(self, asset: Asset) -> None:
        """Record which physics prerequisites are still missing."""
        blockers: List[str] = []

        if not asset.physics.get("mode"):
            blockers.append("missing_physics_mode")

        props = asset.physics.get("properties", {})
        if not props.get("data"):
            blockers.append("missing_physics_properties")

        # Only the blocker list is written here; the "sim_ready" section is
        # owned by _simready_process.
        asset.simulation["blockers"] = blockers
class UsdParser(AssetParser):
    """Convert the asset mesh to USD using the asset's physics parameters."""

    name = "usd"

    # Numeric physics keys required by the USD pipeline.
    _REQUIRED_NUMERIC_KEYS = (
        "mass",
        "density",
        "static_friction",
        "dynamic_friction",
        "restitution",
        "linear_damping",
        "angular_damping",
    )

    def __init__(self):
        super().__init__()
        self.physics_properties = {}

    def build_physics(self, asset: Asset) -> Dict[str, Any]:
        """Validate ``asset.physics`` and merge it over ``DEFAULT_PHYSICS_PARAMS``.

        The result is stored in ``self.physics_properties`` and returned.

        Raises:
            ValueError: If a physics section is not a dict.
            KeyError: If ``properties``/``data``/``mode`` is missing, or a
                required key is absent even after merging the defaults.
            TypeError: If a required parameter is not a real number.
        """
        if not isinstance(asset.physics, dict):
            raise ValueError("asset.physics must be a dict")

        physics_block = asset.physics

        if "properties" not in physics_block:
            raise KeyError("asset.physics missing 'properties'")

        props_block = physics_block["properties"]

        if not isinstance(props_block, dict):
            raise ValueError("asset.physics['properties'] must be dict")

        if "data" not in props_block:
            raise KeyError("asset.physics['properties'] missing 'data'")

        data_block = props_block["data"]

        if not isinstance(data_block, dict):
            raise ValueError("asset.physics['properties']['data'] must be dict")

        # Provided values win; None counts as "not provided" so that the
        # default survives.
        merged_data = DEFAULT_PHYSICS_PARAMS.copy()
        merged_data.update({k: v for k, v in data_block.items() if v is not None})

        # Report every required key that came from the defaults, including
        # keys that were present but None.
        missing = [
            k for k in self._REQUIRED_NUMERIC_KEYS if data_block.get(k) is None
        ]
        if missing:
            print(
                f"[Warning] Missing physics keys {missing}; using DEFAULT_PHYSICS_PARAMS for those values."
            )

        for k in self._REQUIRED_NUMERIC_KEYS:
            if k not in merged_data:
                # Only reachable if DEFAULT_PHYSICS_PARAMS lacks the key.
                raise KeyError(
                    f"Missing required physics parameter even after merging defaults: {k}"
                )
            value = merged_data[k]
            # bool is a subclass of int but is never a valid physical quantity.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise TypeError(
                    f"Physics param '{k}' must be numeric, got {type(value)}"
                )

        self.physics_properties = {
            "mode": physics_block["mode"],
            "source": physics_block.get("source"),
            "confidence": physics_block.get("confidence"),
            "properties": {
                "mode": props_block["mode"],
                "data": merged_data,
            },
        }

        return self.physics_properties

    @staticmethod
    def _resolve_mesh_path(asset: Asset, asset_root: Path) -> Path:
        """Return the sim-ready mesh if it exists, else the original mesh.

        Raises:
            FileNotFoundError: If neither mesh file exists.
        """
        candidates = [asset_root / "asset_simready" / "asset_simready.obj"]
        original_rel = asset.asset_data.get("path")
        if original_rel:
            candidates.append(asset_root / original_rel)

        for candidate in candidates:
            if candidate.exists():
                return candidate
        raise FileNotFoundError(
            f"No mesh available for USD conversion; looked for: "
            f"{[str(c) for c in candidates]}"
        )

    def parse(self, asset: Asset, asset_root: Path) -> None:
        """Convert the asset mesh to USD and record the outcome in ``asset.usd``.

        Non-mesh assets are marked as skipped. ``is_usd`` is set only when the
        expected output file exists after conversion.

        Raises:
            FileNotFoundError: If no input mesh can be found.
        """
        asset.usd.setdefault("is_usd", False)
        asset.usd.setdefault("usd_path", "")
        if asset.asset_data.get("type") != "mesh":
            asset.usd.update({"asset dont have a mesh": "skipped"})
            return

        # Fail early with a clear error instead of handing None to the converter.
        mesh_path = self._resolve_mesh_path(asset, asset_root)
        out_path = asset_root / "asset_usd"
        self.build_physics(asset)
        convert_model_to_usd(
            mesh_path,
            out_path,
            physics_params=self.physics_properties["properties"]["data"],
        )

        usd_file = out_path / "asset_simready_inst.usdc"
        usd_exists = usd_file.exists()
        usd_path_str = ""
        if usd_exists:
            try:
                usd_path_str = str(usd_file.relative_to(asset_root))
            except ValueError:
                usd_path_str = str(usd_file)
        else:
            print(f"[Warning] Expected USD output not found: {usd_file}")

        # Do not claim a USD asset exists when no file was produced.
        asset.usd.update(
            {
                "is_usd": usd_exists,
                "usd_path": usd_path_str,
            }
        )
def _load_ingest_config() -> dict:
    """Return the "ingest" section of ``configs/gen_config.json``.

    The file is resolved relative to this module (one package level up).
    An absent "ingest" section yields an empty dict.
    """
    config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json"
    with config_path.open("r", encoding="utf-8") as f:
        return json.load(f).get("ingest", {})


INGEST_CONFIG = _load_ingest_config()
CANOCAIL_ASSET_NAME = INGEST_CONFIG.get("canonical_asset_name", "asset.obj")
CANOCAIL_TEXTURE_NAME = INGEST_CONFIG.get("canonical_texture_name", "")
# Formats that are only copied for now; parsing them may be added later.
UNPROCESSED_FORMATS = INGEST_CONFIG.get("unprocessed_formats", [".urdf", ".usd"])
# Mainstream mesh formats that go through the unified ingest path.
PARSEABLE_MESH_FORMATS = INGEST_CONFIG.get(
    "parseable_mesh_formats", [".glb", ".gltf", ".obj", ".ply", ".stl"]
)

tex_size: int = int(INGEST_CONFIG.get("blender_texture_size", 2048))
png_name: str = INGEST_CONFIG.get("blender_texture_name", "surface_texture.png")


def _find_first_mesh_file(files, formats) -> Path:
    """Return the first file (sorted) matching the earliest suffix in ``formats``.

    Raises:
        RuntimeError: If no file has one of the given suffixes.
    """
    for suffix in formats:
        candidates = sorted(p for p in files if p.suffix.lower() == suffix)
        if candidates:
            return candidates[0]
    # The original returned this exception object instead of raising it, so
    # the failure only surfaced later as an unrelated error.
    raise RuntimeError(f"No valid mesh file found (expected one of {list(formats)})")


def ingest_one_asset(
    asset_dir: str | Path,
    category: str,
    output_root: Path,
    store: JsonStore,
    manager: ParserManager,
    simple_ingest: bool = True,
) -> Optional[Asset]:
    """Ingest one source directory into ``output_root / "assets"``.

    The source directory is archived under ``asset_archive``. If it contains a
    file in ``UNPROCESSED_FORMATS`` it is copied verbatim ("direct_copy") and
    no parser runs. Otherwise the first parseable mesh is normalised into
    ``asset_source`` (trimesh when ``simple_ingest`` is true, Blender
    otherwise), config-driven metadata is injected and ``manager.parse`` runs.

    Args:
        asset_dir: Source directory of the raw asset.
        category: Category stored in the asset identity.
        output_root: Root directory that receives the ``assets`` folder.
        store: Store used to persist the asset.
        manager: Parser manager run on unified-mode assets.
        simple_ingest: Use the trimesh ingest path instead of Blender.

    Returns:
        The ingested asset.

    Raises:
        RuntimeError: If no parseable mesh file exists, or the archive
            destination already exists.
        FileExistsError: If ``output_root / "assets"`` already exists.
    """
    asset_dir = Path(asset_dir)  # source path
    output_root = Path(output_root)

    files = [p for p in asset_dir.iterdir() if p.is_file()]
    file_suffixes = {p.suffix.lower() for p in files}
    has_unprocessed_format = any(
        suffix in file_suffixes for suffix in UNPROCESSED_FORMATS
    )

    # Resolve the source mesh before touching the output tree, so an unusable
    # input directory does not leave a half-created asset folder behind.
    source_file = None
    if not has_unprocessed_format:
        source_file = _find_first_mesh_file(files, PARSEABLE_MESH_FORMATS)

    output_root.mkdir(parents=True, exist_ok=True)

    # NOTE: the id is fixed, so a second ingest into the same output_root
    # fails on the mkdir below (exist_ok=False).
    asset_id = "assets"
    asset_root = output_root / asset_id
    asset_root.mkdir(parents=True, exist_ok=False)

    asset_source = asset_root / "asset_source"
    asset_archive = asset_root / "asset_archive"

    archive_dst = asset_archive / asset_dir.name
    if archive_dst.exists():
        raise RuntimeError(f"Archive destination already exists: {archive_dst}")
    shutil.copytree(asset_dir, archive_dst)

    if has_unprocessed_format:
        ingest_mode = "direct_copy"
        asset_name = asset_dir.stem
        visual_info = None
    else:
        asset_name = source_file.stem
        ingest_mode = "unified"
        if simple_ingest:
            visual_info = trimesh_parse_ingest(
                source_file,
                asset_source,
                obj_name=CANOCAIL_ASSET_NAME,
                mtl_name=Path(CANOCAIL_ASSET_NAME).with_suffix(".mtl").name,
            )
        else:
            visual_info = blender_parser_ingest(
                source_file,
                asset_source,
                texture_size=tex_size,
                png_name=png_name,
                obj_name=CANOCAIL_ASSET_NAME,
            )

    asset = Asset(
        asset_id=asset_id,
        identity={
            "name": asset_name,
            "source_dir": asset_dir.name,
            "category": category,
            "ingest_mode": ingest_mode,
        },
        parsed={"visual": visual_info},
    )
    asset.status["ingested"] = True
    asset.status.setdefault("parsed", False)
    asset.status.setdefault("validated", False)

    if ingest_mode == "direct_copy":
        shutil.copytree(asset_dir, asset_source)
        asset.identity["normalized_source"] = "raw_copy"
        asset.identity["source_file"] = None
        asset.identity["source_type"] = "direct_copy"
        store.save_asset(asset)
        return asset  # no parser

    asset.identity["source_file"] = source_file.name
    asset.identity["source_type"] = source_file.suffix.lower()
    asset.identity["normalized_source"] = CANOCAIL_ASSET_NAME

    inject_semantic_from_config(asset_dir, asset)
    inject_user_extra_info(asset_dir, asset)
    manager.parse(asset, asset_root)
    store.save_asset(asset)

    return asset
class _EditMode:
    """Context manager that edits ``obj`` in EDIT mode and restores OBJECT mode.

    Restoring on exit also covers the case where an operator raises, which
    previously left Blender stuck in EDIT mode.
    """

    def __init__(self, obj):
        self._obj = obj

    def __enter__(self):
        bpy.context.view_layer.objects.active = self._obj
        if self._obj.mode != "OBJECT":
            bpy.ops.object.mode_set(mode="OBJECT")
        bpy.ops.object.mode_set(mode="EDIT")
        return self._obj

    def __exit__(self, exc_type, exc, tb):
        bpy.ops.object.mode_set(mode="OBJECT")
        return False


def clear_scene():
    """Delete all scene objects and purge meshes, materials, images, collections."""
    bpy.ops.object.select_all(action="SELECT")
    bpy.ops.object.delete(use_global=False, confirm=False)

    for block in (
        bpy.data.meshes,
        bpy.data.materials,
        bpy.data.images,
        bpy.data.collections,
    ):
        for item in list(block):
            try:
                block.remove(item)
            except Exception:
                # Best-effort purge: a data block that cannot be removed is
                # skipped. A bare ``except`` would also have swallowed
                # KeyboardInterrupt / SystemExit.
                pass


def load_obj(filepath):
    """Import an OBJ file and return every mesh object now in the scene."""
    bpy.ops.wm.obj_import(filepath=str(filepath))
    return [o for o in bpy.context.scene.objects if o.type == "MESH"]


def join_meshes(objs):
    """Join ``objs`` into one object and return it.

    Raises:
        RuntimeError: If ``objs`` is empty.
    """
    if not objs:
        raise RuntimeError("No mesh objects to join.")

    bpy.ops.object.select_all(action="DESELECT")
    for o in objs:
        o.select_set(True)

    bpy.context.view_layer.objects.active = objs[0]
    # A single object is already "joined"; skip the operator call.
    if len(objs) > 1:
        bpy.ops.object.join()
    return bpy.context.active_object


def decimate_optimized(obj, ratio: float = 0.5, weld_distance: float = 0.0001):
    """Remove loose geometry, decimate ``obj`` by ``ratio``, then re-weld.

    Args:
        obj: Mesh object to simplify (modified in place).
        ratio: Value assigned to the Decimate modifier's ``ratio``.
        weld_distance: Merge threshold used in the post-decimation clean-up.

    Returns:
        The same object.
    """
    # 1) remove loose geometry
    with _EditMode(obj):
        bpy.ops.mesh.select_all(action="DESELECT")
        bpy.ops.mesh.select_loose()
        bpy.ops.mesh.delete(type="VERT")

    # 2) decimate
    print(f"Simplifying mesh (Ratio: {ratio})...")
    decimate_mod = obj.modifiers.new(name="Decimate", type="DECIMATE")
    decimate_mod.ratio = ratio
    decimate_mod.use_collapse_triangulate = True
    bpy.ops.object.modifier_apply(modifier=decimate_mod.name)

    # 3) post clean
    with _EditMode(obj):
        bpy.ops.mesh.select_all(action="SELECT")
        bpy.ops.mesh.remove_doubles(threshold=weld_distance)
        bpy.ops.mesh.delete_loose()

    print(
        f"[Info] Optimized state: Vertices {len(obj.data.vertices)}, Faces {len(obj.data.polygons)}"
    )

    return obj


def clean_mesh(obj, merge_dist=1e-5, remove_non_manifold=True, triangulate=False):
    """Merge duplicates, drop loose/degenerate geometry and fix normals.

    Args:
        obj: Mesh object to clean (modified in place).
        merge_dist: Threshold passed to ``remove_doubles``.
        remove_non_manifold: Also delete the vertices selected by
            ``select_non_manifold``, then merge and clean again.
        triangulate: Convert quads to triangles at the end.

    Returns:
        The same object.
    """
    with _EditMode(obj):
        bpy.ops.mesh.select_all(action="SELECT")
        bpy.ops.mesh.remove_doubles(threshold=merge_dist)
        bpy.ops.mesh.delete_loose()
        bpy.ops.mesh.dissolve_degenerate()
        bpy.ops.mesh.normals_make_consistent(inside=False)

        if remove_non_manifold:
            bpy.ops.mesh.select_all(action="DESELECT")
            bpy.ops.mesh.select_non_manifold()
            bpy.ops.mesh.delete(type="VERT")

            bpy.ops.mesh.select_all(action="SELECT")
            bpy.ops.mesh.remove_doubles(threshold=merge_dist)
            bpy.ops.mesh.delete_loose()

        if triangulate:
            bpy.ops.mesh.quads_convert_to_tris()

    return obj


def fill_holes(obj, max_sides=8):
    """Fill holes (``sides=max_sides``), beautify the fill and fix normals.

    Returns:
        The same object.
    """
    with _EditMode(obj):
        bpy.ops.mesh.select_all(action="SELECT")
        bpy.ops.mesh.fill_holes(sides=max_sides)
        bpy.ops.mesh.beautify_fill()
        bpy.ops.mesh.dissolve_degenerate()
        bpy.ops.mesh.normals_make_consistent(inside=False)

    return obj


def export_obj(obj, out_path):
    """Export only ``obj`` to ``out_path`` as OBJ."""
    bpy.ops.object.select_all(action="DESELECT")
    obj.select_set(True)
    bpy.context.view_layer.objects.active = obj

    bpy.ops.wm.obj_export(filepath=str(out_path), export_selected_objects=True)


def process_obj(
    input_path,
    output_path,
    ratio=0.5,
    weld_distance=0.0001,
    merge_dist=1e-5,
    remove_non_manifold=True,
    triangulate=False,
    fill_hole_sides=8,
):
    """Load an OBJ, join, clean and decimate it, then export the result.

    ``fill_hole_sides`` is accepted for interface compatibility but unused:
    hole filling is currently not part of this pipeline.

    Raises:
        RuntimeError: If the import yields no mesh objects.
    """
    clear_scene()
    objs = load_obj(input_path)
    if not objs:
        raise RuntimeError("No mesh objects imported.")

    obj = join_meshes(objs)

    bpy.context.view_layer.objects.active = obj
    bpy.ops.object.transform_apply(location=True, rotation=True, scale=True)

    clean_mesh(
        obj,
        merge_dist=merge_dist,
        remove_non_manifold=remove_non_manifold,
        triangulate=triangulate,
    )
    decimate_optimized(obj, ratio=ratio, weld_distance=weld_distance)

    export_obj(obj, output_path)
    print("Clean mesh saved to:", output_path)
def new_uuid() -> str:
    """Return a random UUID4 as a 32-character hex string."""
    return uuid.uuid4().hex


def compute_folder_sha256(folder_path: Union[str, Path]) -> str:
    """Return a SHA-256 digest over every file below ``folder_path``.

    Files are visited in sorted relative-path order; for each one the relative
    path and then the file content are fed to the hash. The path is hashed in
    POSIX form, so the same folder gives the same digest on every platform
    (``str(path)`` used backslashes on Windows).

    Args:
        folder_path: Directory to hash.

    Returns:
        The hex digest.

    Raises:
        ValueError: If ``folder_path`` is not a directory.
    """
    root = Path(folder_path).resolve()

    if not root.is_dir():
        raise ValueError(f"Path {root} is not a valid directory.")

    relative_files = sorted(
        (Path(dirpath) / file_name).relative_to(root)
        for dirpath, _dirnames, file_names in os.walk(root)
        for file_name in file_names
    )

    digest = hashlib.sha256()
    for rel_path in relative_files:
        digest.update(rel_path.as_posix().encode("utf-8"))
        with open(root / rel_path, "rb") as stream:
            # Read in 64 KiB chunks so large files are never held in memory.
            for chunk in iter(lambda: stream.read(65536), b""):
                digest.update(chunk)

    return digest.hexdigest()
asset.semantics["tags"] = list(existing_tags | new_tags) + + if "description" in semantic and semantic["description"]: + if not asset.semantics.get("description"): + asset.semantics["description"] = semantic["description"] + + print(f"[INFO] Injected semantic from {config_path}") + + +def inject_user_extra_info(asset_source: Path, asset: Asset) -> None: + + config_path = asset_source / "config.json" + asset.ingest_info.setdefault("extra_info", {}) + if not config_path.exists(): + print(f"[INFO] No config.json found at {config_path}") + return + try: + with open(config_path, "r", encoding="utf-8") as f: + config: Dict[str, Any] = json.load(f) + except Exception as e: + print(f"[WARN] Failed to read config.json: {e}") + return + + extra_info = config.get("extra_info") + if not extra_info: + print("[INFO] No extra_info field in config.json") + return + + asset.ingest_info["extra_info"].update(extra_info) + + print(f"[INFO] Injected extra_info from {config_path}") + + +def load_one_trimesh( + path: str, +) -> Union[ + trimesh.Trimesh, None +]: # 可能是个scene,但是我们只处理scene中的第一个geometry,如果有多个mesh,复合起来需要下一个版本 + try: + mesh_or_scene = trimesh.load_mesh(path) + if isinstance(mesh_or_scene, trimesh.Scene): + if len(mesh_or_scene.geometry) == 0: + print(f"No geometry found in Scene: {path}") + return None + first_mesh = list(mesh_or_scene.geometry.values())[0] + return first_mesh + if isinstance(mesh_or_scene, trimesh.Trimesh): + return mesh_or_scene + print(f"Unexpected type: {type(mesh_or_scene)}") + return None + + except Exception as e: + print(f"Failed to load {path}: {e}") + return None + + +def trimesh_parse_ingest( + source_file: Path, + asset_source: Path, + obj_name: str = "asset.obj", + mtl_name: str = "asset.mtl", +): + mesh = load_one_trimesh(source_file) + if mesh is None: + return None + + texture_info = classify_visual(mesh) + visual_category = texture_info.get("visual_category") + material_kind = texture_info.get("material_kind") + textures = 
texture_info.get("material", {}).get("textures", {}) + uv_present = texture_info.get("uv_present") + + visual = { + "visual_category": visual_category, + "uv_present": uv_present, + "texture_count_total": texture_info.get("texture_count_total"), + "material_kind": material_kind, + "textures": textures, + } + visual_ingest = None + asset_source = Path(asset_source) + asset_source.mkdir(parents=True, exist_ok=True) + obj_path = asset_source / obj_name + + # ========= CASE 1: no visual ========= + if visual_category == "None": + print("[INFO] No visual → assign default gray") + + mesh.visual = trimesh.visual.ColorVisuals( + mesh, face_colors=[128, 128, 128, 255] + ) + visual_ingest = "no visual" + + # ========= CASE 2: color ========= + elif visual_category in ["color_face", "color_vertex"]: + print("[INFO] Vertex/Face color → export directly") + visual_ingest = "Color Visual" + + # ========= CASE 3: texture ========= + elif visual_category == "texture": + + vis = mesh.visual + + if not uv_present: + visual_ingest = "no UV! 
def clear_scene():
    """Delete every object in the current Blender scene and purge its data.

    After removing the scene objects, the mesh, material, image and
    collection data-blocks are removed one by one. Removal is best-effort:
    a data-block that cannot be removed is reported and skipped so the
    remaining ones are still purged.
    """
    bpy.ops.object.select_all(action="SELECT")
    bpy.ops.object.delete(use_global=False, confirm=False)

    for block in (
        bpy.data.meshes,
        bpy.data.materials,
        bpy.data.images,
        bpy.data.collections,
    ):
        # Iterate over a snapshot: removing while iterating the live
        # collection would skip entries.
        for item in list(block):
            try:
                block.remove(item)
            except Exception as exc:
                # Deliberately tolerant, but no longer a bare ``except`` that
                # would also swallow KeyboardInterrupt / SystemExit.
                print(f"[WARN] Could not remove data-block: {exc}")
def get_vertex_color_layer(obj):
    """Return the name of the color attribute to bake from, if any.

    Args:
        obj: Object whose ``data`` may expose a ``color_attributes``
            collection.

    Returns:
        The name of the active color attribute; the name of the first color
        attribute when none is marked active; ``None`` when the mesh data has
        no ``color_attributes`` member or the collection is empty.
    """
    color_attrs = getattr(obj.data, "color_attributes", None)
    if color_attrs is None or len(color_attrs) == 0:
        return None

    # A non-empty collection may still have no active color attribute;
    # dereferencing ``.name`` on it unconditionally would raise.
    chosen = getattr(color_attrs, "active_color", None)
    if chosen is None:
        chosen = color_attrs[0]
    return chosen.name
def create_baked_material_assign(obj, image, mat_name="BAKED_SURFACE_MAT"):
    """Create an image-textured material and make it the object's only look.

    The new material is a minimal node graph: image texture -> Principled
    BSDF base color -> material output.

    Args:
        obj: Object whose mesh material slots are rewritten.
        image: Image data-block used as the base color texture.
        mat_name: Name given to the new material.

    Returns:
        The newly created material.
    """
    mat = bpy.data.materials.new(name=mat_name)
    mat.use_nodes = True
    nodes = mat.node_tree.nodes
    links = mat.node_tree.links
    # Start from an empty graph so no default nodes remain.
    nodes.clear()

    img_node = nodes.new(type="ShaderNodeTexImage")
    img_node.image = image
    bsdf = nodes.new(type="ShaderNodeBsdfPrincipled")
    out = nodes.new(type="ShaderNodeOutputMaterial")

    links.new(img_node.outputs["Color"], bsdf.inputs["Base Color"])
    links.new(bsdf.outputs["BSDF"], out.inputs["Surface"])

    slots = obj.data.materials
    if len(slots) > 0:
        # Point every slot at the baked material: a mesh joined from several
        # objects keeps one slot per source material, and faces assigned to
        # slots other than the first would otherwise keep their old look.
        for index in range(len(slots)):
            slots[index] = mat
    else:
        slots.append(mat)
    return mat
+ ) + + if not joined.data.materials: + tmp_mat = bpy.data.materials.new(name="Bake_Temp_Material") + joined.data.materials.append(tmp_mat) + + for slot in joined.material_slots: + if slot.material: + if vcol_name: + inject_vertex_color_to_material(slot.material, vcol_name) + add_bake_image_node(slot.material, bake_image) + + bpy.context.scene.render.engine = "CYCLES" + bpy.ops.object.select_all(action="DESELECT") + joined.select_set(True) + bpy.context.view_layer.objects.active = joined + + print("Baking...") + bpy.ops.object.bake( + type="DIFFUSE", + pass_filter={"COLOR"}, + use_clear=True, + use_selected_to_active=False, + margin=16, + ) + + bake_image.filepath_raw = str(png_path) + bake_image.save() + + create_baked_material_assign(joined, bake_image) + + out_obj = asset_source / obj_name + bpy.ops.wm.obj_export(filepath=str(out_obj), export_selected_objects=True) + + return {"png": str(png_path), "obj": str(out_obj), "mtl": "asset.mtl"} diff --git a/embodichain/toolkits/simready_pipeline/utils/simready_utils.py b/embodichain/toolkits/simready_pipeline/utils/simready_utils.py new file mode 100644 index 00000000..0c5767b5 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/utils/simready_utils.py @@ -0,0 +1,1722 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +import argparse +import base64 +import json +import re +from pathlib import Path +import numpy as np +import trimesh +import pyrender +from PIL import Image +from openai import AzureOpenAI +import itertools +from scipy.spatial import ConvexHull +from typing import Dict, Any, List +from urllib.parse import urlsplit, urlunsplit + + +def _load_gen_config() -> Dict[str, Any]: + config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json" + if not config_path.exists(): + raise FileNotFoundError(f"gen_config.json not found: {config_path}") + + with config_path.open("r", encoding="utf-8") as f: + cfg = json.load(f) + + cfg = cfg.get("llm", {}).get("azure_openai", {}) + cfg.setdefault("api_version", "2024-02-15-preview") + + required = ["api_key", "model", "base_url", "api_version"] + missing = [k for k in required if k not in cfg or not cfg[k]] + if missing: + raise ValueError(f"Missing required config keys: {missing}") + + return cfg + + +def _normalize_azure_endpoint(base_url: str) -> str: + parsed = urlsplit(base_url) + path = parsed.path + + if "/openai/deployments/" in path: + path = path.split("/openai/deployments/")[0] + elif path.endswith("/chat/completions"): + path = path[: -len("/chat/completions")] + + return urlunsplit((parsed.scheme, parsed.netloc, path.rstrip("/"), "", "")) + + +_GEN_CONFIG = _load_gen_config() + +DEPLOYMENT = _GEN_CONFIG["model"] + +AZURE_ENDPOINT = _normalize_azure_endpoint(_GEN_CONFIG["base_url"]) + +client = AzureOpenAI( + api_key=_GEN_CONFIG["api_key"], + api_version=_GEN_CONFIG["api_version"], + azure_endpoint=AZURE_ENDPOINT, +) + +STRATEGY = None + +diagonal_views = [ + ("view_from_111", np.array([1.3, 1.3, 1.3], dtype=float)), + ("view_from_000", np.array([-0.8, -0.8, -0.8], dtype=float)), +] +cardinal_views = [ + ("view_from_front", np.array([1.8, 0.5, 0.5], dtype=float)), + ("view_from_left", np.array([0.5, -1.8, 0.5], 
def compute_support_area(mesh, eps=1e-2):
    """Estimate the area of the footprint a mesh rests on.

    Vertices whose z coordinate lies within ``eps`` of the lowest bound are
    treated as touching the ground; the result is the area of the 2D convex
    hull of their (x, y) positions.

    Args:
        mesh: Object exposing ``bounds`` (indexed as ``bounds[0][2]`` for the
            minimum z) and ``vertices`` (converted to an array and indexed by
            column).
        eps: Absolute z tolerance for counting a vertex as ground contact.

    Returns:
        The footprint area, or ``0.0`` when fewer than three vertices touch
        the ground or the hull cannot be built.
    """
    floor_z = mesh.bounds[0][2]
    vertices = np.asarray(mesh.vertices)

    touching = np.abs(vertices[:, 2] - floor_z) < eps
    footprint = vertices[touching][:, :2]

    if len(footprint) >= 3:
        try:
            # For 2D input, ConvexHull.volume is the enclosed area.
            return ConvexHull(footprint).volume
        except Exception:
            return 0.0
    return 0.0
np.linalg.svd(cov) +# R = U +# if np.linalg.det(R) < 0: +# R[:, 2] *= -1 +# return R + +# def closest_axis(v): +# idx = np.argmax(np.abs(v)) +# sign = np.sign(v[idx]) +# axis = np.zeros(3) +# axis[idx] = sign +# return axis + +# def generate_discrete_flips(): +# rotations = [] +# Rx90 = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) + +# Ry90 = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]]) + +# I = np.eye(3) +# rotations.append(I) +# Rx180 = np.dot(Rx90, Rx90) +# rotations.append(Rx180) +# rotations.append(Rx90) +# Rx_neg90 = Rx90.T +# rotations.append(Rx_neg90) +# rotations.append(Ry90) +# Ry_neg90 = Ry90.T +# rotations.append(Ry_neg90) +# return rotations + +# def _process_and_score(initial_mesh, alignment_type: str): +# m = initial_mesh.copy() +# if alignment_type == "pca": +# R_pca = compute_pca_axes(m) +# T = np.eye(4) +# T[:3, :3] = R_pca.T +# m.apply_transform(T) +# U = compute_pca_axes(m) +# x_axis, y_axis, z_axis = U[:, 0], U[:, 1], U[:, 2] +# new_x = closest_axis(x_axis) +# new_y = closest_axis(y_axis) +# new_z = closest_axis(z_axis) +# new_z /= np.linalg.norm(new_z) +# new_x = new_x - new_z * np.dot(new_x, new_z) +# new_x /= np.linalg.norm(new_x) +# new_y = np.cross(new_z, new_x) +# R_snap = np.column_stack((new_x, new_y, new_z)) +# T_snap = np.eye(4) +# T_snap[:3, :3] = R_snap +# m.apply_transform(T_snap) + +# elif alignment_type == "obb": +# to_origin, _ = trimesh.bounds.oriented_bounds(m) +# m.apply_transform(to_origin) +# R = to_origin[:3, :3] +# if np.linalg.det(R) < 0: +# fix = np.eye(4) +# fix[2, 2] = -1 +# m.apply_transform(fix) +# else: +# raise ValueError(f"Unknown alignment type: {alignment_type}") + +# best_score = float("inf") +# best_mesh = None + +# for R_flip in generate_discrete_flips(): +# m_candidate = m.copy() + +# T_flip = np.eye(4) +# T_flip[:3, :3] = R_flip +# m_candidate.apply_transform(T_flip) + +# z_min = m_candidate.bounds[0][2] +# m_candidate.apply_translation([0, 0, -z_min]) + +# area = compute_support_area(m_candidate) +# 
def init_pose(mesh_input):
    """Align a mesh to the world axes in a stable resting pose and normalize it.

    Two candidate alignments are built, one from the PCA axes of the vertices
    and one from the oriented bounding box. For each, six discrete rotations
    are tried and the one with the best stability score (large projected
    footprint, low center of mass) is kept. The OBB candidate wins unless the
    PCA candidate's footprint is more than 1.3x larger. The winner is scaled
    so its longest extent is 1, centered on its vertex mean and dropped onto
    the z = 0 plane.

    Args:
        mesh_input: A ``trimesh.Trimesh`` (never modified; a copy is used) or
            a path to a mesh file.

    Returns:
        The aligned, normalized mesh. If any alignment step fails, the error
        is printed and an unmodified copy of the input mesh is returned.

    Raises:
        FileNotFoundError: If ``mesh_input`` is a path that does not exist.

    Side effects:
        On success, sets the module-level ``STRATEGY`` to ``"PCA"`` or
        ``"OBB"``.
    """
    # Without this declaration the assignments below only create a local
    # variable and the module-level STRATEGY never changes.
    global STRATEGY

    if isinstance(mesh_input, trimesh.Trimesh):
        mesh = mesh_input.copy()
    else:
        mesh_path = Path(mesh_input).resolve()
        if not mesh_path.exists():
            raise FileNotFoundError(f"Mesh file not found: {mesh_path}")
        mesh = trimesh.load(mesh_path, force="mesh")
    fallback_mesh = mesh.copy()

    def compute_pca_axes(target):
        """Return a right-handed 3x3 basis of the vertex principal axes."""
        verts = np.asarray(target.vertices)
        centered = verts - verts.mean(axis=0)
        axes, _, _ = np.linalg.svd(np.cov(centered.T))
        if np.linalg.det(axes) < 0:
            # Flip one axis so the basis is a rotation, not a reflection.
            axes[:, 2] *= -1
        return axes

    def closest_axis(v):
        """Snap a vector to the signed world axis it is most aligned with."""
        idx = np.argmax(np.abs(v))
        axis = np.zeros(3)
        axis[idx] = np.sign(v[idx])
        return axis

    def generate_discrete_flips():
        """Return the six candidate rotations: identity, X by 180/+90/-90, Y by +90/-90."""
        rx90 = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
        ry90 = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]])
        return [np.eye(3), rx90 @ rx90, rx90, rx90.T, ry90, ry90.T]

    def compute_support_area(target):
        """Return the area of the mesh's convex hull projected onto the XY plane."""
        # The projection of a convex hull equals the convex hull of the
        # projected vertices, so the 2D hull of the (x, y) coordinates is
        # enough; for 2D input ConvexHull.volume is the enclosed area.
        footprint = np.asarray(target.vertices)[:, :2]
        try:
            return float(ConvexHull(footprint).volume)
        except (RuntimeError, ValueError):
            # Degenerate footprint (too few or collinear points).
            return 0.0

    def stability_score(target):
        """Lower is better: wide footprint and low center of mass."""
        area = compute_support_area(target)
        com_z = target.center_mass[2]
        return -(area / (com_z + 1e-6))

    def normalize_to_unit_cube(target):
        """Scale the longest extent to 1, center on the vertex mean, rest on z = 0."""
        # Guard against a zero-size mesh producing an infinite scale.
        longest = max(float(np.max(target.extents)), 1e-8)
        target.apply_scale(1.0 / longest)
        target.vertices -= target.vertices.mean(axis=0)
        z_min = target.bounds[0][2]
        target.apply_translation([0, 0, -z_min])

    def process_alignment(initial_mesh, align_type):
        """Align a copy of the mesh, then pick the most stable discrete flip."""
        m = initial_mesh.copy()
        if align_type == "pca":
            to_pca = np.eye(4)
            to_pca[:3, :3] = compute_pca_axes(m).T
            m.apply_transform(to_pca)

            # Re-estimate the axes and snap them onto exact world axes.
            axes = compute_pca_axes(m)
            nx = closest_axis(axes[:, 0])
            nz = closest_axis(axes[:, 2])
            nz /= np.linalg.norm(nz)
            nx = nx - nz * np.dot(nx, nz)
            nx /= np.linalg.norm(nx)
            ny = np.cross(nz, nx)

            # apply_transform needs a homogeneous 4x4 matrix; passing the bare
            # 3x3 rotation raises and silently disabled the whole alignment.
            snap = np.eye(4)
            snap[:3, :3] = np.column_stack([nx, ny, nz])
            m.apply_transform(snap)

        elif align_type == "obb":
            to_origin, _ = trimesh.bounds.oriented_bounds(m)
            m.apply_transform(to_origin)
            if np.linalg.det(to_origin[:3, :3]) < 0:
                # Mirror along z when the OBB transform contained a reflection.
                m.apply_transform(np.diag([1, 1, -1, 1]))
        else:
            raise ValueError(f"Unknown type {align_type}")

        best_score = float("inf")
        best = None
        for flip in generate_discrete_flips():
            candidate = m.copy()
            flip_transform = np.eye(4)
            flip_transform[:3, :3] = flip
            candidate.apply_transform(flip_transform)
            # Rest the candidate on the ground plane before scoring it.
            candidate.apply_translation([0, 0, -candidate.bounds[0][2]])
            score = stability_score(candidate)
            if score < best_score:
                best_score = score
                best = candidate
        return best, best_score

    try:
        mesh_pca, _ = process_alignment(mesh, "pca")
        mesh_obb, _ = process_alignment(mesh, "obb")

        area_pca = compute_support_area(mesh_pca)
        area_obb = compute_support_area(mesh_obb)

        # Prefer OBB unless PCA yields a clearly larger footprint.
        if area_pca > area_obb * 1.3:
            result_mesh, chosen = mesh_pca, "PCA"
        else:
            result_mesh, chosen = mesh_obb, "OBB"

        normalize_to_unit_cube(result_mesh)
        STRATEGY = chosen
        return result_mesh

    except Exception as e:
        # Best-effort by design, but report why the alignment was skipped.
        print(f"Alignment failed, fallback. Error: {e}")
        return fallback_mesh
f_hat) + + R = np.column_stack((r, u, -f_hat)) + + M = np.eye(4) + M[:3, :3] = R + M[:3, 3] = eye + + scene = pyrender.Scene(bg_color=[230, 235, 245, 255]) + + scene.add(mesh_pyr) + scene.add(cam, pose=M) + + scene.add(pyrender.DirectionalLight(color=np.ones(3), intensity=4.0), pose=M) + + fill_pose = np.eye(4) + fill_pose[:3, 3] = eye + np.array([1.0, 1.0, 1.0]) + scene.add( + pyrender.DirectionalLight(color=np.ones(3), intensity=1.5), pose=fill_pose + ) + + back_pose = np.eye(4) + back_pose[:3, 3] = eye + np.array([-1.0, -1.0, -1.0]) + scene.add( + pyrender.DirectionalLight(color=np.ones(3), intensity=1.2), pose=back_pose + ) + + color, _ = renderer.render(scene, flags=pyrender.RenderFlags.RGBA) + + img = Image.fromarray(color) + img = img.convert("RGB") + + path = out_dir / f"{name}.png" + img.save(path, quality=95) + + results.append({"path": str(path), "name": name, "camera_pose": M.tolist()}) + + renderer.delete() + return results + + +def ask_mllm_detect_and_classify(views_data, extra_text=""): + + instruction_text = """ + You are a single-purpose multimodal classifier. You will be given several images (multiple views) of a single object plus an OPTIONAL short text note ("Additional context"). Your job is twofold and must be completed in one step: + 1) Identify the object in plain short form (e.g. "coffee mug", "soccer ball", "laptop", "rock") and put it into the JSON field "detected_object" (string) or null if you truly cannot identify it. + 2) Classify the object's placement/orientation constraint into exactly one of three categories (0,1,2) using the provided canonical definitions and examples, and provide the additional fields described below. + + Important behavior constraints: + - Return ONLY a single valid JSON string (no extra text, no explanation, no comments, no reasoning). + - JSON must be syntactically valid, parseable, and use JSON literals (true/false/null where applicable). 
+ - Field order MUST be EXACTLY: detected_object, category, main_surface, orientation_requirement. + - If a field is not applicable, use the JSON literal null. + - Use common/public default usage (not niche). Follow the TIE-BREAKER rule below if ambiguous. + - Use all provided views. If any view contradicts others, prioritize views that reveal human-interaction surfaces (front/diagonals) but still obey tie-breaker. + - If an OPTIONAL "Additional context" text is provided, use it as auxiliary information to help identification/classification. If the text conflicts with clear visual evidence, prioritize visual evidence. If the images are ambiguous, allow the text to resolve the ambiguity. Do NOT output the additional context—only use it internally for judgment. + + CATEGORY MAPPING (exact): + 0 = Omnidirectional, no constraint + 1 = Rotation-insensitive, upright required + 2 = Has forward-facing primary use surface + + DECISION DEFINITIONS (the ONLY basis for judgment — use common/public default usage): + + Omnidirectional, no constraint (0) + - Object is approx spherical or isotropic; function & appearance essentially identical under arbitrary orientation. + - No placement posture (upright/sideways/flipped/rotated) is expected in public use. + + Rotation-insensitive, upright required (1) + - Object has a stable upright support and a defined upright posture (flat bottom or center-of-gravity alignment). + - Rotating around vertical axis does NOT change its function; but it must be upright (not upside-down or on its side) for normal function. + + Has forward-facing primary use surface (2) + - Object has a single unique surface that carries its core function or primary human interaction (viewing, operating, aiming, serving, etc.). + - In normal public use the object is expected to be oriented so that this surface faces the user/target/line-of-sight. Multiple equivalent faces mean it does NOT qualify. 
+ + TIE-BREAKER / AMBIGUITY RULE (mandatory): + - If more than one category could apply, choose the category with the stricter orientation constraint (precedence: 2 → 1 → 0). + - Prefer common/public default usage, not niche setups. + + EXTENSIVE CANONICAL EXAMPLES (STRONG PRIOR — MUST FOLLOW) + CATEGORY 0 examples: ball, basketball, soccer ball, tennis ball, marble, pebble, orange, balloon(round) + CATEGORY 1 examples: cup, coffee cup,moka pot, drinking glass, bottle, vase, bowl, suitcase(standing), candle + CATEGORY 2 examples: monitor, laptop, smartphone, table lamp (head facing), flashlight, camera, car, bicycle, oven(front), speaker(front grille), keyboard, painting, wall clock + + OUTPUT JSON FORMAT (strict — EXACT four fields in this order; use JSON literals): + { + "detected_object": string or null, + "category": integer, // 0 | 1 | 2 + "main_surface": string or null, + "orientation_requirement": string or null + } + + FIELD RULES: + - "detected_object": short, common object name (lowercase preferred) representing the model's best identification, or null if unidentifiable. + - "category": integer 0|1|2. + - "main_surface": Only provide a short, specific name of the forward-facing surface when category == 2 (e.g. "screen", "lamp_head", "door_face", "keyboard_surface"). Otherwise null. + - "orientation_requirement": Only provide a concise canonical resting-orientation instruction when category == 2. You MUST choose exactly one of the following three semantic directions for the object's normal real-world static pose: + * "face_up" -> the main surface is intended to face upward toward +Z / gravity opposite, e.g. smartphone lying flat with screen up, keyboard on table, tray-like objects. + * "face_forward" -> the main surface is intended to face the user/target in a vertical stance, e.g. monitor screen, oven front, speaker grille, camera front. + * "face_down" -> the main surface is intended to face downward in the usual stable static pose, e.g. 
brush bristles or contact surface downward when naturally placed/used. + If the object is category 1 or 0, set null. + - Do NOT add any other fields. + + VALIDATION RULES (model must satisfy): + - JSON must be syntactically valid and parseable. + - Field order must be exactly as above. + - No extraneous text. + + INSTRUCTIONS FOR IMAGE USE: + - You will be provided a list of labeled views (each labeled with a short tag like "Front", "Back", "Right", "Left", "Diagonal_1", "Diagonal_2"). Use all images to resolve shape, symmetry, handles, screens, bases, cutouts, wheels, or any directional cues. + - Remember the mesh was normalized to the unit cube [0,0,0]→[1,1,1] for rendering—do NOT infer real-world size from pixel dimensions; rely on shape & functional features. + - If the object is clearly symmetric with no single primary face and no stable base, prefer category 0. If there is a clear base but no single forward-facing use surface, prefer category 1. If there is a screen, grill, face, nozzle, spout, or other unique human-facing surface, prefer category 2. + - For category 2 objects, infer the NORMAL STATIC RESTING ORIENTATION in the real world, not merely the visible camera view. Decide whether the primary surface is usually face_up, face_forward, or face_down in its standard placed state. + + NOW: classify the provided object and identify it using the images and the OPTIONAL Additional context text. 
+""" + + content = [{"type": "text", "text": instruction_text}] + + if extra_text and extra_text.strip(): + content.append( + {"type": "text", "text": f"Additional context: {extra_text.strip()}"} + ) + + content.extend(build_image_inputs(views_data)) + resp = client.chat.completions.create( + model=DEPLOYMENT, + temperature=0.2, + messages=[{"role": "user", "content": content}], + ) + raw = resp.choices[0].message.content + return extract_json(raw) + + +def ask_mllm_primary_surface( + views_data, + object_name="None", + main_surface="None", + orientation_requirement="None", + extra_text="", +): + + instruction_text = f""" + You are a single-purpose multimodal classifier. You will be given 6 images of a single object, rendered from different views. Your task is to identify **the image that best shows the object's forward-facing primary use surface**, defined as the surface that: + + - Carries the object's core function (viewing, operating, aiming, serving, pressing, interacting, etc.) + - Faces the human user or line-of-sight in normal use + - Is unique and human-accessible (not a symmetrical or bottom/support surface) + - Should prioritize the **front-facing view**, even if other angles also partially show it (e.g., top-down view of a laptop shows screen but front view is preferred) + + Additional guidance based on prior classification: + - Detected object: {object_name} + - Possible main surface: {main_surface} + - Orientation requirement: {orientation_requirement} + + If {main_surface} or {orientation_requirement} are provided (not "None"), use them to help identify which image shows the main functional surface. If they conflict with visual evidence, prioritize visual evidence. + + Return a single valid JSON string with exactly one field: + + {{ + "primary_surface_view": string // the name of the image that best shows the forward-facing primary use surface + }} + + Rules: + - Use only the image IDs (names) provided in input. 
+ - If the object has no clear forward-facing primary surface (fully isotropic or omnidirectional), return null. + - Do NOT add any extra text, explanation, or comments. + - Ensure the JSON is syntactically valid and parseable. + + Use the six views to judge shape, handles, screens, bases, spouts, lenses, doors, or other directional human-facing cues. Prioritize the image that a person would naturally face to use or interact with the object. You can also use any Additional context text provided: {extra_text if extra_text else "None"}. + """ + + content = [{"type": "text", "text": instruction_text}] + + if extra_text and extra_text.strip(): + content.append( + {"type": "text", "text": f"Additional context: {extra_text.strip()}"} + ) + + content.extend(build_image_inputs(views_data)) + resp = client.chat.completions.create( + model=DEPLOYMENT, + temperature=0.2, + messages=[{"role": "user", "content": content}], + ) + raw = resp.choices[0].message.content + return extract_json(raw) + + +# def ask_mllm_primary_surface( +# views_data, +# object_name="None", +# main_surface="None", +# orientation_requirement="None", +# extra_text="", +# ): +# instruction_text = f""" +# You are a single-purpose multimodal classifier. +# You will be given 6 images of one object rendered from different viewpoints. + +# Your task is to identify the ONE image that best shows the object's PRIMARY FUNCTIONAL SURFACE. + +# DEFINITION OF PRIMARY FUNCTIONAL SURFACE: +# - The surface that a human would normally face, look at, press, open, aim at, read from, or interact with in standard use. +# - It is the object's main semantic front / operating face, not merely the largest visible face. +# - It is usually the surface that carries the core function of the object. +# - It must be a unique, human-facing, functional surface. +# - Do NOT choose a support/base/bottom surface. +# - Do NOT choose a random side that happens to be large or clear if it is not the functional face. 
+# - Do NOT choose a view just because it shows more pixels of the object. + +# HARD PRIORITY RULE: +# - If the object is a laptop, monitor, smartphone, tablet, camera, TV, oven, speaker, microwave, printer, or similar device, the view showing the FRONT FUNCTIONAL FACE must be preferred. +# - For a laptop specifically: the SCREEN VIEW is the primary surface. The keyboard view is secondary and should NOT be chosen unless the screen is not visible at all and the object is not truly a laptop-like clamshell device. +# - For a monitor / display: choose the view where the screen face is most directly visible. +# - For a smartphone / tablet: choose the view where the screen/front glass is most directly visible. +# - For a camera: choose the lens/front face. +# - For a speaker: choose the grille/front face. +# - For an oven / microwave / cabinet / box with a door: choose the door/front panel face. +# - For a brush / broom / tool with a working head: choose the working head or active end if that is the main interaction face. +# - For objects with a clear labeled front, controls, opening, display, nozzle, spout, or intake/output face, choose that face. + +# SELECTION HEURISTICS: +# Prefer the image where the functional surface is: +# 1. most directly facing the camera +# 2. most centered and unobstructed +# 3. most clearly identifiable as the object's real-world front / operating face +# 4. 
supported by the object's category + +# DO NOT BE MISLED BY: +# - top-down views that expose a larger area but are not the real functional front +# - keyboard surfaces on laptops when the screen is available +# - backs, undersides, bases, or support faces +# - symmetric surfaces with no special functional meaning +# - views that show more geometry but less semantic evidence + +# Additional guidance based on prior classification: +# - Detected object: {object_name} +# - Possible main surface: {main_surface} + +# How to use the additional guidance: +# - If main_surface is not "None", use it as a strong hint for what the functional face is. +# - If orientation_requirement is not "None", use it to infer whether the functional face should be face_up, face_forward, or face_down in normal use. +# - If the hint conflicts with clear visual evidence, trust the images. +# - If the object is category 2, prefer the image that best matches the expected real-world functional orientation, not merely the most visible surface in the render. + +# OUTPUT FORMAT: +# Return only one valid JSON object with exactly one field: + +# {{ +# "primary_surface_view": string or null +# }} + +# RULES: +# - The value must be one of the provided image IDs / view names exactly as given in the input. +# - If the object has no clear primary functional surface, return null. +# - If multiple views show the same surface, choose the one that shows it most directly and most clearly. +# - Do not add any extra text, explanation, markdown, or comments. +# - The JSON must be syntactically valid and parseable. + +# IMPORTANT DECISION EXAMPLES: +# - Laptop -> choose the screen view, not the keyboard view. +# - Monitor -> choose the screen/front view. +# - Smartphone -> choose the screen/front-glass view. +# - Camera -> choose the lens/front view. +# - Speaker -> choose the front grille view. +# - Microwave/Oven -> choose the front door/panel view. 
+# - Brush -> choose the working head / bristle side if that is the functional face. +# - Broom -> choose the sweeping head side if it is the functional face. + +# You are selecting the view that best reveals the object's real functional front, not the view that simply shows the object most fully. +# Use the six views together, plus any Additional context text: +# {extra_text if extra_text else "None"} +# """ + +# content = [{"type": "text", "text": instruction_text}] + +# if extra_text and extra_text.strip(): +# content.append( +# {"type": "text", "text": f"Additional context: {extra_text.strip()}"} +# ) + +# content.extend(build_image_inputs(views_data)) +# resp = client.chat.completions.create( +# model=DEPLOYMENT, +# temperature=0.0, +# messages=[{"role": "user", "content": content}], +# ) +# raw = resp.choices[0].message.content +# return extract_json(raw) + + +# def ask_mllm_primary_surface( +# views_data, +# object_name="None", +# main_surface="None", +# orientation_requirement="None", +# extra_text="", +# ): +# instruction_text = f""" +# You are a single-purpose multimodal classifier. +# You will be given 6 images of one object rendered from different viewpoints. + +# Your task is to identify the ONE image that best shows the object's PRIMARY FUNCTIONAL SURFACE. + +# CRITICAL TARGET: +# - The correct image is the one where the object's primary surface is DIRECTLY FACING THE VIEWER. +# - In other words, the surface should be as FRONT-FACING / FACE-ON / HEAD-ON as possible. +# - The main surface should look like the object's true front face in the rendered view. +# - Do NOT choose a view where the surface is merely visible from an angle. +# - Do NOT choose a top, side, or bottom view unless that is truly the object's normal front-facing functional face. +# - The goal is to select the image that a human would naturally use when looking straight at the object from its functional front. 
+ +# DEFINITION OF PRIMARY FUNCTIONAL SURFACE: +# - The surface that a human would normally face, look at, press, open, aim at, read from, or interact with in standard use. +# - It is the object's main semantic front / operating face, not merely the largest visible face. +# - It is usually the surface that carries the core function of the object. +# - It must be a unique, human-facing, functional surface. +# - It must be the surface that is most naturally "facing us" in normal use. +# - Do NOT choose a support/base/bottom surface. +# - Do NOT choose a random side that happens to be large or clear if it is not the functional face. +# - Do NOT choose a view just because it shows more pixels of the object. + +# HARD PRIORITY RULE: +# - If the object is a laptop, monitor, smartphone, tablet, camera, TV, oven, speaker, microwave, printer, or similar device, the view showing the FRONT FUNCTIONAL FACE must be preferred. +# - For a laptop specifically: the SCREEN VIEW is the primary surface. The keyboard view is secondary and should NOT be chosen unless the screen is not visible at all and the object is not truly a laptop-like clamshell device. +# - For a monitor / display: choose the view where the screen face is most directly visible and most face-on. +# - For a smartphone / tablet: choose the view where the screen/front glass is most directly visible and most face-on. +# - For a camera: choose the lens/front face that is most directly facing the viewer. +# - For a speaker: choose the grille/front face that is most directly facing the viewer. +# - For an oven / microwave / cabinet / box with a door: choose the door/front panel face that is most directly facing the viewer. +# - For a brush / broom / tool with a working head: choose the working head or active end if that is the main interaction face, but only if it is the face that would be naturally presented toward the user. 
+# - For objects with a clear labeled front, controls, opening, display, nozzle, spout, or intake/output face, choose the image where that face is most directly facing the viewer. + +# SELECTION HEURISTICS: +# Prefer the image where the functional surface is: +# 1. most directly facing the camera / viewer +# 2. least foreshortened +# 3. most centered and unobstructed +# 4. most clearly identifiable as the object's real-world front / operating face +# 5. most consistent with the object's category + +# DO NOT BE MISLED BY: +# - top-down views that expose a larger area but are not the real front face +# - keyboard surfaces on laptops when the screen is available +# - backs, undersides, bases, or support faces +# - symmetric surfaces with no special functional meaning +# - views that show more geometry but less semantic evidence +# - views where the right surface exists but is shown at an angle instead of facing us directly + +# Additional guidance based on prior classification: +# - Detected object: {object_name} +# - Possible main surface: {main_surface} + +# How to use the additional guidance: +# - If main_surface is not "None", use it as a strong hint for what the functional face is. +# - If the hint conflicts with clear visual evidence, trust the images. + +# OUTPUT FORMAT: +# Return only one valid JSON object with exactly one field: + +# {{ +# "primary_surface_view": string or null +# }} + +# RULES: +# - The value must be one of the provided image IDs / view names exactly as given in the input. +# - If the object has no clear primary functional surface, return null. +# - If multiple views show the same surface, choose the one where that surface is most directly facing the viewer and most face-on. +# - Do not add any extra text, explanation, markdown, or comments. +# - The JSON must be syntactically valid and parseable. + +# IMPORTANT DECISION EXAMPLES: +# - Laptop -> choose the screen view, not the keyboard view. +# - Monitor -> choose the screen/front view. 
+# - Smartphone -> choose the screen/front-glass view. +# - Camera -> choose the lens/front view. +# - Speaker -> choose the front grille view. +# - Microwave/Oven -> choose the front door/panel view. +# - Brush -> choose the working head / bristle side if that is the functional face, but only when it is the face most directly facing the viewer. +# - Broom -> choose the sweeping head side if it is the functional face, but only when it is the face most directly facing the viewer. + +# You are selecting the view that best reveals the object's real functional front as a face-on view, not the view that simply shows the object most fully. +# Use the six views together, plus any Additional context text: +# {extra_text if extra_text else "None"} +# """ + +# content = [{"type": "text", "text": instruction_text}] + +# if extra_text and extra_text.strip(): +# content.append( +# {"type": "text", "text": f"Additional context: {extra_text.strip()}"} +# ) + +# content.extend(build_image_inputs(views_data)) +# resp = client.chat.completions.create( +# model=DEPLOYMENT, +# temperature=0.0, +# messages=[{"role": "user", "content": content}], +# ) +# raw = resp.choices[0].message.content +# return extract_json(raw) + + +def ask_llm_upright_2a1(object_name, upright_img_path, flipped_img_path): + for p in [upright_img_path, flipped_img_path]: + img_path = Path(p).resolve() + if not img_path.exists(): + raise FileNotFoundError( + f"Image required by LLM for upright judgment not found: {img_path}" + ) + + imgs_payload = [ + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{encode_image(upright_img_path)}" + }, + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{encode_image(flipped_img_path)}" + }, + }, + ] + + prompt = f""" +You are a physical-world perception model. + +An object of category: "{object_name}" is shown in TWO images. + +IMPORTANT: +- The two images show the SAME object. +- One image is physically correct (upright). 
+- The other image is rotated 180 degrees (upside-down). +- Exactly ONE image shows the object in its natural real-world upright orientation. + +Your task: choose which image is upright based on common human-world object orientation knowledge. + +Image A = first image +Image B = second image + +Rules: +- Think about gravity, support base, typical usage posture. +- Objects are not used upside-down in normal life. +- Do NOT say "both", "uncertain", or explanations. +- You MUST choose one. + +OUTPUT JSON ONLY: + +{{ + "upright_image": "A" or "B", + "confidence": 0.0-1.0 +}} +""" + + resp = client.chat.completions.create( + model=DEPLOYMENT, + messages=[ + { + "role": "user", + "content": [{"type": "text", "text": prompt}, *imgs_payload], + } + ], + temperature=0.0, + ) + return extract_json(resp.choices[0].message.content) + + +def ask_llm_full_side_profile(object_name, views_data): + img_paths = [] + for v in views_data: + name = v["name"] + path = v["path"] + img_paths.append(path) + + for p in img_paths: + img_path = Path(p).resolve() + if not img_path.exists(): + raise FileNotFoundError( + f"Image required by LLM for upright judgment not found: {img_path}" + ) + imgs_payload = [ + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{encode_image(p)}"}, + } + for p in img_paths + ] + + prompt = f""" + You are a visual reasoning model. + + An object of category: "{object_name}" is shown in TWO images. Both images show the same object in upright posture, but from different angles. + + Your task: determine **which image shows the object's full height and side profile**—that is, the complete body shape and natural standing posture. + + Rules: + - Choose exactly ONE image that best shows the object's full side profile. + - Think about how this object would stand in real life. + - Do NOT output explanations. + - Only return the index of the image. 
+ + OUTPUT JSON ONLY: + + {{ + "full_side_profile_image": "A" or "B", + "confidence": 0.0-1.0 + }} + """ + + resp = client.chat.completions.create( + model=DEPLOYMENT, + messages=[ + { + "role": "user", + "content": [{"type": "text", "text": prompt}, *imgs_payload], + } + ], + temperature=0.0, + ) + return extract_json(resp.choices[0].message.content) + + +def ask_llm_upright_rotation(object_name, rotated_imgs_paths): + """ + rotated_imgs_paths: list of image paths in order [0°, 90°, 180°, 270°] + object_name: string, name of the object + """ + + for p in rotated_imgs_paths: + img_path = Path(p).resolve() + if not img_path.exists(): + raise FileNotFoundError( + f"Image required by LLM for upright judgment not found: {img_path}" + ) + imgs_payload = [ + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{encode_image(p)}"}, + } + for p in rotated_imgs_paths + ] + + prompt = f""" +ou are a physical-world orientation judgment model. + +An object of category: "{object_name}" is shown in FOUR images. +All images show the SAME object from the SAME camera viewpoint. + +Your task is to choose the image that best matches the object's natural upright pose in everyday life. + +Think about: +- how the object would normally rest on a table, floor, or other surface +- gravity and stable support +- the object's base, feet, bottom, opening, handle, screen, or functional side +- the orientation people would normally place, hold, or use it in real life + +Important: +- Choose the image that looks most naturally upright and stable in the real world. +- Do NOT rely on any hidden rotation pattern. +- Do NOT assume the object is already upright in the original image. +- Do NOT explain your reasoning. +- Only return the index of the best upright image. +The correct answer must be the image that a person would most likely consider the object's normal real-world standing orientation. 
+ +Image indices: +- 0 = first image +- 1 = second image +- 2 = third image +- 3 = fourth image + +OUTPUT JSON ONLY: + +{{ + "upright_index": 0|1|2|3, + "confidence": 0.0-1.0 +}} +""" + resp = client.chat.completions.create( + model=DEPLOYMENT, + messages=[ + { + "role": "user", + "content": [{"type": "text", "text": prompt}, *imgs_payload], + } + ], + temperature=0.0, + ) + return extract_json(resp.choices[0].message.content) + + +def ask_llm_dimension(object_name, img_paths, user_text_hint, current_bbox_dims): + + if isinstance(img_paths, (str, Path)): + img_paths = [{"path": str(img_paths)}] + + imgs_payload = [] + for item in img_paths: + img_path = Path(item["path"]).resolve() + if not img_path.exists(): + raise FileNotFoundError(f"Image required by LLM not found: {img_path}") + imgs_payload.append( + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{encode_image(img_path)}"}, + } + ) + + current_bbox_json = json.dumps(current_bbox_dims, ensure_ascii=False) + + prompt = f""" +You are a robotics perception and scene analysis expert. +Your task is to estimate the REAL-WORLD physical size of the object in meters. + +CONTEXT: +- The mesh has already been normalized for rendering. +- You are given the object's CURRENT NORMALIZED AABB SIZE (ordinary axis-aligned bounding box, NOT PCA, NOT minimum-volume OBB). +- Use that normalized bbox size as a STRONG SHAPE PRIOR. +- Your output MUST be a plausible real-world size in meters for the exact state shown in the images. +- You must preserve the object's proportions as much as possible; do NOT invent an anisotropic resize. The downstream system will apply ONLY a uniform scale. 
+ +CURRENT NORMALIZED AABB SIZE (unitless, from ordinary bbox): +{current_bbox_json} + +DEFINITIONS: +- height = vertical size when a human faces the object (top -> bottom), Z axis +- width = left-to-right size when facing the object, Y axis +- depth = front-back thickness, X axis + +USER PROVIDED HINT: +- object_name: {object_name} +- extra_hint: {user_text_hint} + +INSTRUCTIONS: +1. Analyze ALL provided images together. +2. Determine the exact visible state first (open/closed/folded/etc.). +3. Estimate the object's real-world physical dimensions in meters for that exact state. +4. Use the normalized bbox as a shape prior so the returned dimensions are consistent with the object's proportions. +5. If uncertain, give the most plausible central estimate. Do not return null unless completely unrecognizable. + +Return JSON ONLY with: +{{ + "object_name": string, + "object_description": string, + "dimensions_m": {{ + "height": float, + "width": float, + "depth": float + }}, + "confidence": float +}} + +CRITICAL: +- JSON only. +- Units must be meters. +- Output real physical dimensions, not normalized values. +- Do not explain anything. 
+""" + + resp = client.chat.completions.create( + model=DEPLOYMENT, + messages=[ + { + "role": "user", + "content": [{"type": "text", "text": prompt}, *imgs_payload], + } + ], + temperature=0.0, + ) + return extract_json(resp.choices[0].message.content) + + +def rotate_image_deg(input_path, deg, output_path): + input_path = Path(input_path).resolve() + output_path = Path(output_path).resolve() + + if not input_path.exists(): + raise FileNotFoundError( + f"Input file for image rotation not found: {input_path}" + ) + + img = Image.open(input_path) + img_rot = img.rotate(deg, expand=True) + img_rot.save(output_path) + return str(output_path) + + +def rot_x(deg): + r = np.deg2rad(deg) + c, s = np.cos(r), np.sin(r) + return np.array([[1, 0, 0], [0, c, -s], [0, s, c]]) + + +def rot_y(deg): + r = np.deg2rad(deg) + c, s = np.cos(r), np.sin(r) + return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]]) + + +def rot_z(deg): + r = np.deg2rad(deg) + c, s = np.cos(r), np.sin(r) + return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]]) + + +def apply_rotations(mesh, rotations): + R = np.eye(3) + T = np.eye(4) + T[:3, :3] = rotations + mesh.apply_transform(T) + + +def get_aabb_dims(mesh: trimesh.Trimesh): + + bounds = np.asarray(mesh.bounds, dtype=float) + extents = bounds[1] - bounds[0] + return { + "height": float(extents[2]), + "width": float(extents[1]), + "depth": float(extents[0]), + } + + +def dims_dict_to_xyz(dims: dict): + + return np.array( + [ + float(dims.get("depth", np.nan)), + float(dims.get("width", np.nan)), + float(dims.get("height", np.nan)), + ], + dtype=float, + ) + + +def scale_mesh_uniform_to_dimensions( + mesh: trimesh.Trimesh, + target_dims: dict, + current_dims: dict | None = None, + eps: float = 1e-8, +): + + if current_dims is None: + current_dims = get_aabb_dims(mesh) + + cur = dims_dict_to_xyz(current_dims) + tgt = dims_dict_to_xyz(target_dims) + + valid = np.isfinite(cur) & np.isfinite(tgt) & (cur > eps) & (tgt > eps) + if not np.any(valid): + raise 
ValueError(f"Invalid dims. current={current_dims}, target={target_dims}") + + ratios = tgt[valid] / cur[valid] + + scale = float(np.median(ratios)) + + center = mesh.bounds.mean(axis=0) + mesh.apply_translation(-center) + mesh.apply_scale(scale) + mesh.apply_translation(center) + + return mesh, scale + + +def ask_llm_semantics_info(object_name, img_paths, user_text_hint=""): + + imgs_payload = [] + for item in img_paths: + img_path = Path(item["path"]).resolve() + if not img_path.exists(): + raise FileNotFoundError(f"Image required by LLM not found: {img_path}") + imgs_payload.append( + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{encode_image(img_path)}"}, + } + ) + + prompt = f""" +You are a robotics asset semantics expert. + +Your task is to infer semantic information from multiple rendered views of a 3D object. +The object will later be used for robotics simulation, physical property estimation, and manipulation planning. + +INPUTS: +- object_name: {object_name} +- user_hint: {user_text_hint} + +INSTRUCTIONS: +1. Use the front view and diagonal views jointly. +2. Infer the most likely semantic category of the object. +3. Identify the most likely main material(s) visible from the object appearance. +4. Write a concise but information-rich description that includes: + - object type / category + - likely main material(s) + - surface finish / texture + - rigid or flexible nature + - notable functional or structural parts +5. Be conservative and grounded in visual evidence. +6. If material is uncertain, provide the most likely hypothesis rather than leaving it empty. +7. The output will be used later to derive physical properties such as density, mass, friction, etc., so the description should be useful for that purpose. + +SEMANTIC TAG RULES: +- Use lowercase snake_case. 
+- Prefer specific tags when possible, e.g.: + - ceramic_mug + - plastic_storage_box + - wooden_chair + - metal_tool + - glass_bottle + - fabric_soft_toy + - electronic_device +- If uncertain, use a broader but still useful tag such as: + - container + - kitchenware + - hand_tool + - furniture + - toy + - household_item + +OUTPUT JSON SCHEMA: +{{ + "object_name": string, + "semantic_tag": string, + "description": string, + "primary_materials": [string, ...], + "material_confidence": float, + "confidence": float +}} + +FIELD GUIDANCE: +- object_name: canonical short name for the object +- semantic_tag: concise semantic class tag +- description: 1-3 sentences; mention likely material and structural/functional semantics +- primary_materials: list of likely materials in descending plausibility +- material_confidence: confidence in material estimate, from 0.0 to 1.0 +- confidence: confidence in the semantic classification overall, from 0.0 to 1.0 + +CRITICAL RULES: +- OUTPUT JSON ONLY. +- No markdown. +- No extra text. +- Do not return null unless the object is completely unrecognizable. 
+""" + + resp = client.chat.completions.create( + model=DEPLOYMENT, + messages=[ + { + "role": "user", + "content": [{"type": "text", "text": prompt}, *imgs_payload], + } + ], + temperature=0.0, + ) + return extract_json(resp.choices[0].message.content) + + +def export_final_mesh(mesh, name, out_dir: Path): + out_dir = out_dir.resolve() + out_dir.mkdir(exist_ok=True, parents=True) + bounds = mesh.bounds + minb = bounds[0] + maxb = bounds[1] + bottom_center = np.array( + [(minb[0] + maxb[0]) / 2.0, (minb[1] + maxb[1]) / 2.0, minb[2]], dtype=float + ) + T_trans = np.eye(4) + T_trans[:3, 3] = -bottom_center + mesh.apply_transform(T_trans) + out_path = out_dir / f"{name}_simready.obj" + out_path = out_path.resolve() + + print(f"Exporting final mesh to: {out_path} (bottom-face center moved to origin)") + mesh.export(out_path) + + return str(out_path) + + +def delete_rendered_pngs(output_dir): + output_dir = Path(output_dir) + if not output_dir.exists(): + return + + patterns = [ + "view_*.png", + "*_flipped.png", + ] + + for pattern in patterns: + for p in output_dir.glob(pattern): + p.unlink() + + +def process_mesh(file, name=None, extra_text="", out_dir="renders", res=1024): + if isinstance(file, (str, Path)): + file = Path(file).resolve() + name = file.stem + out_dir = Path(out_dir).resolve() + out_dir.mkdir(exist_ok=True, parents=True) + mesh = init_pose(file) + + images_first = render_views( + mesh, diagonal_views + cardinal_views + up_down_views, out_dir, res + ) + category_res = ask_mllm_detect_and_classify(images_first, extra_text=extra_text) + print(category_res) + category = int(category_res.get("category", 0)) + object_name = str(category_res.get("detected_object", "None")) + main_surface = str(category_res.get("main_surface", "None")) + orientation_requirement = str(category_res.get("orientation_requirement", "None")) + + if category == 0: + pass + + elif category == 1: + images_for_1_1 = render_views(mesh, side_profile, out_dir, res) + side_profile_result = 
ask_llm_full_side_profile(object_name, images_for_1_1) + print(side_profile_result) + side_profile_result = side_profile_result.get("full_side_profile_image", "B") + if side_profile_result == "B": + upright_img = render_views(mesh, front_views, out_dir, res) + upright_img = upright_img[0]["path"] + flipped_path = str( + Path(upright_img).with_name( + Path(upright_img).stem + f"_180_flipped.png" + ) + ) + rotate_image_deg(upright_img, 180, flipped_path) + upright_result = ask_llm_upright_2a1(object_name, upright_img, flipped_path) + print(upright_result) + try: + upright_choice = upright_result.get("upright_image", "A") + except Exception: + upright_choice = "A" + if upright_choice == "B": + x_flip = rot_x(180) + apply_rotations(mesh, x_flip) + + elif side_profile_result == "A": + upright_img = render_views( + mesh, + [("view_from_up_to_bottom", np.array([0.5, 0.5, 2.2], dtype=float))], + out_dir, + res, + ) + upright_img = upright_img[0]["path"] + rotated_imgs = [] + rotated_imgs.append(upright_img) + rotate_deg = [90, 180, 270] + for deg in rotate_deg: + flipped_path = str( + Path(upright_img).with_name( + Path(upright_img).stem + f"_{deg}_flipped.png" + ) + ) + rotated_imgs.append(rotate_image_deg(upright_img, deg, flipped_path)) + side_rotation_result = ask_llm_upright_rotation(object_name, rotated_imgs) + side_rotation_result = side_rotation_result.get("upright_index", 0) + print("side rotation is", side_rotation_result) + if side_rotation_result == 0: + pass + elif side_rotation_result == 1: + side_r = rot_z(90) + apply_rotations(mesh, side_r) + elif side_rotation_result == 2: + side_r = rot_z(180) + apply_rotations(mesh, side_r) + elif side_rotation_result == 3: + side_r = rot_z(270) + apply_rotations(mesh, side_r) + else: + raise ValueError("no upright index choosen") + side_r = rot_y(90) + apply_rotations(mesh, side_r) + else: + raise ValueError("no side profil choosen") + + elif category == 2: + images_for_2_1 = render_views( + mesh, cardinal_views + 
up_down_views, out_dir, res + ) + result_main_surface = ask_mllm_primary_surface( + images_for_2_1, object_name, main_surface, orientation_requirement + ) + print(result_main_surface) + primary_view = result_main_surface.get("primary_surface_view", "None") + + if orientation_requirement == "face_forward": + + if primary_view in [i[0] for i in cardinal_views]: + if primary_view == "view_from_front": + print("no need to rotate round z") + elif primary_view == "view_from_left": # left + R = rot_z(90) + apply_rotations(mesh, R) + elif primary_view == "view_from_right": # right + R = rot_z(-90) + apply_rotations(mesh, R) + elif primary_view == "view_from_back": # back + R = rot_z(180) + apply_rotations(mesh, R) + + else: + raise ValueError("unknow views") + + elif primary_view in [i[0] for i in up_down_views]: + if primary_view == "view_from_up_to_bottom": + R = rot_y(90) + apply_rotations(mesh, R) + elif primary_view == "view_from_bottom_to_up": + R = rot_y(-90) + apply_rotations(mesh, R) + else: + raise ValueError("unknow views") + + else: + raise ValueError("unknow views") + normalize_to_unit_cube(mesh) + upright_img = render_views(mesh, front_views, out_dir, res) + upright_img = upright_img[0]["path"] + rotated_imgs = [] + rotated_imgs.append(upright_img) + rotate_deg = [90, 180, 270] + for deg in rotate_deg: + flipped_path = str( + Path(upright_img).with_name( + Path(upright_img).stem + f"_{deg}_flipped.png" + ) + ) + rotated_imgs.append(rotate_image_deg(upright_img, deg, flipped_path)) + result = ask_llm_upright_rotation(object_name, rotated_imgs) + print(result) + upright_result = result.get("upright_index", 0) + if upright_result == 0: + pass + elif upright_result == 1: + upright_deg = rot_x(90) + apply_rotations(mesh, upright_deg) + elif upright_result == 2: + upright_deg = rot_x(180) + apply_rotations(mesh, upright_deg) + elif upright_result == 3: + upright_deg = rot_x(-90) + apply_rotations(mesh, upright_deg) + else: + raise ValueError("upright index unknow") 
+ + elif orientation_requirement == "face_up": + + if primary_view in [i[0] for i in cardinal_views]: + if primary_view == "view_from_front": + R = rot_y(-90) + elif primary_view == "view_from_left": + R = rot_x(-90) + apply_rotations(mesh, R) + elif primary_view == "view_from_right": + R = rot_x(90) + apply_rotations(mesh, R) + elif primary_view == "view_from_back": + R = rot_y(90) + apply_rotations(mesh, R) + else: + raise ValueError("unknow views") + + elif primary_view in [i[0] for i in up_down_views]: + if primary_view == "view_from_up_to_bottom": + print("no need to rotate") + elif primary_view == "view_from_bottom_to_up": + R = rot_x(180) + apply_rotations(mesh, R) + else: + raise ValueError("unknow views") + + else: + raise ValueError("unknow views") + normalize_to_unit_cube(mesh) + upright_img = render_views(mesh, up_views, out_dir, res) + upright_img = upright_img[0]["path"] + rotated_imgs = [] + rotated_imgs.append(upright_img) + rotate_deg = [90, 180, 270] + for deg in rotate_deg: + flipped_path = str( + Path(upright_img).with_name( + Path(upright_img).stem + f"_{deg}_flipped.png" + ) + ) + rotated_imgs.append(rotate_image_deg(upright_img, deg, flipped_path)) + result = ask_llm_upright_rotation(object_name, rotated_imgs) + print(result) + upright_result = result.get("upright_index", 0) + if upright_result == 0: + pass + elif upright_result == 1: + upright_deg = rot_z(90) + apply_rotations(mesh, upright_deg) + elif upright_result == 2: + upright_deg = rot_z(180) + apply_rotations(mesh, upright_deg) + elif upright_result == 3: + upright_deg = rot_z(-90) + apply_rotations(mesh, upright_deg) + else: + raise ValueError("upright index unknow") + + elif orientation_requirement == "face_down": + if primary_view in [i[0] for i in cardinal_views]: + if primary_view == "view_from_front": + R = rot_y(90) + elif primary_view == "view_from_left": + R = rot_x(90) + apply_rotations(mesh, R) + elif primary_view == "view_from_right": + R = rot_x(-90) + 
apply_rotations(mesh, R) + elif primary_view == "view_from_back": + R = rot_y(-90) + apply_rotations(mesh, R) + else: + raise ValueError("unknow views") + + elif primary_view in [i[0] for i in up_down_views]: + if primary_view == "view_from_up_to_bottom": + print("no need to rotate") + elif primary_view == "view_from_bottom_to_up": + R = rot_x(180) + apply_rotations(mesh, R) + else: + raise ValueError("unknow views") + + else: + raise ValueError("unknow views") + normalize_to_unit_cube(mesh) + upright_img = render_views(mesh, down_views, out_dir, res) + upright_img = upright_img[0]["path"] + rotated_imgs = [] + rotated_imgs.append(upright_img) + rotate_deg = [90, 180, 270] + for deg in rotate_deg: + flipped_path = str( + Path(upright_img).with_name( + Path(upright_img).stem + f"_{deg}_flipped.png" + ) + ) + rotated_imgs.append(rotate_image_deg(upright_img, deg, flipped_path)) + result = ask_llm_upright_rotation(object_name, rotated_imgs) + print(result) + upright_result = result.get("upright_index", 0) + if upright_result == 0: + apply_rotations(mesh, upright_deg) + elif upright_result == 1: + upright_deg = rot_z(90) + apply_rotations(mesh, upright_deg) + elif upright_result == 2: + upright_deg = rot_z(180) + pass + elif upright_result == 3: + upright_deg = rot_z(-90) + apply_rotations(mesh, upright_deg) + else: + raise ValueError("upright index unknow") + + else: + raise ValueError("unknow orientationrequirement") + + else: + raise ValueError() + + # TODO: 还需要再做对齐分析!避免倾斜!!!! 
def main():
    """Command-line entry point for the mesh processing pipeline.

    Parses the CLI arguments, resolves the input file and the output directory
    to absolute paths and forwards everything to ``process_mesh``.

    Raises:
        SystemExit: with status 1 when the input file does not exist (after
            printing an error message), or by ``argparse`` on invalid usage.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--file",
        required=True,
        help="Path to input 3D mesh file (absolute path supported)",
    )
    ap.add_argument(
        "--extra_text",
        default="",
        help="Text description for your object, mainly describe the dimension and category",
    )
    ap.add_argument(
        "--out_dir",
        default="renders",
        help="Output directory (absolute path supported)",
    )
    ap.add_argument(
        "--name",
        default="test",
        # Previously this repeated the --out_dir help text verbatim.
        help="Name used when exporting the final mesh",
    )
    ap.add_argument("--res", type=int, default=1024, help="Rendered image resolution")
    args = ap.parse_args()
    args.file = Path(args.file).resolve()
    args.out_dir = Path(args.out_dir).resolve()
    if not args.file.exists():
        print(f"Error: Input file does not exist - {args.file}")
        # Raise SystemExit directly: the bare ``exit`` helper is injected by
        # the ``site`` module and is not guaranteed to exist in every runtime.
        raise SystemExit(1)

    process_mesh(args.file, args.name, args.extra_text, args.out_dir, args.res)


if __name__ == "__main__":
    main()
# Texture slots read from PBR materials by _inspect_material().
PBR_TEXTURE_FIELDS = (
    "baseColorTexture",
    "metallicRoughnessTexture",
    "normalTexture",
    "occlusionTexture",
    "emissiveTexture",
)

# Non-texture attributes read from PBR materials by _inspect_material().
PBR_SCALAR_FIELDS = (
    "baseColorFactor",
    "metallicFactor",
    "roughnessFactor",
    "emissiveFactor",
    "alphaMode",
    "alphaCutoff",
    "doubleSided",
)

# Attributes read from simple materials by _inspect_material().
SIMPLE_SCALAR_FIELDS = (
    "diffuse",
    "ambient",
    "specular",
    "glossiness",
)


def _shape(x: Any) -> Optional[Tuple[int, ...]]:
    """Return ``x.shape`` as a tuple, or ``None`` if it cannot be obtained."""
    try:
        dims = x.shape
        return tuple(dims)
    except Exception:
        return None


def _to_jsonable(x: Any) -> Any:
    """Convert ``x`` into something ``json`` can serialise, on a best-effort basis.

    Checks run in this order:
      1. ``None`` stays ``None``.
      2. Objects with ``tolist`` return ``x.tolist()`` (skipped if that raises).
      3. Objects with both ``size`` and ``mode`` become a small descriptor
         dict (``type``/``size``/``mode``; only ``type`` if building it fails).
      4. ``str``/``int``/``float``/``bool`` are returned unchanged.
      5. Everything else is stringified with ``str``.
    """
    if x is None:
        return None

    if hasattr(x, "tolist"):
        try:
            return x.tolist()
        except Exception:
            # Deliberate best effort: fall through to the remaining checks.
            pass

    looks_like_image = hasattr(x, "size") and hasattr(x, "mode")
    if looks_like_image:
        try:
            return {
                "type": type(x).__name__,
                "size": list(x.size),
                "mode": x.mode,
            }
        except Exception:
            return {"type": type(x).__name__}

    return x if isinstance(x, (str, int, float, bool)) else str(x)


def _describe_texture_value(value: Any) -> Dict[str, Any]:
    """Describe one texture slot: presence flag, type name and JSON-able meta."""
    if value is None:
        return {"present": False, "type": None, "meta": None}
    return {
        "present": True,
        "type": type(value).__name__,
        "meta": _to_jsonable(value),
    }


def _inspect_material(material: Any) -> Dict[str, Any]:
    """Summarise a trimesh material; recurses into ``MultiMaterial`` children.

    Returns:
        Dict with the keys ``material_class``, ``material_kind``, ``name``,
        ``main_color``, ``texture_count``, ``textures``, ``scalars`` and
        ``children``. ``material_kind`` is ``"multi"``, ``"pbr"``, ``"simple"``
        or ``"generic_or_unknown"`` (``None`` when ``material`` is ``None``).
    """
    if material is None:
        return {
            "material_class": None,
            "material_kind": None,
            "name": None,
            "main_color": None,
            "texture_count": 0,
            "textures": {},
            "scalars": {},
            "children": None,
        }

    textures: Dict[str, Any] = {}
    scalars: Dict[str, Any] = {}
    summary: Dict[str, Any] = {
        "material_class": type(material).__name__,
        "material_kind": None,
        "name": getattr(material, "name", None),
        "main_color": _to_jsonable(getattr(material, "main_color", None)),
        "texture_count": 0,
        "textures": textures,
        "scalars": scalars,
        "children": None,
    }

    material_types = trimesh.visual.material

    # MultiMaterial: a wrapper around a list of materials.
    if isinstance(material, material_types.MultiMaterial):
        summary["material_kind"] = "multi"
        members = getattr(material, "materials", None) or []
        children: List[Dict[str, Any]] = [
            {**_inspect_material(member), "index": position}
            for position, member in enumerate(members)
        ]
        summary["children"] = children
        summary["texture_count"] = sum(
            int(child.get("texture_count", 0)) for child in children
        )
        return summary

    if isinstance(material, material_types.PBRMaterial):
        summary["material_kind"] = "pbr"
        for scalar_name in PBR_SCALAR_FIELDS:
            scalars[scalar_name] = _to_jsonable(getattr(material, scalar_name, None))

        populated = 0
        for slot in PBR_TEXTURE_FIELDS:
            slot_value = getattr(material, slot, None)
            textures[slot] = _describe_texture_value(slot_value)
            populated += slot_value is not None
        summary["texture_count"] = int(populated)
        return summary

    if isinstance(material, material_types.SimpleMaterial):
        summary["material_kind"] = "simple"
        for scalar_name in SIMPLE_SCALAR_FIELDS:
            scalars[scalar_name] = _to_jsonable(getattr(material, scalar_name, None))

        image = getattr(material, "image", None)
        textures["image"] = _describe_texture_value(image)
        summary["texture_count"] = int(image is not None)
        return summary

    # Generic Material or unknown subclass: keep whatever looks texture-like.
    summary["material_kind"] = "generic_or_unknown"
    for attr_name, attr_value in getattr(material, "__dict__", {}).items():
        lowered = attr_name.lower()
        if "texture" in lowered or lowered in {"image", "name"}:
            textures[attr_name] = _describe_texture_value(attr_value)
    return summary


def classify_visual(mesh: trimesh.Trimesh) -> Dict[str, Any]:
    """Classify ``mesh.visual`` and report what colour/texture data it carries.

    The returned dict always has the same keys:
      - visual identity: ``visual_class``, ``visual_category`` (``"none"``,
        ``"texture"``, ``"color"`` or ``"unknown"``), ``visual_kind``,
        ``visual_defined``, ``is_color_visual``, ``is_texture_visual``
      - UV info: ``uv_present``, ``uv_shape``
      - material info: ``material`` (see ``_inspect_material``),
        ``material_type``, ``material_kind``, ``texture_count_total``,
        ``face_materials_present``, ``face_materials_shape_or_len``
      - colour info: ``color_mode``, ``face_colors_shape``,
        ``vertex_colors_shape``, ``has_transparency``, ``main_color``
      - ``texture_state``: a completeness label, and ``notes``: free-text remarks
    """
    visual = getattr(mesh, "visual", None)
    notes: List[str] = []
    report: Dict[str, Any] = {
        "visual_class": None if visual is None else type(visual).__name__,
        "visual_category": "none",
        "visual_kind": None,
        "visual_defined": False,
        "is_color_visual": False,
        "is_texture_visual": False,
        "uv_present": False,
        "uv_shape": None,
        "material": None,
        "material_type": None,
        "material_kind": None,
        "texture_count_total": 0,
        "texture_state": "none",
        "face_materials_present": False,
        "face_materials_shape_or_len": None,
        "color_mode": None,
        "face_colors_shape": None,
        "vertex_colors_shape": None,
        "has_transparency": None,
        "main_color": None,
        "notes": notes,
    }

    if visual is None:
        notes.append("mesh.visual is None")
        return report

    report["visual_kind"] = getattr(visual, "kind", None)
    report["visual_defined"] = bool(getattr(visual, "defined", False))

    # -------- TextureVisuals --------
    if isinstance(visual, trimesh.visual.texture.TextureVisuals):
        report["visual_category"] = "texture"
        report["is_texture_visual"] = True

        uv = getattr(visual, "uv", None)
        has_uv = uv is not None
        report["uv_present"] = has_uv
        report["uv_shape"] = _shape(uv)

        # face_materials is optional, so it is probed defensively.
        face_materials = getattr(visual, "face_materials", None)
        report["face_materials_present"] = face_materials is not None
        if face_materials is not None:
            try:
                extent = len(face_materials)
            except Exception:
                extent = _shape(face_materials)
            report["face_materials_shape_or_len"] = extent

        material = getattr(visual, "material", None)
        if material is not None:
            material_info = _inspect_material(material)
            report["material"] = material_info
            report["material_type"] = type(material).__name__
            report["material_kind"] = material_info["material_kind"]
            report["main_color"] = material_info["main_color"]
            report["texture_count_total"] = int(material_info["texture_count"])

        # The first failing requirement (uv, material, textures) picks the label.
        if not has_uv:
            report["texture_state"] = "texture_visual_missing_uv"
            notes.append("TextureVisuals exists, but uv is missing.")
        elif material is None:
            report["texture_state"] = "texture_visual_missing_material"
            notes.append("TextureVisuals has uv, but material is missing.")
        elif report["texture_count_total"] == 0:
            report["texture_state"] = "texture_visual_material_no_textures"
            notes.append(
                "TextureVisuals has uv and material, but material contains no texture slots/images."
            )
        else:
            report["texture_state"] = "texture_visual_complete_or_partially_complete"

        # Surface the material's alpha mode when it has one.
        if material is not None and hasattr(material, "alphaMode"):
            notes.append(f"alphaMode={getattr(material, 'alphaMode', None)}")
        return report

    # -------- ColorVisuals --------
    if isinstance(visual, trimesh.visual.color.ColorVisuals):
        report["visual_category"] = "color"
        report["is_color_visual"] = True
        # Read the colour mode before touching the colour arrays below.
        color_mode = getattr(visual, "kind", None)
        report["color_mode"] = color_mode

        report["face_colors_shape"] = _shape(getattr(visual, "face_colors", None))
        report["vertex_colors_shape"] = _shape(getattr(visual, "vertex_colors", None))
        report["has_transparency"] = bool(getattr(visual, "transparency", False))
        report["main_color"] = _to_jsonable(getattr(visual, "main_color", None))

        if color_mode == "face":
            state = "color_face"
        elif color_mode == "vertex":
            state = "color_vertex"
        else:
            state = "color_unset_or_default"
        report["texture_state"] = state
        return report

    # -------- Unknown visual subclass --------
    report["visual_category"] = "unknown"
    notes.append(
        f"Unhandled visual type: {type(visual).__name__}. Inspect __dict__ for custom extension."
    )

    # Best-effort generic dump for custom visuals.
    if hasattr(visual, "__dict__"):
        raw_attributes = {
            attr: _to_jsonable(attr_value)
            for attr, attr_value in visual.__dict__.items()
        }
        report["material"] = {"raw_attributes": raw_attributes}

    return report
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +import argparse +import json +import shutil +import tempfile +from pathlib import Path +from typing import Dict, Any, Optional, Union + +import numpy as np +import trimesh +from pxr import Gf, Sdf, Usd, UsdGeom, UsdPhysics, UsdShade, UsdUtils, Vt + +DEFAULT_PHYSICS_PARAMS = { + "mass": 1.0, + "density": 1000.0, + "static_friction": 0.5, + "dynamic_friction": 0.5, + "restitution": 0.0, + "linear_damping": 0.7, + "angular_damping": 0.7, + "enable_collision": True, + "enable_ccd": False, + "contact_offset": 0.001, + "rest_offset": 0.0, + "max_linear_velocity": 100.0, + "max_angular_velocity": 50.0, + "max_depenetration_velocity": 100.0, + "solver_min_position_iters": 4, + "solver_min_velocity_iters": 1, + "sleep_threshold": 0.001, +} + + +def parse_glb_with_trimesh(path: Path, texture_dir: Path) -> Dict[str, Any]: + scene = trimesh.load(str(path)) + mesh = scene.dump(concatenate=True) if isinstance(scene, trimesh.Scene) else scene + + tex_filename = "diffuse.png" + tex_path = texture_dir / tex_filename + + material = mesh.visual.material + if hasattr(material, "image") and material.image is not None: + material.image.save(str(tex_path)) + elif ( + hasattr(material, "baseColorTexture") and material.baseColorTexture is not None + ): + material.baseColorTexture.save(str(tex_path)) + + return { + "vertices": np.asarray(mesh.vertices), + "faces": np.asarray(mesh.faces), + "uv": ( + np.asarray(mesh.visual.uv) + if getattr(mesh.visual, "uv", None) is not None + else None + ), + 
"tex_path": f"./textures/{tex_filename}", + } + + +def build_clean_usd( + data: Dict[str, Any], output_path: Path, physics_params: Dict[str, float] +) -> None: + stage = Usd.Stage.CreateNew(str(output_path)) + UsdGeom.SetStageUpAxis(stage, UsdGeom.Tokens.z) + UsdGeom.SetStageMetersPerUnit(stage, 1.0) + UsdPhysics.Scene.Define(stage, "/PhysicsScene") + + root_prim = UsdGeom.Xform.Define(stage, "/RootNode") + stage.SetDefaultPrim(root_prim.GetPrim()) + + stage.DefinePrim("/RootNode/Looks", "Scope") + UsdGeom.Xform.Define(stage, "/RootNode/geometry_inst") + + new_mat_path = "/RootNode/Looks/Material_0" + new_geo_path = "/RootNode/geometry_inst/geometry_0" + + # --- A. Mesh Definition --- + mesh = UsdGeom.Mesh.Define(stage, new_geo_path) + mesh.CreatePointsAttr(Vt.Vec3fArray([Gf.Vec3f(*v) for v in data["vertices"]])) + mesh.CreateFaceVertexIndicesAttr(Vt.IntArray(data["faces"].flatten().tolist())) + mesh.CreateFaceVertexCountsAttr(Vt.IntArray([3] * len(data["faces"]))) + + if data.get("uv") is not None: + tex_coords = UsdGeom.PrimvarsAPI(mesh).CreatePrimvar( + "st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.varying + ) + tex_coords.Set(Vt.Vec2fArray([Gf.Vec2f(*uv) for uv in data["uv"]])) + + mesh.CreateDoubleSidedAttr(True) + + # --- B. 
Material Definition --- + material = UsdShade.Material.Define(stage, new_mat_path) + pbr_shader = UsdShade.Shader.Define(stage, f"{new_mat_path}/PBRShader") + pbr_shader.CreateIdAttr("UsdPreviewSurface") + + st_reader = UsdShade.Shader.Define(stage, f"{new_mat_path}/STReader") + st_reader.CreateIdAttr("UsdPrimvarReader_float2") + st_reader.CreateInput("varname", Sdf.ValueTypeNames.Token).Set("st") + + tex_sampler = UsdShade.Shader.Define(stage, f"{new_mat_path}/DiffuseSampler") + tex_sampler.CreateIdAttr("UsdUVTexture") + tex_sampler.CreateInput("file", Sdf.ValueTypeNames.Asset).Set(data["tex_path"]) + tex_sampler.CreateInput("st", Sdf.ValueTypeNames.Float2).ConnectToSource( + st_reader.ConnectableAPI(), "result" + ) + + pbr_shader.CreateInput("diffuseColor", Sdf.ValueTypeNames.Color3f).ConnectToSource( + tex_sampler.ConnectableAPI(), "rgb" + ) + material.CreateSurfaceOutput().ConnectToSource( + pbr_shader.ConnectableAPI(), "surface" + ) + UsdShade.MaterialBindingAPI.Apply(mesh.GetPrim()).Bind(material) + + # --- C. Physics Material Injection --- + binding_api = UsdShade.MaterialBindingAPI(mesh.GetPrim()) + bound_material, _ = binding_api.ComputeBoundMaterial() + + if bound_material: + bound_prim = bound_material.GetPrim() + UsdPhysics.MaterialAPI.Apply(bound_prim) + material_api = UsdPhysics.MaterialAPI(bound_prim) + material_api.CreateDensityAttr().Set(physics_params["density"]) + material_api.CreateRestitutionAttr().Set(physics_params["restitution"]) + material_api.CreateStaticFrictionAttr().Set(physics_params["static_friction"]) + material_api.CreateDynamicFrictionAttr().Set(physics_params["dynamic_friction"]) + + # --- D. 
Core Rigid Body --- + prim = mesh.GetPrim() + + prim.SetMetadata( + "apiSchemas", + Sdf.TokenListOp.CreateExplicit( + ["PhysicsRigidBodyAPI", "PhysicsMassAPI", "PhysxRigidBodyAPI"] + ), + ) + + prim.SetMetadata("kind", "component") + + collision_api = UsdPhysics.CollisionAPI.Apply(prim) + collision_api.CreateCollisionEnabledAttr(physics_params["enable_collision"]) + + mesh_collision_api = UsdPhysics.MeshCollisionAPI.Apply(prim) + mesh_collision_api.CreateApproximationAttr().Set( + UsdPhysics.Tokens.convexDecomposition + ) + + def set_attr(name, type_name, value): + attr = prim.CreateAttribute(name, type_name) + attr.Set(value) + + set_attr("physics:rigidBodyEnabled", Sdf.ValueTypeNames.Bool, True) + set_attr("physics:kinematicEnabled", Sdf.ValueTypeNames.Bool, False) + set_attr("physics:startsAsleep", Sdf.ValueTypeNames.Bool, False) + + set_attr("physics:velocity", Sdf.ValueTypeNames.Vector3f, Gf.Vec3f(0, 0, 0)) + set_attr("physics:angularVelocity", Sdf.ValueTypeNames.Vector3f, Gf.Vec3f(0, 0, 0)) + set_attr("physics:centerOfMass", Sdf.ValueTypeNames.Point3f, Gf.Vec3f(0, 0, 0)) + set_attr("physics:mass", Sdf.ValueTypeNames.Float, physics_params["mass"]) + + def set_physx(name, type_name, value): + attr = prim.CreateAttribute(f"physxRigidBody:{name}", type_name) + attr.Set(value) + + set_physx( + "linearDamping", Sdf.ValueTypeNames.Float, physics_params["linear_damping"] + ) + set_physx( + "angularDamping", Sdf.ValueTypeNames.Float, physics_params["angular_damping"] + ) + + set_physx( + "maxLinearVelocity", + Sdf.ValueTypeNames.Float, + physics_params["max_linear_velocity"], + ) + set_physx( + "maxAngularVelocity", + Sdf.ValueTypeNames.Float, + physics_params["max_angular_velocity"], + ) + set_physx( + "maxDepenetrationVelocity", + Sdf.ValueTypeNames.Float, + physics_params["max_depenetration_velocity"], + ) + + set_physx("enableCCD", Sdf.ValueTypeNames.Bool, physics_params["enable_ccd"]) + set_physx("enableSpeculativeCCD", Sdf.ValueTypeNames.Bool, False) + + 
set_physx( + "sleepThreshold", Sdf.ValueTypeNames.Float, physics_params["sleep_threshold"] + ) + set_physx("stabilizationThreshold", Sdf.ValueTypeNames.Float, 0.001) + + set_physx( + "solverPositionIterationCount", + Sdf.ValueTypeNames.Int, + physics_params["solver_min_position_iters"], + ) + set_physx( + "solverVelocityIterationCount", + Sdf.ValueTypeNames.Int, + physics_params["solver_min_velocity_iters"], + ) + + set_physx("lockedPosAxis", Sdf.ValueTypeNames.Int, 0) + set_physx("lockedRotAxis", Sdf.ValueTypeNames.Int, 0) + + # --- E. Collision --- + collision_api = UsdPhysics.CollisionAPI.Apply(prim) + collision_api.CreateCollisionEnabledAttr(physics_params["enable_collision"]) + + mesh_collision_api = UsdPhysics.MeshCollisionAPI.Apply(prim) + mesh_collision_api.CreateApproximationAttr().Set( + UsdPhysics.Tokens.convexDecomposition + ) + + # --- F. Extended --- + prim.CreateAttribute("sim:linearDamping", Sdf.ValueTypeNames.Float).Set( + float(physics_params["linear_damping"]) + ) + prim.CreateAttribute("sim:angularDamping", Sdf.ValueTypeNames.Float).Set( + float(physics_params["angular_damping"]) + ) + prim.CreateAttribute("sim:contactOffset", Sdf.ValueTypeNames.Float).Set( + float(physics_params["contact_offset"]) + ) + prim.CreateAttribute("sim:restOffset", Sdf.ValueTypeNames.Float).Set( + float(physics_params["rest_offset"]) + ) + + prim.CreateAttribute("physx:enableCCD", Sdf.ValueTypeNames.Bool).Set( + physics_params["enable_ccd"] + ) + prim.CreateAttribute("physx:maxLinearVelocity", Sdf.ValueTypeNames.Float).Set( + physics_params["max_linear_velocity"] + ) + prim.CreateAttribute("physx:maxAngularVelocity", Sdf.ValueTypeNames.Float).Set( + physics_params["max_angular_velocity"] + ) + prim.CreateAttribute( + "physx:solverPositionIterationCount", Sdf.ValueTypeNames.Int + ).Set(physics_params["solver_min_position_iters"]) + prim.CreateAttribute( + "physx:solverVelocityIterationCount", Sdf.ValueTypeNames.Int + ).Set(physics_params["solver_min_velocity_iters"]) + 
prim.CreateAttribute( + "physx:maxDepenetrationVelocity", Sdf.ValueTypeNames.Float + ).Set(physics_params["max_depenetration_velocity"]) + prim.CreateAttribute("physx:sleepThreshold", Sdf.ValueTypeNames.Float).Set( + physics_params["sleep_threshold"] + ) + + stage.GetRootLayer().Save() + print(f"--- Exported base USD: {output_path} ---") + + +def convert_model_to_usd( + input_path: Union[str, Path], + out_dir: Union[str, Path] = "./output_usd", + physics_params: Optional[Dict[str, float]] = None, +) -> Dict[str, Path]: + """ + Importable conversion entry point. + + Args: + input_path: source .glb / mesh path + out_dir: output directory + physics_params: optional override of DEFAULT_PHYSICS_PARAMS + + Returns: + dict with output paths + """ + input_path = Path(input_path).resolve() + output_dir = Path(out_dir).resolve() + base_name = input_path.stem + + final_params = DEFAULT_PHYSICS_PARAMS.copy() + if physics_params: + final_params.update(physics_params) + + if not input_path.exists(): + raise FileNotFoundError(f"Input file not found: {input_path}") + + with tempfile.TemporaryDirectory() as temp_str: + temp_dir = Path(temp_str) + print(f"\n>>> Processing: {base_name}") + + temp_tex_dir = temp_dir / "textures" + temp_tex_dir.mkdir(parents=True, exist_ok=True) + + temp_base_usd = temp_dir / f"{base_name}_inst_base.usda" + temp_inst_usdc = temp_dir / f"{base_name}_inst.usdc" + temp_usdz = temp_dir / f"{base_name}_inst.usdz" + + mesh_data = parse_glb_with_trimesh(input_path, temp_tex_dir) + build_clean_usd(mesh_data, temp_base_usd, final_params) + + inst_stage = Usd.Stage.CreateNew(str(temp_inst_usdc)) + UsdGeom.SetStageUpAxis(inst_stage, UsdGeom.Tokens.z) + UsdGeom.SetStageMetersPerUnit(inst_stage, 1.0) + + inst_root = UsdGeom.Xform.Define(inst_stage, "/RootNode") + inst_stage.SetDefaultPrim(inst_root.GetPrim()) + inst_root.GetPrim().GetReferences().AddReference(f"./{temp_base_usd.name}") + inst_stage.GetRootLayer().Save() + + UsdUtils.CreateNewUsdzPackage( + 
Sdf.AssetPath(str(temp_inst_usdc)), str(temp_usdz) + ) + + output_dir.mkdir(parents=True, exist_ok=True) + + shutil.copy2(temp_base_usd, output_dir / temp_base_usd.name) + shutil.copy2(temp_inst_usdc, output_dir / temp_inst_usdc.name) + + if temp_usdz.exists(): + shutil.copy2(temp_usdz, output_dir / temp_usdz.name) + if temp_tex_dir.exists(): + shutil.copytree(temp_tex_dir, output_dir / "textures", dirs_exist_ok=True) + + print(f"\n>>> Pipeline completed successfully: {output_dir}") + + return { + "output_dir": output_dir, + "base_usd": output_dir / temp_base_usd.name, + "inst_usdc": output_dir / temp_inst_usdc.name, + "usdz": output_dir / temp_usdz.name, + "textures_dir": output_dir / "textures", + } + + +def load_physics_from_json(json_path: Optional[Path]) -> Optional[Dict[str, Any]]: + + if not json_path: + return None + + if not json_path.exists(): + print( + f"[Warning] JSON file not found: {json_path}, using default physics params." + ) + return None + + try: + with open(json_path, "r", encoding="utf-8") as f: + json_data = json.load(f) + + physics_data = json_data.get("physics", {}).get("properties", {}).get("data") + + if physics_data and isinstance(physics_data, dict): + print(f"[Info] Successfully loaded physics params from JSON.") + return physics_data + else: + print( + f"[Warning] Invalid JSON structure: missing physics.properties.data, using default params." + ) + return None + + except Exception as e: + print( + f"[Warning] Failed to parse JSON file: {str(e)}, using default physics params." + ) + return None + + +def main(): + parser = argparse.ArgumentParser( + description="3D Assets to USD/USDZ conversion pipeline with full physics support." + ) + parser.add_argument( + "--input", required=True, type=Path, help="Path to the source .glb mesh file." 
+ ) + parser.add_argument( + "--json", + type=Path, + default=None, + help="Path to the metadata JSON file (optional, for physics params).", + ) + parser.add_argument( + "--out_dir", + default=Path("./output_usd"), + type=Path, + help="Target directory for final USD/USDZ assets.", + ) + args = parser.parse_args() + + user_physics_params = load_physics_from_json(args.json) + + convert_model_to_usd( + input_path=args.input, out_dir=args.out_dir, physics_params=user_physics_params + ) + + +if __name__ == "__main__": + main() From 06de01be17e38f15fdec6c42c347cdc54665229f Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Mon, 18 May 2026 15:48:47 +0800 Subject: [PATCH 02/17] clean code --- .../simready_pipeline/configs/gen_config.json | 5 +- .../simready_pipeline/parser/geometry.py | 1 - .../simready_pipeline/pipeline/ingest.py | 1 - .../simready_pipeline/utils/geometry_utils.py | 2 - .../simready_pipeline/utils/simready_utils.py | 344 ------------------ 5 files changed, 2 insertions(+), 351 deletions(-) diff --git a/embodichain/toolkits/simready_pipeline/configs/gen_config.json b/embodichain/toolkits/simready_pipeline/configs/gen_config.json index 85d97390..bb137dbe 100644 --- a/embodichain/toolkits/simready_pipeline/configs/gen_config.json +++ b/embodichain/toolkits/simready_pipeline/configs/gen_config.json @@ -1,7 +1,6 @@ { "ingest": { "canonical_asset_name": "asset.obj", - "canonical_texture_name": "", "unprocessed_formats": [".urdf", ".usd"], "parseable_mesh_formats": [".glb", ".gltf", ".obj", ".ply", ".stl"], "blender_texture_size": 2048, @@ -9,10 +8,10 @@ }, "geometry_cleanup": { "ratio": 0.5, + "weld_distance": 0.0001, "merge_dist": 0.00001, "remove_non_manifold": true, - "triangulate": false, - "fill_hole_sides": 8 + "triangulate": false }, "llm": { "azure_openai": { diff --git a/embodichain/toolkits/simready_pipeline/parser/geometry.py b/embodichain/toolkits/simready_pipeline/parser/geometry.py index af9f348c..ab0d9196 100644 --- 
a/embodichain/toolkits/simready_pipeline/parser/geometry.py +++ b/embodichain/toolkits/simready_pipeline/parser/geometry.py @@ -107,7 +107,6 @@ def parse(self, asset: Asset, asset_root: Path) -> None: "remove_non_manifold", True ), triangulate=GEOMETRY_CLEANUP_CONFIG.get("triangulate", False), - fill_hole_sides=GEOMETRY_CLEANUP_CONFIG.get("fill_hole_sides", 8), ) try: diff --git a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py index 4d323f7c..b39cf8a9 100644 --- a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py +++ b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py @@ -43,7 +43,6 @@ def _load_ingest_config() -> dict: INGEST_CONFIG = _load_ingest_config() CANOCAIL_ASSET_NAME = INGEST_CONFIG.get("canonical_asset_name", "asset.obj") -CANOCAIL_TEXTURE_NAME = INGEST_CONFIG.get("canonical_texture_name", "") UNPROCESSED_FORMATS = INGEST_CONFIG.get( "unprocessed_formats", [".urdf", ".usd"] ) # 当前先复制,后续可以考虑解析 diff --git a/embodichain/toolkits/simready_pipeline/utils/geometry_utils.py b/embodichain/toolkits/simready_pipeline/utils/geometry_utils.py index 98b6f145..6b4cfeee 100644 --- a/embodichain/toolkits/simready_pipeline/utils/geometry_utils.py +++ b/embodichain/toolkits/simready_pipeline/utils/geometry_utils.py @@ -169,7 +169,6 @@ def process_obj( merge_dist=1e-5, remove_non_manifold=True, triangulate=False, - fill_hole_sides=8, ): clear_scene() objs = load_obj(input_path) @@ -188,7 +187,6 @@ def process_obj( triangulate=triangulate, ) decimate_optimized(obj, ratio=ratio, weld_distance=weld_distance) - # fill_holes(obj, max_sides=fill_hole_sides) export_obj(obj, output_path) print("Clean mesh saved to:", output_path) diff --git a/embodichain/toolkits/simready_pipeline/utils/simready_utils.py b/embodichain/toolkits/simready_pipeline/utils/simready_utils.py index 0c5767b5..7542732d 100644 --- a/embodichain/toolkits/simready_pipeline/utils/simready_utils.py +++ 
b/embodichain/toolkits/simready_pipeline/utils/simready_utils.py @@ -134,135 +134,6 @@ def compute_support_area(mesh, eps=1e-2): return 0.0 -# def init_pose(mesh_input): -# fallback_mesh = None -# mesh: trimesh.Trimesh = None -# if isinstance(mesh_input, trimesh.Trimesh): -# mesh = mesh_input -# fallback_mesh = mesh_input.copy() -# else: -# mesh_path = Path(mesh_input).resolve() -# if not mesh_path.exists(): -# raise FileNotFoundError(f"Mesh file not found: {mesh_path}") -# mesh = trimesh.load(mesh_path, force="mesh") -# fallback_mesh = mesh.copy() - -# def compute_pca_axes(mesh): -# verts = np.asarray(mesh.vertices) -# centroid = verts.mean(axis=0) -# centered = verts - centroid -# cov = np.cov(centered.T) -# U, _, _ = np.linalg.svd(cov) -# R = U -# if np.linalg.det(R) < 0: -# R[:, 2] *= -1 -# return R - -# def closest_axis(v): -# idx = np.argmax(np.abs(v)) -# sign = np.sign(v[idx]) -# axis = np.zeros(3) -# axis[idx] = sign -# return axis - -# def generate_discrete_flips(): -# rotations = [] -# Rx90 = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) - -# Ry90 = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]]) - -# I = np.eye(3) -# rotations.append(I) -# Rx180 = np.dot(Rx90, Rx90) -# rotations.append(Rx180) -# rotations.append(Rx90) -# Rx_neg90 = Rx90.T -# rotations.append(Rx_neg90) -# rotations.append(Ry90) -# Ry_neg90 = Ry90.T -# rotations.append(Ry_neg90) -# return rotations - -# def _process_and_score(initial_mesh, alignment_type: str): -# m = initial_mesh.copy() -# if alignment_type == "pca": -# R_pca = compute_pca_axes(m) -# T = np.eye(4) -# T[:3, :3] = R_pca.T -# m.apply_transform(T) -# U = compute_pca_axes(m) -# x_axis, y_axis, z_axis = U[:, 0], U[:, 1], U[:, 2] -# new_x = closest_axis(x_axis) -# new_y = closest_axis(y_axis) -# new_z = closest_axis(z_axis) -# new_z /= np.linalg.norm(new_z) -# new_x = new_x - new_z * np.dot(new_x, new_z) -# new_x /= np.linalg.norm(new_x) -# new_y = np.cross(new_z, new_x) -# R_snap = np.column_stack((new_x, new_y, new_z)) -# T_snap = 
np.eye(4) -# T_snap[:3, :3] = R_snap -# m.apply_transform(T_snap) - -# elif alignment_type == "obb": -# to_origin, _ = trimesh.bounds.oriented_bounds(m) -# m.apply_transform(to_origin) -# R = to_origin[:3, :3] -# if np.linalg.det(R) < 0: -# fix = np.eye(4) -# fix[2, 2] = -1 -# m.apply_transform(fix) -# else: -# raise ValueError(f"Unknown alignment type: {alignment_type}") - -# best_score = float("inf") -# best_mesh = None - -# for R_flip in generate_discrete_flips(): -# m_candidate = m.copy() - -# T_flip = np.eye(4) -# T_flip[:3, :3] = R_flip -# m_candidate.apply_transform(T_flip) - -# z_min = m_candidate.bounds[0][2] -# m_candidate.apply_translation([0, 0, -z_min]) - -# area = compute_support_area(m_candidate) -# height = m.bounds[1][2] - m.bounds[0][2] -# score = height - 0.1 * area - -# if score < best_score: -# best_score = score -# best_mesh = m_candidate - -# return best_mesh, best_score - -# try: -# mesh_original = mesh.copy() -# final_mesh_pca, score_pca = _process_and_score(mesh_original, "pca") -# final_mesh_obb, score_obb = _process_and_score(mesh_original, "obb") -# if score_pca <= score_obb: -# print( -# f"Selected PCA alignment (Score: {score_pca:.4f} vs OBB: {score_obb:.4f})" -# ) -# result_mesh = final_mesh_pca -# STRATEGY = "PCA" -# else: -# print( -# f"Selected OBB alignment (Score: {score_obb:.4f} vs PCA: {score_pca:.4f})" -# ) -# result_mesh = final_mesh_obb -# STRATEGY = "OBB" - -# normalize_to_unit_cube(result_mesh) -# return result_mesh - -# except Exception as e: -# print(f"Alignment failed, fallback. Error: {e}") -# return fallback_mesh - - import numpy as np import trimesh from pathlib import Path @@ -652,221 +523,6 @@ def ask_mllm_primary_surface( return extract_json(raw) -# def ask_mllm_primary_surface( -# views_data, -# object_name="None", -# main_surface="None", -# orientation_requirement="None", -# extra_text="", -# ): -# instruction_text = f""" -# You are a single-purpose multimodal classifier. 
-# You will be given 6 images of one object rendered from different viewpoints. - -# Your task is to identify the ONE image that best shows the object's PRIMARY FUNCTIONAL SURFACE. - -# DEFINITION OF PRIMARY FUNCTIONAL SURFACE: -# - The surface that a human would normally face, look at, press, open, aim at, read from, or interact with in standard use. -# - It is the object's main semantic front / operating face, not merely the largest visible face. -# - It is usually the surface that carries the core function of the object. -# - It must be a unique, human-facing, functional surface. -# - Do NOT choose a support/base/bottom surface. -# - Do NOT choose a random side that happens to be large or clear if it is not the functional face. -# - Do NOT choose a view just because it shows more pixels of the object. - -# HARD PRIORITY RULE: -# - If the object is a laptop, monitor, smartphone, tablet, camera, TV, oven, speaker, microwave, printer, or similar device, the view showing the FRONT FUNCTIONAL FACE must be preferred. -# - For a laptop specifically: the SCREEN VIEW is the primary surface. The keyboard view is secondary and should NOT be chosen unless the screen is not visible at all and the object is not truly a laptop-like clamshell device. -# - For a monitor / display: choose the view where the screen face is most directly visible. -# - For a smartphone / tablet: choose the view where the screen/front glass is most directly visible. -# - For a camera: choose the lens/front face. -# - For a speaker: choose the grille/front face. -# - For an oven / microwave / cabinet / box with a door: choose the door/front panel face. -# - For a brush / broom / tool with a working head: choose the working head or active end if that is the main interaction face. -# - For objects with a clear labeled front, controls, opening, display, nozzle, spout, or intake/output face, choose that face. - -# SELECTION HEURISTICS: -# Prefer the image where the functional surface is: -# 1. 
most directly facing the camera -# 2. most centered and unobstructed -# 3. most clearly identifiable as the object's real-world front / operating face -# 4. supported by the object's category - -# DO NOT BE MISLED BY: -# - top-down views that expose a larger area but are not the real functional front -# - keyboard surfaces on laptops when the screen is available -# - backs, undersides, bases, or support faces -# - symmetric surfaces with no special functional meaning -# - views that show more geometry but less semantic evidence - -# Additional guidance based on prior classification: -# - Detected object: {object_name} -# - Possible main surface: {main_surface} - -# How to use the additional guidance: -# - If main_surface is not "None", use it as a strong hint for what the functional face is. -# - If orientation_requirement is not "None", use it to infer whether the functional face should be face_up, face_forward, or face_down in normal use. -# - If the hint conflicts with clear visual evidence, trust the images. -# - If the object is category 2, prefer the image that best matches the expected real-world functional orientation, not merely the most visible surface in the render. - -# OUTPUT FORMAT: -# Return only one valid JSON object with exactly one field: - -# {{ -# "primary_surface_view": string or null -# }} - -# RULES: -# - The value must be one of the provided image IDs / view names exactly as given in the input. -# - If the object has no clear primary functional surface, return null. -# - If multiple views show the same surface, choose the one that shows it most directly and most clearly. -# - Do not add any extra text, explanation, markdown, or comments. -# - The JSON must be syntactically valid and parseable. - -# IMPORTANT DECISION EXAMPLES: -# - Laptop -> choose the screen view, not the keyboard view. -# - Monitor -> choose the screen/front view. -# - Smartphone -> choose the screen/front-glass view. -# - Camera -> choose the lens/front view. 
-# - Speaker -> choose the front grille view. -# - Microwave/Oven -> choose the front door/panel view. -# - Brush -> choose the working head / bristle side if that is the functional face. -# - Broom -> choose the sweeping head side if it is the functional face. - -# You are selecting the view that best reveals the object's real functional front, not the view that simply shows the object most fully. -# Use the six views together, plus any Additional context text: -# {extra_text if extra_text else "None"} -# """ - -# content = [{"type": "text", "text": instruction_text}] - -# if extra_text and extra_text.strip(): -# content.append( -# {"type": "text", "text": f"Additional context: {extra_text.strip()}"} -# ) - -# content.extend(build_image_inputs(views_data)) -# resp = client.chat.completions.create( -# model=DEPLOYMENT, -# temperature=0.0, -# messages=[{"role": "user", "content": content}], -# ) -# raw = resp.choices[0].message.content -# return extract_json(raw) - - -# def ask_mllm_primary_surface( -# views_data, -# object_name="None", -# main_surface="None", -# orientation_requirement="None", -# extra_text="", -# ): -# instruction_text = f""" -# You are a single-purpose multimodal classifier. -# You will be given 6 images of one object rendered from different viewpoints. - -# Your task is to identify the ONE image that best shows the object's PRIMARY FUNCTIONAL SURFACE. - -# CRITICAL TARGET: -# - The correct image is the one where the object's primary surface is DIRECTLY FACING THE VIEWER. -# - In other words, the surface should be as FRONT-FACING / FACE-ON / HEAD-ON as possible. -# - The main surface should look like the object's true front face in the rendered view. -# - Do NOT choose a view where the surface is merely visible from an angle. -# - Do NOT choose a top, side, or bottom view unless that is truly the object's normal front-facing functional face. 
-# - The goal is to select the image that a human would naturally use when looking straight at the object from its functional front. - -# DEFINITION OF PRIMARY FUNCTIONAL SURFACE: -# - The surface that a human would normally face, look at, press, open, aim at, read from, or interact with in standard use. -# - It is the object's main semantic front / operating face, not merely the largest visible face. -# - It is usually the surface that carries the core function of the object. -# - It must be a unique, human-facing, functional surface. -# - It must be the surface that is most naturally "facing us" in normal use. -# - Do NOT choose a support/base/bottom surface. -# - Do NOT choose a random side that happens to be large or clear if it is not the functional face. -# - Do NOT choose a view just because it shows more pixels of the object. - -# HARD PRIORITY RULE: -# - If the object is a laptop, monitor, smartphone, tablet, camera, TV, oven, speaker, microwave, printer, or similar device, the view showing the FRONT FUNCTIONAL FACE must be preferred. -# - For a laptop specifically: the SCREEN VIEW is the primary surface. The keyboard view is secondary and should NOT be chosen unless the screen is not visible at all and the object is not truly a laptop-like clamshell device. -# - For a monitor / display: choose the view where the screen face is most directly visible and most face-on. -# - For a smartphone / tablet: choose the view where the screen/front glass is most directly visible and most face-on. -# - For a camera: choose the lens/front face that is most directly facing the viewer. -# - For a speaker: choose the grille/front face that is most directly facing the viewer. -# - For an oven / microwave / cabinet / box with a door: choose the door/front panel face that is most directly facing the viewer. 
-# - For a brush / broom / tool with a working head: choose the working head or active end if that is the main interaction face, but only if it is the face that would be naturally presented toward the user. -# - For objects with a clear labeled front, controls, opening, display, nozzle, spout, or intake/output face, choose the image where that face is most directly facing the viewer. - -# SELECTION HEURISTICS: -# Prefer the image where the functional surface is: -# 1. most directly facing the camera / viewer -# 2. least foreshortened -# 3. most centered and unobstructed -# 4. most clearly identifiable as the object's real-world front / operating face -# 5. most consistent with the object's category - -# DO NOT BE MISLED BY: -# - top-down views that expose a larger area but are not the real front face -# - keyboard surfaces on laptops when the screen is available -# - backs, undersides, bases, or support faces -# - symmetric surfaces with no special functional meaning -# - views that show more geometry but less semantic evidence -# - views where the right surface exists but is shown at an angle instead of facing us directly - -# Additional guidance based on prior classification: -# - Detected object: {object_name} -# - Possible main surface: {main_surface} - -# How to use the additional guidance: -# - If main_surface is not "None", use it as a strong hint for what the functional face is. -# - If the hint conflicts with clear visual evidence, trust the images. - -# OUTPUT FORMAT: -# Return only one valid JSON object with exactly one field: - -# {{ -# "primary_surface_view": string or null -# }} - -# RULES: -# - The value must be one of the provided image IDs / view names exactly as given in the input. -# - If the object has no clear primary functional surface, return null. -# - If multiple views show the same surface, choose the one where that surface is most directly facing the viewer and most face-on. 
-# - Do not add any extra text, explanation, markdown, or comments. -# - The JSON must be syntactically valid and parseable. - -# IMPORTANT DECISION EXAMPLES: -# - Laptop -> choose the screen view, not the keyboard view. -# - Monitor -> choose the screen/front view. -# - Smartphone -> choose the screen/front-glass view. -# - Camera -> choose the lens/front view. -# - Speaker -> choose the front grille view. -# - Microwave/Oven -> choose the front door/panel view. -# - Brush -> choose the working head / bristle side if that is the functional face, but only when it is the face most directly facing the viewer. -# - Broom -> choose the sweeping head side if it is the functional face, but only when it is the face most directly facing the viewer. - -# You are selecting the view that best reveals the object's real functional front as a face-on view, not the view that simply shows the object most fully. -# Use the six views together, plus any Additional context text: -# {extra_text if extra_text else "None"} -# """ - -# content = [{"type": "text", "text": instruction_text}] - -# if extra_text and extra_text.strip(): -# content.append( -# {"type": "text", "text": f"Additional context: {extra_text.strip()}"} -# ) - -# content.extend(build_image_inputs(views_data)) -# resp = client.chat.completions.create( -# model=DEPLOYMENT, -# temperature=0.0, -# messages=[{"role": "user", "content": content}], -# ) -# raw = resp.choices[0].message.content -# return extract_json(raw) - - def ask_llm_upright_2a1(object_name, upright_img_path, flipped_img_path): for p in [upright_img_path, flipped_img_path]: img_path = Path(p).resolve() From 6148c218866364ab96472a42b2619858704d2bf2 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Mon, 18 May 2026 15:53:03 +0800 Subject: [PATCH 03/17] clean code --- embodichain/toolkits/simready_pipeline/utils/texture_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/embodichain/toolkits/simready_pipeline/utils/texture_utils.py 
b/embodichain/toolkits/simready_pipeline/utils/texture_utils.py index a9ef99b9..7a2898e8 100644 --- a/embodichain/toolkits/simready_pipeline/utils/texture_utils.py +++ b/embodichain/toolkits/simready_pipeline/utils/texture_utils.py @@ -19,7 +19,6 @@ from typing import Any, Dict, List, Optional, Tuple import trimesh - PBR_TEXTURE_FIELDS = ( "baseColorTexture", "metallicRoughnessTexture", From 8624b49cd1d3b21d41482800325242abcea5a608 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Wed, 20 May 2026 14:18:17 +0800 Subject: [PATCH 04/17] add bake and remesh --- .../simready_pipeline/configs/gen_config.json | 6 +- .../simready_pipeline/pipeline/ingest.py | 7 + .../simready_pipeline/utils/ingest_utils.py | 351 ++++++++---------- 3 files changed, 176 insertions(+), 188 deletions(-) diff --git a/embodichain/toolkits/simready_pipeline/configs/gen_config.json b/embodichain/toolkits/simready_pipeline/configs/gen_config.json index bb137dbe..47a060a3 100644 --- a/embodichain/toolkits/simready_pipeline/configs/gen_config.json +++ b/embodichain/toolkits/simready_pipeline/configs/gen_config.json @@ -4,7 +4,11 @@ "unprocessed_formats": [".urdf", ".usd"], "parseable_mesh_formats": [".glb", ".gltf", ".obj", ".ply", ".stl"], "blender_texture_size": 2048, - "blender_texture_name": "surface_texture.png" + "blender_texture_name": "surface_texture.png", + "blender_remesh_bake": { + "voxel_size": 0.01, + "decimate_ratio": 0.5 + } }, "geometry_cleanup": { "ratio": 0.5, diff --git a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py index b39cf8a9..3bcc4497 100644 --- a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py +++ b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py @@ -52,6 +52,11 @@ def _load_ingest_config() -> dict: tex_size: int = int(INGEST_CONFIG.get("blender_texture_size", 2048)) png_name: str = INGEST_CONFIG.get("blender_texture_name", "surface_texture.png") +BLENDER_REMESH_BAKE_CONFIG = 
INGEST_CONFIG.get("blender_remesh_bake", {}) +voxel_size: float = float(BLENDER_REMESH_BAKE_CONFIG.get("voxel_size", 0.01)) +decimate_ratio: float = float( + BLENDER_REMESH_BAKE_CONFIG.get("decimate_ratio", 0.5) +) def ingest_one_asset( @@ -116,6 +121,8 @@ def find_first_mesh_file(files, formats): asset_source, texture_size=tex_size, png_name=png_name, + voxel_size=voxel_size, + decimate_ratio=decimate_ratio, obj_name=CANOCAIL_ASSET_NAME, ) diff --git a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py b/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py index 22c70094..98a5f48a 100644 --- a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py +++ b/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py @@ -143,6 +143,7 @@ def trimesh_parse_ingest( asset_source: Path, obj_name: str = "asset.obj", mtl_name: str = "asset.mtl", + write_files: bool = True, ): mesh = load_one_trimesh(source_file) if mesh is None: @@ -216,27 +217,28 @@ def trimesh_parse_ingest( else: print("[WARN] Unknown visual type → export raw") - obj_str, tex_dict = trimesh.exchange.obj.export_obj( - mesh, - include_normals=True, - include_color=True, - include_texture=True, - return_texture=True, - write_texture=False, - mtl_name=mtl_name, - ) + if write_files: + obj_str, tex_dict = trimesh.exchange.obj.export_obj( + mesh, + include_normals=True, + include_color=True, + include_texture=True, + return_texture=True, + write_texture=False, + mtl_name=mtl_name, + ) - # ===== 写 OBJ ===== - with open(obj_path, "w") as f: - f.write(obj_str) + # ===== 写 OBJ ===== + with open(obj_path, "w") as f: + f.write(obj_str) - # ===== 写 texture / mtl ===== - for name, data in tex_dict.items(): - file_path = asset_source / name + # ===== 写 texture / mtl ===== + for name, data in tex_dict.items(): + file_path = asset_source / name - if not file_path.exists(): - with open(file_path, "wb") as f: - f.write(data) + if not file_path.exists(): + with open(file_path, "wb") as f: + 
f.write(data) return {"visual_ingest": visual_ingest, "visual_source": visual} @@ -244,204 +246,179 @@ def trimesh_parse_ingest( import bpy -def clear_scene(): - bpy.ops.object.select_all(action="SELECT") - bpy.ops.object.delete(use_global=False, confirm=False) - for block in ( - bpy.data.meshes, - bpy.data.materials, - bpy.data.images, - bpy.data.collections, - ): - for item in list(block): - try: - block.remove(item) - except: - pass +def modify_mtl_file(mtl_path: Path, diffuse_name: str, normal_name: str) -> None: + """Modify an exported OBJ .mtl to reference baked textures.""" + mtl_path = Path(mtl_path) + if not mtl_path.exists(): + return + lines = mtl_path.read_text(encoding="utf-8", errors="ignore").splitlines(True) -def import_model(path: Path): - ext = path.suffix.lower() + new_lines = [] + for line in lines: + if line.startswith("Ns "): + new_lines.append("Ns 500.000000\n") + elif line.startswith("Ka "): + new_lines.append("Ka 1.000000 1.000000 1.000000\n") + elif line.startswith("Ks "): + new_lines.append("Ks 0.500000 0.500000 0.500000\n") + else: + new_lines.append(line) - if ext == ".obj": - bpy.ops.wm.obj_import(filepath=str(path)) - elif ext in [".fbx"]: - bpy.ops.import_scene.fbx(filepath=str(path)) - elif ext in [".gltf", ".glb"]: - bpy.ops.import_scene.gltf(filepath=str(path)) - elif ext in [".ply"]: - bpy.ops.wm.ply_import(filepath=str(path)) - else: - raise RuntimeError(f"Unsupported extension: {ext}") + new_lines.append(f"map_Kd {diffuse_name}\n") + new_lines.append(f"map_Bump {normal_name}\n") + new_lines.append(f"bump {normal_name} -bm 1.0\n") - imported = [o for o in bpy.context.scene.objects if o.type == "MESH"] - return imported + mtl_path.write_text("".join(new_lines), encoding="utf-8") -def setup_studio_lighting(): - scene = bpy.context.scene - scene.render.engine = "CYCLES" - cycles = scene.cycles - cycles.samples = 128 - cycles.use_adaptive_sampling = True - - world = scene.world or bpy.data.worlds.new("World") - scene.world = world - 
world.use_nodes = True - nodes = world.node_tree.nodes - nodes.clear() +def blender_remesh_bake( + source_file: Path, + asset_source: Path, + texture_size: int = 2048, + png_name: str = "surface_texture.png", + voxel_size: float = 0.01, + decimate_ratio: float = 0.5, + obj_name: str = "asset.obj", +): + """Remesh a high-poly mesh into a low-poly one and bake textures via Blender.""" + asset_source = Path(asset_source) + asset_source.mkdir(parents=True, exist_ok=True) + source_file = Path(source_file) - bg = nodes.new(type="ShaderNodeBackground") - bg.inputs["Color"].default_value = (0.8, 0.8, 0.8, 1.0) - out = nodes.new(type="ShaderNodeOutputWorld") - world.node_tree.links.new(bg.outputs["Background"], out.inputs["Surface"]) + bpy.ops.wm.read_factory_settings(use_empty=True) + ext = source_file.suffix.lower() + if ext == ".obj": + bpy.ops.wm.obj_import(filepath=str(source_file)) + elif ext == ".fbx": + bpy.ops.import_scene.fbx(filepath=str(source_file)) + elif ext in [".gltf", ".glb"]: + bpy.ops.import_scene.gltf(filepath=str(source_file)) + elif ext == ".ply": + bpy.ops.wm.ply_import(filepath=str(source_file)) + else: + raise RuntimeError(f"Unsupported extension: {ext}") -def duplicate_and_join(objs, name="BAKE_MESH"): - if not objs: - return None - bpy.ops.object.select_all(action="DESELECT") - for o in objs: - o.select_set(True) - bpy.context.view_layer.objects.active = objs[0] - bpy.ops.object.duplicate() - dupes = [o for o in bpy.context.selected_objects if o.type == "MESH"] - bpy.context.view_layer.objects.active = dupes[0] - bpy.ops.object.join() - joined = bpy.context.active_object - joined.name = name - return joined - - -def ensure_uv(obj): - me = obj.data - if len(me.uv_layers) == 0: - bpy.context.view_layer.objects.active = obj - bpy.ops.object.mode_set(mode="EDIT") - bpy.ops.mesh.select_all(action="SELECT") - bpy.ops.uv.smart_project(angle_limit=66.0, island_margin=0.02) + if bpy.ops.object.mode_set.poll(): bpy.ops.object.mode_set(mode="OBJECT") + 
imported_meshes = [obj for obj in bpy.context.scene.objects if obj.type == "MESH"] + if not imported_meshes: + raise RuntimeError("No mesh object after import") -def get_vertex_color_layer(obj): - me = obj.data - if hasattr(me, "color_attributes") and len(me.color_attributes) > 0: - return me.color_attributes.active_color.name - return None - - -def inject_vertex_color_to_material(mat, vcol_name): - if not mat.use_nodes: - mat.use_nodes = True - nodes = mat.node_tree.nodes - links = mat.node_tree.links - - pnode = next((n for n in nodes if n.type == "BSDF_PRINCIPLED"), None) - if not pnode: - pnode = nodes.new(type="ShaderNodeBsdfPrincipled") - - attr = nodes.new(type="ShaderNodeAttribute") - attr.attribute_name = vcol_name - links.new(attr.outputs["Color"], pnode.inputs["Base Color"]) - - -def add_bake_image_node(mat, image): - if not mat.use_nodes: - mat.use_nodes = True - nodes = mat.node_tree.nodes - - img_node = nodes.new(type="ShaderNodeTexImage") - img_node.image = image - img_node.name = "BAKE_TARGET" + bpy.ops.object.select_all(action="DESELECT") + for obj in imported_meshes: + obj.select_set(True) + bpy.context.view_layer.objects.active = imported_meshes[0] - nodes.active = img_node - img_node.select = True - return img_node + if len(imported_meshes) > 1: + bpy.ops.object.join() + high_poly = bpy.context.view_layer.objects.active + if not high_poly or high_poly.type != "MESH": + raise RuntimeError("No active mesh object after import") + high_poly.name = "High_Poly" + auto_extrusion = max(high_poly.dimensions) * 0.05 -def create_baked_material_assign(obj, image, mat_name="BAKED_SURFACE_MAT"): - mat = bpy.data.materials.new(name=mat_name) + bpy.ops.object.select_all(action="DESELECT") + high_poly.select_set(True) + bpy.context.view_layer.objects.active = high_poly + bpy.ops.object.duplicate() + low_poly = bpy.context.active_object + if not low_poly: + raise RuntimeError("Failed to duplicate object") + low_poly.name = "Low_Poly_Target" + try: + 
low_poly.data.materials.clear() + except Exception: + pass + + rem = low_poly.modifiers.new(name="Remesh", type="REMESH") + rem.mode = "VOXEL" + rem.voxel_size = max(float(voxel_size), max(high_poly.dimensions) * 0.005) + rem.use_smooth_shade = True + bpy.ops.object.modifier_apply(modifier="Remesh") + + dec = low_poly.modifiers.new(name="Decimate", type="DECIMATE") + dec.ratio = float(decimate_ratio) + bpy.ops.object.modifier_apply(modifier="Decimate") + + bpy.context.view_layer.objects.active = low_poly + bpy.ops.object.mode_set(mode="EDIT") + bpy.ops.mesh.select_all(action="SELECT") + bpy.ops.uv.smart_project(angle_limit=66.0, island_margin=0.02) + bpy.ops.object.mode_set(mode="OBJECT") + + mat = bpy.data.materials.new(name="BakeMat") mat.use_nodes = True + low_poly.data.materials.append(mat) nodes = mat.node_tree.nodes nodes.clear() - img_node = nodes.new(type="ShaderNodeTexImage") - img_node.image = image - bsdf = nodes.new(type="ShaderNodeBsdfPrincipled") - out = nodes.new(type="ShaderNodeOutputMaterial") - - mat.node_tree.links.new(img_node.outputs["Color"], bsdf.inputs["Base Color"]) - mat.node_tree.links.new(bsdf.outputs["BSDF"], out.inputs["Surface"]) - - if obj.data.materials: - obj.data.materials[0] = mat - else: - obj.data.materials.append(mat) - return mat + def setup_node(name: str, is_color: bool): + img = bpy.data.images.new( + name, width=int(texture_size), height=int(texture_size) + ) + node = nodes.new("ShaderNodeTexImage") + node.image = img + if not is_color: + img.colorspace_settings.name = "Non-Color" + return node, img + diff_node, diff_img = setup_node("diffuse.png", True) + norm_node, norm_img = setup_node("normal.png", False) -# ------------------------- -# Main bake routine -# ------------------------- -def blender_parser_ingest( - source_file: Path, - asset_source: Path, - texture_size=2048, - png_name="surface_texture.png", - obj_name="asset.obj", -): - asset_source.mkdir(parents=True, exist_ok=True) - png_path = asset_source / 
png_name + scene = bpy.context.scene + scene.render.engine = "CYCLES" + scene.render.bake.use_selected_to_active = True + scene.render.bake.cage_extrusion = auto_extrusion - clear_scene() - imported = import_model(source_file) - if not imported: - raise RuntimeError("No mesh objects found after import.") + bpy.ops.object.select_all(action="DESELECT") + high_poly.select_set(True) + low_poly.select_set(True) + bpy.context.view_layer.objects.active = low_poly - setup_studio_lighting() - joined = duplicate_and_join(imported, name="BAKE_MESH") + nodes.active = diff_node + bpy.ops.object.bake(type="DIFFUSE", pass_filter={"COLOR"}) + diff_img.filepath_raw = str(asset_source / "diffuse.png") + diff_img.save() - bpy.context.view_layer.objects.active = joined - bpy.ops.object.transform_apply(location=True, rotation=True, scale=True) - ensure_uv(joined) + nodes.active = norm_node + bpy.ops.object.bake(type="NORMAL") + norm_img.filepath_raw = str(asset_source / "normal.png") + norm_img.save() - vcol_name = get_vertex_color_layer(joined) + export_path = asset_source / obj_name + bpy.ops.object.select_all(action="DESELECT") + low_poly.select_set(True) + bpy.ops.wm.obj_export(filepath=str(export_path), export_selected_objects=True) - img_name = Path(png_name).stem - bake_image = bpy.data.images.new( - img_name, width=int(texture_size), height=int(texture_size) - ) + mtl_path = asset_source / Path(obj_name).with_suffix(".mtl").name + modify_mtl_file(mtl_path, "diffuse.png", "normal.png") - if not joined.data.materials: - tmp_mat = bpy.data.materials.new(name="Bake_Temp_Material") - joined.data.materials.append(tmp_mat) + return { + "png": str(asset_source / "diffuse.png"), + "obj": str(export_path), + "mtl": str(mtl_path.name), + } - for slot in joined.material_slots: - if slot.material: - if vcol_name: - inject_vertex_color_to_material(slot.material, vcol_name) - add_bake_image_node(slot.material, bake_image) - bpy.context.scene.render.engine = "CYCLES" - 
bpy.ops.object.select_all(action="DESELECT") - joined.select_set(True) - bpy.context.view_layer.objects.active = joined - - print("Baking...") - bpy.ops.object.bake( - type="DIFFUSE", - pass_filter={"COLOR"}, - use_clear=True, - use_selected_to_active=False, - margin=16, +def blender_parse_ingest(source_file: Path, asset_source: Path, **kwargs): + res = blender_remesh_bake( + source_file=source_file, + asset_source=asset_source, + **kwargs, ) + try: + asset_obj = Path(res["obj"]) + vis = trimesh_parse_ingest(asset_obj, asset_source, write_files=False) + if isinstance(vis, dict): + res.update(vis) + except Exception: + pass + return res - bake_image.filepath_raw = str(png_path) - bake_image.save() - - create_baked_material_assign(joined, bake_image) - - out_obj = asset_source / obj_name - bpy.ops.wm.obj_export(filepath=str(out_obj), export_selected_objects=True) - return {"png": str(png_path), "obj": str(out_obj), "mtl": "asset.mtl"} +def blender_parser_ingest(source_file: Path, asset_source: Path, **kwargs): + return blender_parse_ingest(source_file, asset_source, **kwargs) From d57e0ae5fe9bc5fb0b6e7c202cae1309e609d4e8 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Wed, 20 May 2026 14:36:00 +0800 Subject: [PATCH 05/17] fix review bugs --- .../toolkits/simready_pipeline/__init__.py | 19 +++++++++ .../simready_pipeline/cli/__init__.py | 19 +++++++++ .../simready_pipeline/configs/__init__.py | 19 +++++++++ .../simready_pipeline/core/__init__.py | 19 +++++++++ .../toolkits/simready_pipeline/io/__init__.py | 19 +++++++++ .../simready_pipeline/io/json_store.py | 42 +++++++++++++------ .../simready_pipeline/parser/__init__.py | 19 +++++++++ .../simready_pipeline/pipeline/__init__.py | 19 +++++++++ .../simready_pipeline/pipeline/ingest.py | 4 +- .../simready_pipeline/utils/__init__.py | 19 +++++++++ .../simready_pipeline/utils/simready_utils.py | 2 + 11 files changed, 185 insertions(+), 15 deletions(-) create mode 100644 
embodichain/toolkits/simready_pipeline/__init__.py create mode 100644 embodichain/toolkits/simready_pipeline/cli/__init__.py create mode 100644 embodichain/toolkits/simready_pipeline/configs/__init__.py create mode 100644 embodichain/toolkits/simready_pipeline/core/__init__.py create mode 100644 embodichain/toolkits/simready_pipeline/io/__init__.py create mode 100644 embodichain/toolkits/simready_pipeline/parser/__init__.py create mode 100644 embodichain/toolkits/simready_pipeline/pipeline/__init__.py create mode 100644 embodichain/toolkits/simready_pipeline/utils/__init__.py diff --git a/embodichain/toolkits/simready_pipeline/__init__.py b/embodichain/toolkits/simready_pipeline/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/toolkits/simready_pipeline/cli/__init__.py b/embodichain/toolkits/simready_pipeline/cli/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/cli/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/toolkits/simready_pipeline/configs/__init__.py b/embodichain/toolkits/simready_pipeline/configs/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/configs/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/toolkits/simready_pipeline/core/__init__.py b/embodichain/toolkits/simready_pipeline/core/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/core/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/toolkits/simready_pipeline/io/__init__.py b/embodichain/toolkits/simready_pipeline/io/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/io/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/toolkits/simready_pipeline/io/json_store.py b/embodichain/toolkits/simready_pipeline/io/json_store.py index 2ee2b828..379c6f97 100644 --- a/embodichain/toolkits/simready_pipeline/io/json_store.py +++ b/embodichain/toolkits/simready_pipeline/io/json_store.py @@ -16,7 +16,7 @@ import json from pathlib import Path -from typing import Dict, Optional +from typing import Any, Optional from embodichain.toolkits.simready_pipeline.core.asset import Asset @@ -26,16 +26,39 @@ class JsonStore: Simple JSON-based store for Assets and a global registry. 
""" - def __init__(self, root_dir: str): + def __init__(self, root_dir: str | Path): self.root = Path(root_dir) + self.registry_path = self.root / "registry.json" def _get_asset_json_path(self, asset_id: str) -> Path: return self.root / asset_id / "asset.json" + def load_registry(self) -> dict[str, Any]: + if not self.registry_path.exists(): + return {"assets": {}} + + registry = json.loads(self.registry_path.read_text()) + registry.setdefault("assets", {}) + return registry + + def _write_registry(self, registry: dict[str, Any]) -> None: + self.registry_path.parent.mkdir(parents=True, exist_ok=True) + self.registry_path.write_text(json.dumps(registry, indent=2)) + + def _register_asset(self, asset_id: str, asset_json: dict[str, Any]) -> None: + registry = self.load_registry() + registry["assets"][asset_id] = { + "path": str(self.root / asset_id), + "category": asset_json.get("identity", {}).get("category"), + } + self._write_registry(registry) + def save_asset(self, asset: Asset) -> None: asset_path = self._get_asset_json_path(asset.asset_id) asset_path.parent.mkdir(parents=True, exist_ok=True) - asset_path.write_text(json.dumps(asset.to_dict(), indent=2)) + asset_json = asset.to_dict() + asset_path.write_text(json.dumps(asset_json, indent=2)) + self._register_asset(asset.asset_id, asset_json) def load_asset(self, asset_id: str) -> Optional[Asset]: asset_path = self._get_asset_json_path(asset_id) @@ -44,20 +67,13 @@ def load_asset(self, asset_id: str) -> Optional[Asset]: data = json.loads(asset_path.read_text()) return Asset.from_dict(data) - def write_asset(self, asset_id: str, asset_json: dict) -> None: + def write_asset(self, asset_id: str, asset_json: dict[str, Any]) -> None: asset_root = self.root / asset_id - asset_root.mkdir(exist_ok=True) + asset_root.mkdir(parents=True, exist_ok=True) asset_path = asset_root / "asset.json" asset_path.write_text(json.dumps(asset_json, indent=2)) - - registry = json.loads(self.registry_path.read_text()) - 
registry["assets"][asset_id] = { - "path": str(asset_root), - "category": asset_json["identity"]["category"], - } - - self.registry_path.write_text(json.dumps(registry, indent=2)) + self._register_asset(asset_id, asset_json) def list_asset_ids(self) -> list[str]: registry = self.load_registry() diff --git a/embodichain/toolkits/simready_pipeline/parser/__init__.py b/embodichain/toolkits/simready_pipeline/parser/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/parser/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/toolkits/simready_pipeline/pipeline/__init__.py b/embodichain/toolkits/simready_pipeline/pipeline/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/pipeline/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py index 3bcc4497..90f71100 100644 --- a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py +++ b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py @@ -73,7 +73,7 @@ def ingest_one_asset( output_root = Path(output_root) output_root.mkdir(parents=True, exist_ok=True) - asset_id = "assets" + asset_id = new_uuid() asset_root = output_root / asset_id asset_root.mkdir(parents=True, exist_ok=False) @@ -97,7 +97,7 @@ def find_first_mesh_file(files, formats): candidates = sorted(p for p in files if p.suffix.lower() == suffix) if candidates: return candidates[0] - return RuntimeError("No Vailed Mesh File") + raise RuntimeError("No Valid Mesh File") if has_unprocessed_format: source_file = None diff --git a/embodichain/toolkits/simready_pipeline/utils/__init__.py b/embodichain/toolkits/simready_pipeline/utils/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/toolkits/simready_pipeline/utils/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/toolkits/simready_pipeline/utils/simready_utils.py b/embodichain/toolkits/simready_pipeline/utils/simready_utils.py index 7542732d..c0248b3c 100644 --- a/embodichain/toolkits/simready_pipeline/utils/simready_utils.py +++ b/embodichain/toolkits/simready_pipeline/utils/simready_utils.py @@ -1174,6 +1174,7 @@ def process_mesh(file, name=None, extra_text="", out_dir="renders", res=1024): if primary_view in [i[0] for i in cardinal_views]: if primary_view == "view_from_front": R = rot_y(-90) + apply_rotations(mesh, R) elif primary_view == "view_from_left": R = rot_x(-90) apply_rotations(mesh, R) @@ -1231,6 +1232,7 @@ def process_mesh(file, name=None, extra_text="", out_dir="renders", res=1024): if primary_view in [i[0] for i in cardinal_views]: if primary_view == "view_from_front": R = rot_y(90) + apply_rotations(mesh, R) elif primary_view == "view_from_left": R = rot_x(90) apply_rotations(mesh, R) From 9b8409dfc2d9297f7502d0f2ee338519fffc0635 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Wed, 20 May 2026 16:29:02 +0800 Subject: [PATCH 06/17] change decimate_ratio to keep robust --- embodichain/toolkits/simready_pipeline/configs/gen_config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/embodichain/toolkits/simready_pipeline/configs/gen_config.json b/embodichain/toolkits/simready_pipeline/configs/gen_config.json index 47a060a3..c0a50ac3 100644 --- 
a/embodichain/toolkits/simready_pipeline/configs/gen_config.json +++ b/embodichain/toolkits/simready_pipeline/configs/gen_config.json @@ -7,7 +7,7 @@ "blender_texture_name": "surface_texture.png", "blender_remesh_bake": { "voxel_size": 0.01, - "decimate_ratio": 0.5 + "decimate_ratio": 0.9 } }, "geometry_cleanup": { From 0514d2f8d053cf3b765aed277b9586bba20f8899 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Wed, 20 May 2026 16:34:51 +0800 Subject: [PATCH 07/17] run black command --- embodichain/toolkits/simready_pipeline/pipeline/ingest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py index 90f71100..ea80fc1d 100644 --- a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py +++ b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py @@ -54,9 +54,7 @@ def _load_ingest_config() -> dict: png_name: str = INGEST_CONFIG.get("blender_texture_name", "surface_texture.png") BLENDER_REMESH_BAKE_CONFIG = INGEST_CONFIG.get("blender_remesh_bake", {}) voxel_size: float = float(BLENDER_REMESH_BAKE_CONFIG.get("voxel_size", 0.01)) -decimate_ratio: float = float( - BLENDER_REMESH_BAKE_CONFIG.get("decimate_ratio", 0.5) -) +decimate_ratio: float = float(BLENDER_REMESH_BAKE_CONFIG.get("decimate_ratio", 0.5)) def ingest_one_asset( From 11630e28784e0c99e8cf19439c09847915996773 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 11:31:34 +0800 Subject: [PATCH 08/17] updata sim_ready config/create intro/update install config --- docs/source/features/simready_pipeline.md | 200 ++++++++++++++++++ docs/source/index.rst | 1 + docs/source/quick_start/install.md | 8 +- .../simready_pipeline/configs/gen_config.json | 66 ++++-- .../simready_pipeline/parser/geometry.py | 46 ++-- .../simready_pipeline/parser/physics.py | 11 + .../simready_pipeline/pipeline/ingest.py | 24 ++- .../simready_pipeline/utils/geometry_utils.py | 19 +- 
.../simready_pipeline/utils/ingest_utils.py | 113 +++++++--- pyproject.toml | 11 +- 10 files changed, 432 insertions(+), 67 deletions(-) create mode 100644 docs/source/features/simready_pipeline.md diff --git a/docs/source/features/simready_pipeline.md b/docs/source/features/simready_pipeline.md new file mode 100644 index 00000000..240396cd --- /dev/null +++ b/docs/source/features/simready_pipeline.md @@ -0,0 +1,200 @@ +# SimReady Asset Pipeline + +The SimReady asset pipeline converts raw mesh archives into normalized simulation assets. It ingests a source mesh, preserves or bakes visual materials, cleans mesh topology, estimates real-world scale and semantics with multimodal LLMs, and exports assets that can be loaded directly in EmbodiChain simulations. + +## Quick Start + +Run the pipeline on a single asset directory: + +```bash +python -m embodichain.toolkits.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ + --output_root YourOutputDir \ + --category YourCategory +``` + +Preview the generated SimReady mesh: + +```bash +python -m embodichain preview-asset \ + --asset_path /path/to/sim_ready_asset_or_usd_asset \ + --asset_type rigid +``` + +## Prerequisites + +The full pipeline uses Blender, trimesh, pyrender, and an Azure OpenAI-compatible endpoint. Install EmbodiChain with the Blender package index enabled as described in the installation guide. + +Set the LLM credentials before running the pipeline, or configure them in `embodichain/toolkits/simready_pipeline/configs/gen_config.json`: + +```bash +export AZURE_OPENAI_API_KEY="your-api-key" +export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/" +``` + +## Processing Flow + +The command above runs the full parser sequence: + +- **Ingest**: finds the first parseable mesh (`.glb`, `.gltf`, `.obj`, `.ply`, `.stl`), archives the raw input, and writes a canonical `asset_source/asset.obj`. 
+- **Visual processing**: by default, Blender remeshes the source mesh, unwraps UVs, and bakes diffuse and normal textures. With `--simple`, ingest uses trimesh only and skips Blender remesh/bake. +- **Inspection**: detects whether the normalized source is a mesh, articulation, or scene. +- **Geometry processing**: cleans topology and applies Blender decimation to the canonical mesh. +- **SimReady finalization**: renders multi-view images, uses the LLM to infer object orientation, physical dimensions, and semantics, then exports `asset_simready/asset_simready.obj`. +- **Physics and USD export**: infers physics properties and writes a USD package when possible. +- **Internal preview assets**: generates thumbnails and internal metadata for asset browsing. + +## Output Layout + +Each processed asset is written under a generated asset ID: + +```text +simready_car/ +`-- <asset_id>/ + |-- asset_archive/ # Raw source directory copy + |-- asset_source/ # Canonical normalized source mesh and textures + | |-- asset.obj + | |-- asset.mtl + | |-- diffuse.png + | `-- normal.png + |-- asset_simready/ # Final oriented and scaled mesh + | `-- asset_simready.obj + |-- asset_usd/ # USD export + `-- asset.json # Metadata, geometry, semantics, physics, and paths +``` + +Use `asset_simready/asset_simready.obj` or `asset_usd/` for simulation preview and downstream scene construction. + +## Command-Line Arguments + +| Argument | Description | Default | +| :--- | :--- | :--- | +| `--input_dir` | Directory containing the raw asset files. | **required** | +| `--output_root` | Directory where processed assets are written. | **required** | +| `--category` | Category hint passed into the pipeline, such as `car`, `bowl`, or `chair`. | **required** | +| `--simple` | Use trimesh-only ingest and skip Blender remesh/bake during ingest. Geometry cleanup later in the pipeline still uses Blender. 
| `False` | + +## Configuration + +Pipeline hyperparameters live in `embodichain/toolkits/simready_pipeline/configs/gen_config.json`. + +### Ingest + +```json +"ingest": { + "canonical_asset_name": "asset.obj", + "unprocessed_formats": [".urdf", ".usd"], + "parseable_mesh_formats": [".glb", ".gltf", ".obj", ".ply", ".stl"] +} +``` + +This section controls source file discovery and the canonical output mesh name. + +### Mesh Processing + +```json +"mesh_processing": { + "trimesh_ingest": { + "scene_mesh_strategy": "first", + "mtl_name": "asset.mtl", + "visual": { + "default_face_color": [128, 128, 128, 255], + "pbr_base_color_only": true + }, + "export": { + "include_normals": true, + "include_color": true, + "include_texture": true, + "write_texture": false + } + }, + "blender_remesh_bake": { + "remesh": { + "voxel_size": 0.01, + "min_voxel_size_ratio": 0.005, + "use_smooth_shade": true + }, + "decimate": { + "ratio": 0.9 + }, + "uv": { + "angle_limit": 66.0, + "island_margin": 0.02 + }, + "bake": { + "texture_size": 2048, + "diffuse_texture_name": "diffuse.png", + "normal_texture_name": "normal.png", + "cage_extrusion_ratio": 0.05 + } + }, + "blender_cleanup_decimate": { + "enabled": true, + "cleanup": { + "merge_dist": 0.00001, + "remove_non_manifold": true, + "triangulate": false + }, + "simplify": { + "ratio": 0.5, + "weld_distance": 0.0001, + "collapse_triangulate": true + } + }, + "simready_finalize": { + "render_resolution": 1024 + } +} +``` + +`trimesh_ingest` controls the lightweight ingest path. It does not perform mesh decimation; it normalizes visual materials and exports OBJ/MTL files. + +`blender_remesh_bake` controls the default ingest path when `--simple` is not provided. It remeshes the raw mesh, decimates it, unwraps UVs, and bakes textures. + +`blender_cleanup_decimate` controls the later geometry parser stage. It uses Blender mesh operators and the Blender Decimate modifier to clean and simplify the canonical mesh. 
+ +`simready_finalize` controls rendering used by the LLM-driven orientation and scale estimation stage. + +### LLM + +```json +"llm": { + "azure_openai": { + "api_key": "", + "model": "gpt-4o", + "base_url": "", + "api_version": "2024-02-15-preview" + } +} +``` + +This section configures the multimodal LLM used for object classification, orientation selection, dimension inference, semantic annotation, and physics inference. + +## Default vs Simple Ingest + +The default command uses Blender during ingest: + +```bash +python -m embodichain.toolkits.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ + --output_root YourOutputDir \ + --category YourCategory +``` + +Use `--simple` when you want faster trimesh-only ingest: + +```bash +python -m embodichain.toolkits.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ + --output_root YourOutputDir \ + --category YourCategory \ + --simple +``` + +The simple mode only affects the ingest step. The downstream geometry parser still uses Blender cleanup and decimation unless `mesh_processing.blender_cleanup_decimate.enabled` is set to `false`. + +## See Also + +- [Asset Preview](interaction/preview_asset.md): Load generated meshes and USD assets in the simulator. +- [Installation](../quick_start/install.md): Install EmbodiChain with Blender and rendering dependencies. +- [Toolkits](toolkits/index.rst): Other asset preparation utilities. 
diff --git a/docs/source/index.rst b/docs/source/index.rst index bba85a90..401c4cd6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -41,6 +41,7 @@ Table of Contents features/online_data.md features/agents.md + features/simready_pipeline.md features/workspace_analyzer/index* features/interaction/index* features/toolkits/index* diff --git a/docs/source/quick_start/install.md b/docs/source/quick_start/install.md index 49aed084..8b2af437 100644 --- a/docs/source/quick_start/install.md +++ b/docs/source/quick_start/install.md @@ -46,7 +46,7 @@ curl -LsSf https://astral.sh/uv/install.sh | sh **Install from PyPI:** ```bash -uv pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site +uv pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site --extra-index-url https://download.blender.org/pypi/ ``` **Install from source (editable mode):** @@ -54,7 +54,7 @@ uv pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple ```bash git clone https://github.com/DexForce/EmbodiChain.git cd EmbodiChain -uv pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site +uv pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site --extra-index-url https://download.blender.org/pypi/ ``` ### pip (PyPI) @@ -63,7 +63,7 @@ uv pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --tru > We strongly recommend using a virtual environment to avoid dependency conflicts. 
```bash -pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site +pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site --extra-index-url https://download.blender.org/pypi/ ``` ### From Source @@ -74,7 +74,7 @@ pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ - ```bash git clone https://github.com/DexForce/EmbodiChain.git cd EmbodiChain -pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site +pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site --extra-index-url https://download.blender.org/pypi/ ``` ## Verify Installation diff --git a/embodichain/toolkits/simready_pipeline/configs/gen_config.json b/embodichain/toolkits/simready_pipeline/configs/gen_config.json index c0a50ac3..3e14a50d 100644 --- a/embodichain/toolkits/simready_pipeline/configs/gen_config.json +++ b/embodichain/toolkits/simready_pipeline/configs/gen_config.json @@ -2,21 +2,63 @@ "ingest": { "canonical_asset_name": "asset.obj", "unprocessed_formats": [".urdf", ".usd"], - "parseable_mesh_formats": [".glb", ".gltf", ".obj", ".ply", ".stl"], - "blender_texture_size": 2048, - "blender_texture_name": "surface_texture.png", + "parseable_mesh_formats": [".glb", ".gltf", ".obj", ".ply", ".stl"] + }, + "mesh_processing": { + "trimesh_ingest": { + "scene_mesh_strategy": "first", + "mtl_name": "asset.mtl", + "visual": { + "default_face_color": [128, 128, 128, 255], + "pbr_base_color_only": true + }, + "export": { + "include_normals": true, + "include_color": true, + "include_texture": true, + "write_texture": false + } + }, "blender_remesh_bake": { - "voxel_size": 0.01, - "decimate_ratio": 0.9 + "remesh": { + "voxel_size": 0.01, + "min_voxel_size_ratio": 0.005, + "use_smooth_shade": true + }, + "decimate": { + "ratio": 0.9 + }, + "uv": { + "angle_limit": 66.0, + 
"island_margin": 0.02 + }, + "bake": { + "texture_size": 2048, + "diffuse_texture_name": "diffuse.png", + "normal_texture_name": "normal.png", + "cage_extrusion_ratio": 0.05 + }, + "material": { + "name": "BakeMat" + } + }, + "blender_cleanup_decimate": { + "enabled": true, + "cleanup": { + "merge_dist": 0.00001, + "remove_non_manifold": true, + "triangulate": false + }, + "simplify": { + "ratio": 0.5, + "weld_distance": 0.0001, + "collapse_triangulate": true + } + }, + "simready_finalize": { + "render_resolution": 1024 } }, - "geometry_cleanup": { - "ratio": 0.5, - "weld_distance": 0.0001, - "merge_dist": 0.00001, - "remove_non_manifold": true, - "triangulate": false - }, "llm": { "azure_openai": { "api_key": "", diff --git a/embodichain/toolkits/simready_pipeline/parser/geometry.py b/embodichain/toolkits/simready_pipeline/parser/geometry.py index ab0d9196..3b75482f 100644 --- a/embodichain/toolkits/simready_pipeline/parser/geometry.py +++ b/embodichain/toolkits/simready_pipeline/parser/geometry.py @@ -14,6 +14,8 @@ # limitations under the License. 
# ---------------------------------------------------------------------------- +from __future__ import annotations + import json from pathlib import Path from typing import Any @@ -28,7 +30,10 @@ def _load_geometry_cleanup_config() -> dict: config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json" with config_path.open("r", encoding="utf-8") as f: - return json.load(f).get("geometry_cleanup", {}) + cfg = json.load(f) + return cfg.get("mesh_processing", {}).get( + "blender_cleanup_decimate", cfg.get("geometry_cleanup", {}) + ) GEOMETRY_CLEANUP_CONFIG = _load_geometry_cleanup_config() @@ -97,17 +102,34 @@ def parse(self, asset: Asset, asset_root: Path) -> None: return mesh_path = asset_root / asset.asset_data.get("path") - process_obj( - input_path=str(mesh_path), - output_path=str(mesh_path), - ratio=GEOMETRY_CLEANUP_CONFIG.get("ratio", 0.5), - weld_distance=GEOMETRY_CLEANUP_CONFIG.get("weld_distance", 0.0001), - merge_dist=GEOMETRY_CLEANUP_CONFIG.get("merge_dist", 1e-5), - remove_non_manifold=GEOMETRY_CLEANUP_CONFIG.get( - "remove_non_manifold", True - ), - triangulate=GEOMETRY_CLEANUP_CONFIG.get("triangulate", False), - ) + if GEOMETRY_CLEANUP_CONFIG.get("enabled", True): + cleanup_config = GEOMETRY_CLEANUP_CONFIG.get("cleanup", {}) + simplify_config = GEOMETRY_CLEANUP_CONFIG.get("simplify", {}) + process_obj( + input_path=str(mesh_path), + output_path=str(mesh_path), + ratio=simplify_config.get( + "ratio", GEOMETRY_CLEANUP_CONFIG.get("ratio", 0.5) + ), + weld_distance=simplify_config.get( + "weld_distance", + GEOMETRY_CLEANUP_CONFIG.get("weld_distance", 0.0001), + ), + merge_dist=cleanup_config.get( + "merge_dist", GEOMETRY_CLEANUP_CONFIG.get("merge_dist", 1e-5) + ), + remove_non_manifold=cleanup_config.get( + "remove_non_manifold", + GEOMETRY_CLEANUP_CONFIG.get("remove_non_manifold", True), + ), + triangulate=cleanup_config.get( + "triangulate", + GEOMETRY_CLEANUP_CONFIG.get("triangulate", False), + ), + 
collapse_triangulate=simplify_config.get( + "collapse_triangulate", True + ), + ) try: diff --git a/embodichain/toolkits/simready_pipeline/parser/physics.py b/embodichain/toolkits/simready_pipeline/parser/physics.py index 55c37da3..d9e3d043 100644 --- a/embodichain/toolkits/simready_pipeline/parser/physics.py +++ b/embodichain/toolkits/simready_pipeline/parser/physics.py @@ -87,6 +87,16 @@ RIGID_KEYS = list(DEFAULT_RIGID_PHYSICS.keys()) SOFT_KEYS = list(DEFAULT_SOFTBODY_PHYSICS.keys()) + +def _load_simready_finalize_config() -> dict: + config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json" + with config_path.open("r", encoding="utf-8") as f: + cfg = json.load(f) + return cfg.get("mesh_processing", {}).get("simready_finalize", {}) + + +SIMREADY_FINALIZE_CONFIG = _load_simready_finalize_config() + PHYSICS_SYSTEM_PROMPT = """You are a physics annotation model for robot training and simulation-ready asset ingestion. This task is safety-critical: a wrong physical annotation can cause severe hardware damage, unsafe robot behavior, broken simulation, and large downstream losses. @@ -250,6 +260,7 @@ def _simready_process(self, asset: Asset, asset_root: Path) -> None: "asset", extra_text=str(asset.ingest_info["extra_info"].get("simready_info", "")), out_dir=out_path, + res=int(SIMREADY_FINALIZE_CONFIG.get("render_resolution", 1024)), ) print(result) semantics_generated = {} diff --git a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py index ea80fc1d..55fa0a20 100644 --- a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py +++ b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py @@ -14,6 +14,8 @@ # limitations under the License. 
# ---------------------------------------------------------------------------- +from __future__ import annotations + from pathlib import Path import json import os @@ -38,10 +40,12 @@ def _load_ingest_config() -> dict: config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json" with config_path.open("r", encoding="utf-8") as f: - return json.load(f).get("ingest", {}) + return json.load(f) -INGEST_CONFIG = _load_ingest_config() +GEN_CONFIG = _load_ingest_config() +INGEST_CONFIG = GEN_CONFIG.get("ingest", {}) +MESH_PROCESSING_CONFIG = GEN_CONFIG.get("mesh_processing", {}) CANOCAIL_ASSET_NAME = INGEST_CONFIG.get("canonical_asset_name", "asset.obj") UNPROCESSED_FORMATS = INGEST_CONFIG.get( "unprocessed_formats", [".urdf", ".usd"] @@ -50,11 +54,10 @@ def _load_ingest_config() -> dict: "parseable_mesh_formats", [".glb", ".gltf", ".obj", ".ply", ".stl"] ) # 主流的需要处理的格式 -tex_size: int = int(INGEST_CONFIG.get("blender_texture_size", 2048)) -png_name: str = INGEST_CONFIG.get("blender_texture_name", "surface_texture.png") -BLENDER_REMESH_BAKE_CONFIG = INGEST_CONFIG.get("blender_remesh_bake", {}) -voxel_size: float = float(BLENDER_REMESH_BAKE_CONFIG.get("voxel_size", 0.01)) -decimate_ratio: float = float(BLENDER_REMESH_BAKE_CONFIG.get("decimate_ratio", 0.5)) +TRIMESH_INGEST_CONFIG = MESH_PROCESSING_CONFIG.get("trimesh_ingest", {}) +BLENDER_REMESH_BAKE_CONFIG = MESH_PROCESSING_CONFIG.get( + "blender_remesh_bake", INGEST_CONFIG.get("blender_remesh_bake", {}) +) def ingest_one_asset( @@ -112,16 +115,15 @@ def find_first_mesh_file(files, formats): asset_source, obj_name=CANOCAIL_ASSET_NAME, mtl_name=Path(CANOCAIL_ASSET_NAME).with_suffix(".mtl").name, + config=TRIMESH_INGEST_CONFIG, ) else: visual_info = blender_parser_ingest( source_file, asset_source, - texture_size=tex_size, - png_name=png_name, - voxel_size=voxel_size, - decimate_ratio=decimate_ratio, obj_name=CANOCAIL_ASSET_NAME, + config=BLENDER_REMESH_BAKE_CONFIG, + trimesh_config=TRIMESH_INGEST_CONFIG, 
) asset = Asset( diff --git a/embodichain/toolkits/simready_pipeline/utils/geometry_utils.py b/embodichain/toolkits/simready_pipeline/utils/geometry_utils.py index 6b4cfeee..4fbf7c0b 100644 --- a/embodichain/toolkits/simready_pipeline/utils/geometry_utils.py +++ b/embodichain/toolkits/simready_pipeline/utils/geometry_utils.py @@ -14,6 +14,8 @@ # limitations under the License. # ---------------------------------------------------------------------------- +from __future__ import annotations + import bpy from pathlib import Path @@ -54,7 +56,12 @@ def join_meshes(objs): return bpy.context.active_object -def decimate_optimized(obj, ratio: float = 0.5, weld_distance: float = 0.0001): +def decimate_optimized( + obj, + ratio: float = 0.5, + weld_distance: float = 0.0001, + collapse_triangulate: bool = True, +): bpy.context.view_layer.objects.active = obj @@ -84,7 +91,7 @@ def decimate_optimized(obj, ratio: float = 0.5, weld_distance: float = 0.0001): print(f"Simplifying mesh (Ratio: {ratio})...") decimate_mod = obj.modifiers.new(name="Decimate", type="DECIMATE") decimate_mod.ratio = ratio - decimate_mod.use_collapse_triangulate = True + decimate_mod.use_collapse_triangulate = collapse_triangulate bpy.ops.object.modifier_apply(modifier=decimate_mod.name) # 4) post clean @@ -169,6 +176,7 @@ def process_obj( merge_dist=1e-5, remove_non_manifold=True, triangulate=False, + collapse_triangulate=True, ): clear_scene() objs = load_obj(input_path) @@ -186,7 +194,12 @@ def process_obj( remove_non_manifold=remove_non_manifold, triangulate=triangulate, ) - decimate_optimized(obj, ratio=ratio, weld_distance=weld_distance) + decimate_optimized( + obj, + ratio=ratio, + weld_distance=weld_distance, + collapse_triangulate=collapse_triangulate, + ) export_obj(obj, output_path) print("Clean mesh saved to:", output_path) diff --git a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py b/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py index 98a5f48a..873cb898 100644 --- 
a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py +++ b/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py @@ -14,6 +14,8 @@ # limitations under the License. # ---------------------------------------------------------------------------- +from __future__ import annotations + import uuid import trimesh import json @@ -117,6 +119,7 @@ def inject_user_extra_info(asset_source: Path, asset: Asset) -> None: def load_one_trimesh( path: str, + scene_mesh_strategy: str = "first", ) -> Union[ trimesh.Trimesh, None ]: # 可能是个scene,但是我们只处理scene中的第一个geometry,如果有多个mesh,复合起来需要下一个版本 @@ -126,6 +129,9 @@ def load_one_trimesh( if len(mesh_or_scene.geometry) == 0: print(f"No geometry found in Scene: {path}") return None + if scene_mesh_strategy == "concatenate": + meshes = list(mesh_or_scene.geometry.values()) + return trimesh.util.concatenate(meshes) first_mesh = list(mesh_or_scene.geometry.values())[0] return first_mesh if isinstance(mesh_or_scene, trimesh.Trimesh): @@ -144,8 +150,15 @@ def trimesh_parse_ingest( obj_name: str = "asset.obj", mtl_name: str = "asset.mtl", write_files: bool = True, + config: Dict[str, Any] | None = None, ): - mesh = load_one_trimesh(source_file) + config = config or {} + visual_config = config.get("visual", {}) + export_config = config.get("export", {}) + scene_mesh_strategy = config.get("scene_mesh_strategy", "first") + mtl_name = config.get("mtl_name", mtl_name) + + mesh = load_one_trimesh(source_file, scene_mesh_strategy=scene_mesh_strategy) if mesh is None: return None @@ -172,7 +185,10 @@ def trimesh_parse_ingest( print("[INFO] No visual → assign default gray") mesh.visual = trimesh.visual.ColorVisuals( - mesh, face_colors=[128, 128, 128, 255] + mesh, + face_colors=visual_config.get( + "default_face_color", [128, 128, 128, 255] + ), ) visual_ingest = "no visual" @@ -192,7 +208,9 @@ def trimesh_parse_ingest( else: # ---------- PBR ---------- - if material_kind == "pbr": + if material_kind == "pbr" and visual_config.get( + 
"pbr_base_color_only", True + ): print("[WARN] PBR → only baseColorTexture will be used") base_tex = textures.get("baseColorTexture", {}) @@ -220,11 +238,11 @@ def trimesh_parse_ingest( if write_files: obj_str, tex_dict = trimesh.exchange.obj.export_obj( mesh, - include_normals=True, - include_color=True, - include_texture=True, + include_normals=export_config.get("include_normals", True), + include_color=export_config.get("include_color", True), + include_texture=export_config.get("include_texture", True), return_texture=True, - write_texture=False, + write_texture=export_config.get("write_texture", False), mtl_name=mtl_name, ) @@ -275,13 +293,48 @@ def modify_mtl_file(mtl_path: Path, diffuse_name: str, normal_name: str) -> None def blender_remesh_bake( source_file: Path, asset_source: Path, - texture_size: int = 2048, - png_name: str = "surface_texture.png", - voxel_size: float = 0.01, - decimate_ratio: float = 0.5, + texture_size: int | None = None, + png_name: str | None = None, + voxel_size: float | None = None, + decimate_ratio: float | None = None, obj_name: str = "asset.obj", + config: Dict[str, Any] | None = None, ): """Remesh a high-poly mesh into a low-poly one and bake textures via Blender.""" + config = config or {} + remesh_config = config.get("remesh", {}) + decimate_config = config.get("decimate", {}) + uv_config = config.get("uv", {}) + bake_config = config.get("bake", {}) + material_config = config.get("material", {}) + + texture_size = int( + texture_size + or bake_config.get("texture_size", config.get("texture_size", 2048)) + ) + diffuse_texture_name = png_name or bake_config.get( + "diffuse_texture_name", config.get("texture_name", "diffuse.png") + ) + normal_texture_name = bake_config.get( + "normal_texture_name", config.get("normal_texture_name", "normal.png") + ) + voxel_size = float( + voxel_size + if voxel_size is not None + else remesh_config.get("voxel_size", config.get("voxel_size", 0.01)) + ) + min_voxel_size_ratio = 
float(remesh_config.get("min_voxel_size_ratio", 0.005)) + use_smooth_shade = bool(remesh_config.get("use_smooth_shade", True)) + decimate_ratio = float( + decimate_ratio + if decimate_ratio is not None + else decimate_config.get("ratio", config.get("decimate_ratio", 0.5)) + ) + angle_limit = float(uv_config.get("angle_limit", 66.0)) + island_margin = float(uv_config.get("island_margin", 0.02)) + cage_extrusion_ratio = float(bake_config.get("cage_extrusion_ratio", 0.05)) + material_name = material_config.get("name", "BakeMat") + asset_source = Path(asset_source) asset_source.mkdir(parents=True, exist_ok=True) source_file = Path(source_file) @@ -319,7 +372,7 @@ def blender_remesh_bake( raise RuntimeError("No active mesh object after import") high_poly.name = "High_Poly" - auto_extrusion = max(high_poly.dimensions) * 0.05 + auto_extrusion = max(high_poly.dimensions) * cage_extrusion_ratio bpy.ops.object.select_all(action="DESELECT") high_poly.select_set(True) @@ -336,8 +389,10 @@ def blender_remesh_bake( rem = low_poly.modifiers.new(name="Remesh", type="REMESH") rem.mode = "VOXEL" - rem.voxel_size = max(float(voxel_size), max(high_poly.dimensions) * 0.005) - rem.use_smooth_shade = True + rem.voxel_size = max( + float(voxel_size), max(high_poly.dimensions) * min_voxel_size_ratio + ) + rem.use_smooth_shade = use_smooth_shade bpy.ops.object.modifier_apply(modifier="Remesh") dec = low_poly.modifiers.new(name="Decimate", type="DECIMATE") @@ -347,10 +402,10 @@ def blender_remesh_bake( bpy.context.view_layer.objects.active = low_poly bpy.ops.object.mode_set(mode="EDIT") bpy.ops.mesh.select_all(action="SELECT") - bpy.ops.uv.smart_project(angle_limit=66.0, island_margin=0.02) + bpy.ops.uv.smart_project(angle_limit=angle_limit, island_margin=island_margin) bpy.ops.object.mode_set(mode="OBJECT") - mat = bpy.data.materials.new(name="BakeMat") + mat = bpy.data.materials.new(name=material_name) mat.use_nodes = True low_poly.data.materials.append(mat) nodes = mat.node_tree.nodes @@ 
-366,8 +421,8 @@ def setup_node(name: str, is_color: bool): img.colorspace_settings.name = "Non-Color" return node, img - diff_node, diff_img = setup_node("diffuse.png", True) - norm_node, norm_img = setup_node("normal.png", False) + diff_node, diff_img = setup_node(diffuse_texture_name, True) + norm_node, norm_img = setup_node(normal_texture_name, False) scene = bpy.context.scene scene.render.engine = "CYCLES" @@ -381,12 +436,12 @@ def setup_node(name: str, is_color: bool): nodes.active = diff_node bpy.ops.object.bake(type="DIFFUSE", pass_filter={"COLOR"}) - diff_img.filepath_raw = str(asset_source / "diffuse.png") + diff_img.filepath_raw = str(asset_source / diffuse_texture_name) diff_img.save() nodes.active = norm_node bpy.ops.object.bake(type="NORMAL") - norm_img.filepath_raw = str(asset_source / "normal.png") + norm_img.filepath_raw = str(asset_source / normal_texture_name) norm_img.save() export_path = asset_source / obj_name @@ -395,16 +450,21 @@ def setup_node(name: str, is_color: bool): bpy.ops.wm.obj_export(filepath=str(export_path), export_selected_objects=True) mtl_path = asset_source / Path(obj_name).with_suffix(".mtl").name - modify_mtl_file(mtl_path, "diffuse.png", "normal.png") + modify_mtl_file(mtl_path, diffuse_texture_name, normal_texture_name) return { - "png": str(asset_source / "diffuse.png"), + "png": str(asset_source / diffuse_texture_name), "obj": str(export_path), "mtl": str(mtl_path.name), } -def blender_parse_ingest(source_file: Path, asset_source: Path, **kwargs): +def blender_parse_ingest( + source_file: Path, + asset_source: Path, + trimesh_config: Dict[str, Any] | None = None, + **kwargs, +): res = blender_remesh_bake( source_file=source_file, asset_source=asset_source, @@ -412,7 +472,12 @@ def blender_parse_ingest(source_file: Path, asset_source: Path, **kwargs): ) try: asset_obj = Path(res["obj"]) - vis = trimesh_parse_ingest(asset_obj, asset_source, write_files=False) + vis = trimesh_parse_ingest( + asset_obj, + asset_source, + 
write_files=False, + config=trimesh_config, + ) if isinstance(vis, dict): res.update(vis) except Exception: diff --git a/pyproject.toml b/pyproject.toml index 68974e6d..057274b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,11 +48,20 @@ dependencies = [ "h5py", "tensordict", "viser==1.0.21", - "lerobot>=0.4.4" + "lerobot>=0.4.4", + "bpy", + "pyrender==0.1.45" ] [project.optional-dependencies] +[tool.uv.sources] +bpy = { index = "blender" } + +[[tool.uv.index]] +name = "blender" +url = "https://download.blender.org/pypi/" + [tool.setuptools.dynamic] version = { file = ["VERSION"] } From 856d071e9239ef522e07581ac795ceb8028df1c7 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 11:48:51 +0800 Subject: [PATCH 09/17] add sim_ready pipeline --- .../toolkits/simready_pipeline/test_config.py | 116 +++++++++++++ .../simready_pipeline/test_trimesh_ingest.py | 158 ++++++++++++++++++ 2 files changed, 274 insertions(+) create mode 100644 tests/toolkits/simready_pipeline/test_config.py create mode 100644 tests/toolkits/simready_pipeline/test_trimesh_ingest.py diff --git a/tests/toolkits/simready_pipeline/test_config.py b/tests/toolkits/simready_pipeline/test_config.py new file mode 100644 index 00000000..426d9124 --- /dev/null +++ b/tests/toolkits/simready_pipeline/test_config.py @@ -0,0 +1,116 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[3] +CONFIG_PATH = ( + REPO_ROOT + / "embodichain" + / "toolkits" + / "simready_pipeline" + / "configs" + / "gen_config.json" +) +ALLOWED_SCENE_MESH_STRATEGIES = {"first", "concatenate"} + + +@pytest.fixture(scope="module") +def gen_config() -> dict[str, Any]: + with CONFIG_PATH.open("r", encoding="utf-8") as f: + return json.load(f) + + +def test_gen_config_uses_mesh_processing_schema(gen_config: dict[str, Any]) -> None: + assert "ingest" in gen_config + assert "mesh_processing" in gen_config + assert "llm" in gen_config + + +def test_mesh_processing_declares_expected_stages( + gen_config: dict[str, Any], +) -> None: + mesh_processing = gen_config["mesh_processing"] + + assert "trimesh_ingest" in mesh_processing + assert "blender_remesh_bake" in mesh_processing + assert "blender_cleanup_decimate" in mesh_processing + assert "simready_finalize" in mesh_processing + + +def test_ingest_config_declares_canonical_mesh_formats( + gen_config: dict[str, Any], +) -> None: + ingest_config = gen_config["ingest"] + parseable_mesh_formats = ingest_config["parseable_mesh_formats"] + + assert ingest_config["canonical_asset_name"].endswith(".obj") + assert isinstance(parseable_mesh_formats, list) + assert parseable_mesh_formats + assert all(fmt.startswith(".") for fmt in parseable_mesh_formats) + + +def test_trimesh_ingest_config_values_are_valid( + gen_config: dict[str, Any], +) -> None: + trimesh_config = gen_config["mesh_processing"]["trimesh_ingest"] + export_config = trimesh_config["export"] + + assert trimesh_config["scene_mesh_strategy"] in ALLOWED_SCENE_MESH_STRATEGIES + assert trimesh_config["mtl_name"].endswith(".mtl") + 
assert isinstance(trimesh_config["visual"]["default_face_color"], list) + assert isinstance(trimesh_config["visual"]["pbr_base_color_only"], bool) + assert isinstance(export_config["include_normals"], bool) + assert isinstance(export_config["include_color"], bool) + assert isinstance(export_config["include_texture"], bool) + assert isinstance(export_config["write_texture"], bool) + + +def test_blender_mesh_processing_values_are_valid( + gen_config: dict[str, Any], +) -> None: + mesh_processing = gen_config["mesh_processing"] + remesh_bake = mesh_processing["blender_remesh_bake"] + cleanup_decimate = mesh_processing["blender_cleanup_decimate"] + + assert remesh_bake["remesh"]["voxel_size"] > 0.0 + assert remesh_bake["remesh"]["min_voxel_size_ratio"] > 0.0 + assert 0.0 < remesh_bake["decimate"]["ratio"] <= 1.0 + assert remesh_bake["bake"]["texture_size"] > 0 + assert isinstance(cleanup_decimate["enabled"], bool) + assert cleanup_decimate["cleanup"]["merge_dist"] > 0.0 + assert isinstance(cleanup_decimate["cleanup"]["remove_non_manifold"], bool) + assert isinstance(cleanup_decimate["cleanup"]["triangulate"], bool) + assert 0.0 < cleanup_decimate["simplify"]["ratio"] <= 1.0 + assert cleanup_decimate["simplify"]["weld_distance"] > 0.0 + assert isinstance(cleanup_decimate["simplify"]["collapse_triangulate"], bool) + + +def test_simready_finalize_config_values_are_valid( + gen_config: dict[str, Any], +) -> None: + render_resolution = gen_config["mesh_processing"]["simready_finalize"][ + "render_resolution" + ] + + assert isinstance(render_resolution, int) + assert render_resolution > 0 diff --git a/tests/toolkits/simready_pipeline/test_trimesh_ingest.py b/tests/toolkits/simready_pipeline/test_trimesh_ingest.py new file mode 100644 index 00000000..0677f4f3 --- /dev/null +++ b/tests/toolkits/simready_pipeline/test_trimesh_ingest.py @@ -0,0 +1,158 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology 
Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import importlib +import importlib.util +import sys +import types +from pathlib import Path +from typing import Any + +import pytest + +trimesh = pytest.importorskip("trimesh") + +BOX_VERTEX_COUNT = 8 +CONCATENATED_BOX_VERTEX_COUNT = BOX_VERTEX_COUNT * 2 +DEFAULT_VISUAL_RESULT: dict[str, Any] = { + "visual_category": "None", + "material_kind": None, + "material": {"textures": {}}, + "uv_present": False, + "texture_count_total": 0, +} + + +def _import_ingest_utils(): + if importlib.util.find_spec("bpy") is None: + sys.modules.setdefault("bpy", types.SimpleNamespace()) + return importlib.import_module( + "embodichain.toolkits.simready_pipeline.utils.ingest_utils" + ) + + +def _write_box_obj(path: Path) -> None: + mesh = trimesh.creation.box(extents=(1.0, 1.0, 1.0)) + mesh.export(path) + + +def test_load_one_trimesh_uses_first_scene_geometry(monkeypatch) -> None: + ingest_utils = _import_ingest_utils() + first_box = trimesh.creation.box(extents=(1.0, 1.0, 1.0)) + second_box = trimesh.creation.box(extents=(1.0, 1.0, 1.0)) + scene = trimesh.Scene({"first": first_box, "second": second_box}) + monkeypatch.setattr(ingest_utils.trimesh, "load_mesh", lambda _: scene) + + mesh = ingest_utils.load_one_trimesh("unused.obj", scene_mesh_strategy="first") + + assert len(mesh.vertices) == BOX_VERTEX_COUNT + + 
+def test_load_one_trimesh_concatenates_scene_geometry(monkeypatch) -> None: + ingest_utils = _import_ingest_utils() + first_box = trimesh.creation.box(extents=(1.0, 1.0, 1.0)) + second_box = trimesh.creation.box(extents=(1.0, 1.0, 1.0)) + scene = trimesh.Scene({"first": first_box, "second": second_box}) + monkeypatch.setattr(ingest_utils.trimesh, "load_mesh", lambda _: scene) + + mesh = ingest_utils.load_one_trimesh( + "unused.obj", scene_mesh_strategy="concatenate" + ) + + assert len(mesh.vertices) == CONCATENATED_BOX_VERTEX_COUNT + + +def test_trimesh_parse_ingest_writes_canonical_obj( + tmp_path: Path, + monkeypatch, +) -> None: + ingest_utils = _import_ingest_utils() + source_file = tmp_path / "source.obj" + asset_source = tmp_path / "asset_source" + _write_box_obj(source_file) + monkeypatch.setattr( + ingest_utils, + "classify_visual", + lambda _: DEFAULT_VISUAL_RESULT, + ) + + result = ingest_utils.trimesh_parse_ingest( + source_file=source_file, + asset_source=asset_source, + obj_name="asset.obj", + config={ + "visual": {"default_face_color": [128, 128, 128, 255]}, + "export": { + "include_normals": True, + "include_color": True, + "include_texture": True, + "write_texture": False, + }, + }, + ) + + assert (asset_source / "asset.obj").is_file() + assert result["visual_ingest"] == "no visual" + assert result["visual_source"]["visual_category"] == "None" + assert result["visual_source"]["uv_present"] is False + assert result["visual_source"]["textures"] == {} + + +def test_trimesh_parse_ingest_passes_export_config( + tmp_path: Path, + monkeypatch, +) -> None: + ingest_utils = _import_ingest_utils() + source_file = tmp_path / "source.obj" + asset_source = tmp_path / "asset_source" + captured_export_kwargs: dict[str, Any] = {} + _write_box_obj(source_file) + monkeypatch.setattr( + ingest_utils, + "classify_visual", + lambda _: DEFAULT_VISUAL_RESULT, + ) + + def fake_export_obj(mesh, **kwargs): + captured_export_kwargs.update(kwargs) + return "o asset\n", {} + + 
monkeypatch.setattr( + ingest_utils.trimesh.exchange.obj, "export_obj", fake_export_obj + ) + + ingest_utils.trimesh_parse_ingest( + source_file=source_file, + asset_source=asset_source, + obj_name="asset.obj", + config={ + "mtl_name": "custom_asset.mtl", + "export": { + "include_normals": False, + "include_color": False, + "include_texture": False, + "write_texture": True, + }, + }, + ) + + assert captured_export_kwargs["mtl_name"] == "custom_asset.mtl" + assert captured_export_kwargs["include_normals"] is False + assert captured_export_kwargs["include_color"] is False + assert captured_export_kwargs["include_texture"] is False + assert captured_export_kwargs["write_texture"] is True From ce0b37a119f41a551eefd6a1f21b7fbad240d2e4 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 11:55:13 +0800 Subject: [PATCH 10/17] translate chinese to english --- embodichain/toolkits/simready_pipeline/core/asset.py | 4 ++-- embodichain/toolkits/simready_pipeline/pipeline/ingest.py | 4 ++-- .../toolkits/simready_pipeline/utils/ingest_utils.py | 6 +++--- .../toolkits/simready_pipeline/utils/simready_utils.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/embodichain/toolkits/simready_pipeline/core/asset.py b/embodichain/toolkits/simready_pipeline/core/asset.py index 59aec900..26a629d4 100644 --- a/embodichain/toolkits/simready_pipeline/core/asset.py +++ b/embodichain/toolkits/simready_pipeline/core/asset.py @@ -29,7 +29,7 @@ class Asset: parsed: Dict[str, Any] = field( default_factory=dict - ) # Visual, Geometry, Topology, 等解析或者入库时而来的信息 + ) # Visual, Geometry, Topology semantics: Dict[str, Any] = field(default_factory=dict) physics: Dict[str, Any] = field(default_factory=dict) simulation: Dict[str, Any] = field(default_factory=dict) @@ -41,7 +41,7 @@ class Asset: status: Dict[str, Any] = field(default_factory=dict) internal: Dict[str, Any] = field(default_factory=dict) - ingest_info: Dict[str, Any] = field(default_factory=dict) # ingest相关的临时信息 
+ ingest_info: Dict[str, Any] = field(default_factory=dict) def __post_init__(self) -> None: self._init_simulation_defaults() diff --git a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py index 55fa0a20..cd8285cf 100644 --- a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py +++ b/embodichain/toolkits/simready_pipeline/pipeline/ingest.py @@ -49,10 +49,10 @@ def _load_ingest_config() -> dict: CANOCAIL_ASSET_NAME = INGEST_CONFIG.get("canonical_asset_name", "asset.obj") UNPROCESSED_FORMATS = INGEST_CONFIG.get( "unprocessed_formats", [".urdf", ".usd"] -) # 当前先复制,后续可以考虑解析 +) # Copy these for now; parsing can be added later. PARSEABLE_MESH_FORMATS = INGEST_CONFIG.get( "parseable_mesh_formats", [".glb", ".gltf", ".obj", ".ply", ".stl"] -) # 主流的需要处理的格式 +) # Common mesh formats that need processing. TRIMESH_INGEST_CONFIG = MESH_PROCESSING_CONFIG.get("trimesh_ingest", {}) BLENDER_REMESH_BAKE_CONFIG = MESH_PROCESSING_CONFIG.get( diff --git a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py b/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py index 873cb898..970f30cc 100644 --- a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py +++ b/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py @@ -122,7 +122,7 @@ def load_one_trimesh( scene_mesh_strategy: str = "first", ) -> Union[ trimesh.Trimesh, None -]: # 可能是个scene,但是我们只处理scene中的第一个geometry,如果有多个mesh,复合起来需要下一个版本 +]: # The input may be a scene; process only the first geometry unless configured to concatenate. 
try: mesh_or_scene = trimesh.load_mesh(path) if isinstance(mesh_or_scene, trimesh.Scene): @@ -246,11 +246,11 @@ def trimesh_parse_ingest( mtl_name=mtl_name, ) - # ===== 写 OBJ ===== + # ===== Write OBJ ===== with open(obj_path, "w") as f: f.write(obj_str) - # ===== 写 texture / mtl ===== + # ===== Write texture / MTL ===== for name, data in tex_dict.items(): file_path = asset_source / name diff --git a/embodichain/toolkits/simready_pipeline/utils/simready_utils.py b/embodichain/toolkits/simready_pipeline/utils/simready_utils.py index c0248b3c..876e4901 100644 --- a/embodichain/toolkits/simready_pipeline/utils/simready_utils.py +++ b/embodichain/toolkits/simready_pipeline/utils/simready_utils.py @@ -1292,7 +1292,7 @@ def process_mesh(file, name=None, extra_text="", out_dir="renders", res=1024): else: raise ValueError() - # TODO: 还需要再做对齐分析!避免倾斜!!!! + # TODO: Add alignment analysis to avoid tilted outputs. normalize_to_unit_cube(mesh) From 0cac1ca7e812a849503582364a768e358400e4e1 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 12:02:26 +0800 Subject: [PATCH 11/17] add cli commands --- docs/source/features/simready_pipeline.md | 6 ++--- docs/source/guides/cli.md | 32 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/docs/source/features/simready_pipeline.md b/docs/source/features/simready_pipeline.md index 240396cd..218a77e2 100644 --- a/docs/source/features/simready_pipeline.md +++ b/docs/source/features/simready_pipeline.md @@ -9,7 +9,7 @@ Run the pipeline on a single asset directory: ```bash python -m embodichain.toolkits.simready_pipeline.cli.start \ --input_dir /path/to/raw_mesh_folder \ - --output_root YourOutputDir \ + --output_root /path/to/output_folder \ --category YourCategory ``` @@ -177,7 +177,7 @@ The default command uses Blender during ingest: ```bash python -m embodichain.toolkits.simready_pipeline.cli.start \ --input_dir /path/to/raw_mesh_folder \ - --output_root YourOutputDir \ + --output_root 
/path/to/output_folder \ --category YourCategory ``` @@ -186,7 +186,7 @@ Use `--simple` when you want faster trimesh-only ingest: ```bash python -m embodichain.toolkits.simready_pipeline.cli.start \ --input_dir /path/to/raw_mesh_folder \ - --output_root YourOutputDir \ + --output_root /path/to/output_folder \ --category YourCategory --simple ``` diff --git a/docs/source/guides/cli.md b/docs/source/guides/cli.md index 639183ca..8a8bbff6 100644 --- a/docs/source/guides/cli.md +++ b/docs/source/guides/cli.md @@ -27,6 +27,38 @@ python -m embodichain.data download --all --- +## SimReady Asset Pipeline + +Convert a raw mesh asset directory into sim_ready assets for simulation. + +```bash +# Run the full SimReady pipeline on a single asset directory +python -m embodichain.toolkits.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ + --output_root /path/to/output_folder \ + --category YourCategory + +# Use trimesh-only ingest for source normalization +python -m embodichain.toolkits.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ + --output_root /path/to/output_folder \ + --category YourCategory \ + --simple +``` + +### Arguments + +| Argument | Default | Description | +|---|---|---| +| ``--input_dir`` | *(required)* | Directory containing the raw asset files | +| ``--output_root`` | *(required)* | Directory where processed assets are written | +| ``--category`` | *(required)* | Category hint passed into the pipeline | +| ``--simple`` | ``False`` | Use trimesh-only ingest and skip Blender remesh/bake during ingest | + +The generated output contains the canonical source mesh under ``asset_source/``, the final SimReady mesh under ``asset_simready/``, and USD export files under ``asset_usd/`` when export succeeds. + +--- + ## Preview Asset Preview a USD or mesh asset in the simulation without writing code. 
From f1afa961f1514084841839a80a1c62509dd7dbf8 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 14:26:16 +0800 Subject: [PATCH 12/17] run black command --- embodichain/toolkits/simready_pipeline/core/asset.py | 4 +--- embodichain/toolkits/simready_pipeline/parser/geometry.py | 4 +--- embodichain/toolkits/simready_pipeline/utils/ingest_utils.py | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/embodichain/toolkits/simready_pipeline/core/asset.py b/embodichain/toolkits/simready_pipeline/core/asset.py index 26a629d4..020f696f 100644 --- a/embodichain/toolkits/simready_pipeline/core/asset.py +++ b/embodichain/toolkits/simready_pipeline/core/asset.py @@ -27,9 +27,7 @@ class Asset: identity: Dict[str, Any] = field(default_factory=dict) asset_data: Dict[str, Any] = field(default_factory=dict) - parsed: Dict[str, Any] = field( - default_factory=dict - ) # Visual, Geometry, Topology + parsed: Dict[str, Any] = field(default_factory=dict) # Visual, Geometry, Topology semantics: Dict[str, Any] = field(default_factory=dict) physics: Dict[str, Any] = field(default_factory=dict) simulation: Dict[str, Any] = field(default_factory=dict) diff --git a/embodichain/toolkits/simready_pipeline/parser/geometry.py b/embodichain/toolkits/simready_pipeline/parser/geometry.py index 3b75482f..05a06dce 100644 --- a/embodichain/toolkits/simready_pipeline/parser/geometry.py +++ b/embodichain/toolkits/simready_pipeline/parser/geometry.py @@ -126,9 +126,7 @@ def parse(self, asset: Asset, asset_root: Path) -> None: "triangulate", GEOMETRY_CLEANUP_CONFIG.get("triangulate", False), ), - collapse_triangulate=simplify_config.get( - "collapse_triangulate", True - ), + collapse_triangulate=simplify_config.get("collapse_triangulate", True), ) try: diff --git a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py b/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py index 970f30cc..c59440f9 100644 --- 
a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py +++ b/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py @@ -186,9 +186,7 @@ def trimesh_parse_ingest( mesh.visual = trimesh.visual.ColorVisuals( mesh, - face_colors=visual_config.get( - "default_face_color", [128, 128, 128, 255] - ), + face_colors=visual_config.get("default_face_color", [128, 128, 128, 255]), ) visual_ingest = "no visual" From babb8974aa06f7a952e2af7fd7366575db6418a4 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 14:44:13 +0800 Subject: [PATCH 13/17] update bpy and pyrender install --- docs/source/quick_start/install.md | 8 ++++---- pyproject.toml | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/source/quick_start/install.md b/docs/source/quick_start/install.md index 8b2af437..49aed084 100644 --- a/docs/source/quick_start/install.md +++ b/docs/source/quick_start/install.md @@ -46,7 +46,7 @@ curl -LsSf https://astral.sh/uv/install.sh | sh **Install from PyPI:** ```bash -uv pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site --extra-index-url https://download.blender.org/pypi/ +uv pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site ``` **Install from source (editable mode):** @@ -54,7 +54,7 @@ uv pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple ```bash git clone https://github.com/DexForce/EmbodiChain.git cd EmbodiChain -uv pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site --extra-index-url https://download.blender.org/pypi/ +uv pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site ``` ### pip (PyPI) @@ -63,7 +63,7 @@ uv pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --tru > We strongly recommend using a virtual environment to avoid dependency conflicts. 
```bash -pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site --extra-index-url https://download.blender.org/pypi/ +pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site ``` ### From Source @@ -74,7 +74,7 @@ pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ - ```bash git clone https://github.com/DexForce/EmbodiChain.git cd EmbodiChain -pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site --extra-index-url https://download.blender.org/pypi/ +pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site ``` ## Verify Installation diff --git a/pyproject.toml b/pyproject.toml index 057274b2..de1e5deb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,12 +48,14 @@ dependencies = [ "h5py", "tensordict", "viser==1.0.21", - "lerobot>=0.4.4", - "bpy", - "pyrender==0.1.45" + "lerobot>=0.4.4" ] [project.optional-dependencies] +gensim = [ + "bpy", + "pyrender==0.1.45" +] [tool.uv.sources] bpy = { index = "blender" } From 3c0d7c8fb964e5e36942eb8a51ed80b6e0ecca91 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 15:35:38 +0800 Subject: [PATCH 14/17] move simready from toolkit to gensim and update relative docs --- .../features/{ => generative_sim}/agents.md | 10 ++++---- docs/source/features/generative_sim/index.rst | 9 +++++++ .../{ => generative_sim}/simready_pipeline.md | 24 +++++++++---------- docs/source/features/online_data.md | 2 +- docs/source/guides/cli.md | 10 ++++---- docs/source/index.rst | 3 +-- .../simready_pipeline => gen_sim}/__init__.py | 0 .../simready_pipeline}/__init__.py | 0 .../simready_pipeline/cli}/__init__.py | 0 .../simready_pipeline/cli/start.py | 8 +++---- .../simready_pipeline/configs}/__init__.py | 0 .../simready_pipeline/configs/gen_config.json | 0 .../simready_pipeline/core}/__init__.py | 0 
.../simready_pipeline/core/asset.py | 0 .../simready_pipeline/io}/__init__.py | 0 .../simready_pipeline/io/json_store.py | 2 +- .../simready_pipeline/parser}/__init__.py | 0 .../simready_pipeline/parser/base.py | 12 +++++----- .../simready_pipeline/parser/geometry.py | 6 ++--- .../simready_pipeline/parser/inspector.py | 4 ++-- .../simready_pipeline/parser/internal.py | 4 ++-- .../simready_pipeline/parser/physics.py | 6 ++--- .../simready_pipeline/parser/usd.py | 6 ++--- .../simready_pipeline/pipeline}/__init__.py | 0 .../simready_pipeline/pipeline/ingest.py | 8 +++---- .../simready_pipeline/utils/__init__.py | 19 +++++++++++++++ .../simready_pipeline/utils/geometry_utils.py | 0 .../simready_pipeline/utils/ingest_utils.py | 4 ++-- .../simready_pipeline/utils/simready_utils.py | 0 .../simready_pipeline/utils/texture_utils.py | 0 .../simready_pipeline/utils/usd_utils.py | 0 .../simready_pipeline/test_config.py | 2 +- .../simready_pipeline/test_trimesh_ingest.py | 2 +- 33 files changed, 84 insertions(+), 57 deletions(-) rename docs/source/features/{ => generative_sim}/agents.md (94%) create mode 100644 docs/source/features/generative_sim/index.rst rename docs/source/features/{ => generative_sim}/simready_pipeline.md (87%) rename embodichain/{toolkits/simready_pipeline => gen_sim}/__init__.py (100%) rename embodichain/{toolkits/simready_pipeline/cli => gen_sim/simready_pipeline}/__init__.py (100%) rename embodichain/{toolkits/simready_pipeline/configs => gen_sim/simready_pipeline/cli}/__init__.py (100%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/cli/start.py (88%) rename embodichain/{toolkits/simready_pipeline/core => gen_sim/simready_pipeline/configs}/__init__.py (100%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/configs/gen_config.json (100%) rename embodichain/{toolkits/simready_pipeline/io => gen_sim/simready_pipeline/core}/__init__.py (100%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/core/asset.py (100%) rename 
embodichain/{toolkits/simready_pipeline/parser => gen_sim/simready_pipeline/io}/__init__.py (100%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/io/json_store.py (97%) rename embodichain/{toolkits/simready_pipeline/pipeline => gen_sim/simready_pipeline/parser}/__init__.py (100%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/parser/base.py (84%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/parser/geometry.py (96%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/parser/inspector.py (95%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/parser/internal.py (96%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/parser/physics.py (98%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/parser/usd.py (95%) rename embodichain/{toolkits/simready_pipeline/utils => gen_sim/simready_pipeline/pipeline}/__init__.py (100%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/pipeline/ingest.py (94%) create mode 100644 embodichain/gen_sim/simready_pipeline/utils/__init__.py rename embodichain/{toolkits => gen_sim}/simready_pipeline/utils/geometry_utils.py (100%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/utils/ingest_utils.py (99%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/utils/simready_utils.py (100%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/utils/texture_utils.py (100%) rename embodichain/{toolkits => gen_sim}/simready_pipeline/utils/usd_utils.py (100%) rename tests/{toolkits => gen_sim}/simready_pipeline/test_config.py (99%) rename tests/{toolkits => gen_sim}/simready_pipeline/test_trimesh_ingest.py (98%) diff --git a/docs/source/features/agents.md b/docs/source/features/generative_sim/agents.md similarity index 94% rename from docs/source/features/agents.md rename to docs/source/features/generative_sim/agents.md index 89602c93..5c75fee5 100644 --- a/docs/source/features/agents.md +++ b/docs/source/features/generative_sim/agents.md @@ -1,4 
+1,4 @@ -# EmbodiAgent +# EmbodiAgent(aborted) EmbodiAgent is a hierarchical multi-agent system that enables robots to perform complex manipulation tasks through closed-loop planning, code generation, and validation. The system combines vision-language models (VLMs) and large language models (LLMs) to translate high-level goals into executable robot actions. @@ -169,7 +169,7 @@ embodichain/agents/ ## See Also -- [Online Data Streaming](online_data.md) — Streaming live simulation data for training -- [RL Architecture](../overview/rl/index.rst) — RL training pipeline and algorithms -- [Atomic Actions Tutorial](../tutorial/atomic_actions.rst) — Action primitives used by the CodeAgent -- [Supported Tasks](../resources/task/index.rst) — Available task environments +- [Online Data Streaming](../online_data.md) — Streaming live simulation data for training +- [RL Architecture](../../overview/rl/index.rst) — RL training pipeline and algorithms +- [Atomic Actions Tutorial](../../tutorial/atomic_actions.rst) — Action primitives used by the CodeAgent +- [Supported Tasks](../../resources/task/index.rst) — Available task environments diff --git a/docs/source/features/generative_sim/index.rst b/docs/source/features/generative_sim/index.rst new file mode 100644 index 00000000..1f7c759f --- /dev/null +++ b/docs/source/features/generative_sim/index.rst @@ -0,0 +1,9 @@ +Generative Simulation +===================== + +Generative Simulation collects EmbodiChain features for generating simulation-ready assets and executing agent-driven task workflows. + +.. 
toctree:: + :maxdepth: 2 + + SimReady Asset Pipeline diff --git a/docs/source/features/simready_pipeline.md b/docs/source/features/generative_sim/simready_pipeline.md similarity index 87% rename from docs/source/features/simready_pipeline.md rename to docs/source/features/generative_sim/simready_pipeline.md index 218a77e2..d0fe08da 100644 --- a/docs/source/features/simready_pipeline.md +++ b/docs/source/features/generative_sim/simready_pipeline.md @@ -7,8 +7,8 @@ The SimReady asset pipeline converts raw mesh archives into normalized simulatio Run the pipeline on a single asset directory: ```bash -python -m embodichain.toolkits.simready_pipeline.cli.start \ - --input_dir /path/to/raw_mesh_folder \ +python -m embodichain.gen_sim.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ --output_root /path/to/output_folder \ --category YourCategory ``` @@ -25,7 +25,7 @@ python -m embodichain preview-asset \ The full pipeline uses Blender, trimesh, pyrender, and an Azure OpenAI-compatible endpoint. Install EmbodiChain with the Blender package index enabled as described in the installation guide. -Set the LLM credentials before running the pipeline, or configure them in `embodichain/toolkits/simready_pipeline/configs/gen_config.json`: +Set the LLM credentials before running the pipeline, or configure them in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`: ```bash export AZURE_OPENAI_API_KEY="your-api-key" @@ -76,7 +76,7 @@ Use `asset_simready/asset_simready.obj` or `asset_usd/` for simulation preview a ## Configuration -Pipeline hyperparameters live in `embodichain/toolkits/simready_pipeline/configs/gen_config.json`. +Pipeline hyperparameters live in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`. 
### Ingest @@ -175,8 +175,8 @@ This section configures the multimodal LLM used for object classification, orien The default command uses Blender during ingest: ```bash -python -m embodichain.toolkits.simready_pipeline.cli.start \ - --input_dir /path/to/raw_mesh_folder \ +python -m embodichain.gen_sim.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ --output_root /path/to/output_folder \ --category YourCategory ``` @@ -184,10 +184,10 @@ python -m embodichain.toolkits.simready_pipeline.cli.start \ Use `--simple` when you want faster trimesh-only ingest: ```bash -python -m embodichain.toolkits.simready_pipeline.cli.start \ - --input_dir /path/to/raw_mesh_folder \ +python -m embodichain.gen_sim.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ --output_root /path/to/output_folder \ - --category YourCategory + --category YourCategory \ --simple ``` @@ -195,6 +195,6 @@ The simple mode only affects the ingest step. The downstream geometry parser sti ## See Also -- [Asset Preview](interaction/preview_asset.md): Load generated meshes and USD assets in the simulator. -- [Installation](../quick_start/install.md): Install EmbodiChain with Blender and rendering dependencies. -- [Toolkits](toolkits/index.rst): Other asset preparation utilities. +- [Asset Preview](../interaction/preview_asset.md): Load generated meshes and USD assets in the simulator. +- [Installation](../../quick_start/install.md): Install EmbodiChain with Blender and rendering dependencies. +- [Toolkits](../toolkits/index.rst): Other asset preparation utilities. 
diff --git a/docs/source/features/online_data.md b/docs/source/features/online_data.md index dccd38d1..4c016633 100644 --- a/docs/source/features/online_data.md +++ b/docs/source/features/online_data.md @@ -148,6 +148,6 @@ python examples/agents/datasets/online_dataset_demo.py ## See Also -- [EmbodiAgent](agents.md) — Hierarchical agent that uses online data for training +- [EmbodiAgent](generative_sim/agents.md) — Hierarchical agent that uses online data for training - [RL Architecture](../overview/rl/index.rst) — RL training pipeline - [Data Generation Tutorial](../tutorial/data_generation.rst) — Generating offline datasets diff --git a/docs/source/guides/cli.md b/docs/source/guides/cli.md index 8a8bbff6..623704d6 100644 --- a/docs/source/guides/cli.md +++ b/docs/source/guides/cli.md @@ -33,16 +33,16 @@ Convert a raw mesh asset directory into sim_ready assets for simulation. ```bash # Run the full SimReady pipeline on a single asset directory -python -m embodichain.toolkits.simready_pipeline.cli.start \ - --input_dir /path/to/raw_mesh_folder \ +python -m embodichain.gen_sim.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ --output_root /path/to/output_folder \ --category YourCategory # Use trimesh-only ingest for source normalization -python -m embodichain.toolkits.simready_pipeline.cli.start \ - --input_dir /path/to/raw_mesh_folder \ +python -m embodichain.gen_sim.simready_pipeline.cli.start \ + --input_dir /path/to/raw_mesh_folder \ --output_root /path/to/output_folder \ - --category YourCategory + --category YourCategory \ --simple ``` diff --git a/docs/source/index.rst b/docs/source/index.rst index 401c4cd6..f2f2a252 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -40,8 +40,7 @@ Table of Contents :glob: features/online_data.md - features/agents.md - features/simready_pipeline.md + features/generative_sim/index* features/workspace_analyzer/index* features/interaction/index* features/toolkits/index* diff --git 
a/embodichain/toolkits/simready_pipeline/__init__.py b/embodichain/gen_sim/__init__.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/__init__.py rename to embodichain/gen_sim/__init__.py diff --git a/embodichain/toolkits/simready_pipeline/cli/__init__.py b/embodichain/gen_sim/simready_pipeline/__init__.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/cli/__init__.py rename to embodichain/gen_sim/simready_pipeline/__init__.py diff --git a/embodichain/toolkits/simready_pipeline/configs/__init__.py b/embodichain/gen_sim/simready_pipeline/cli/__init__.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/configs/__init__.py rename to embodichain/gen_sim/simready_pipeline/cli/__init__.py diff --git a/embodichain/toolkits/simready_pipeline/cli/start.py b/embodichain/gen_sim/simready_pipeline/cli/start.py similarity index 88% rename from embodichain/toolkits/simready_pipeline/cli/start.py rename to embodichain/gen_sim/simready_pipeline/cli/start.py index c2910db1..ee0372d0 100644 --- a/embodichain/toolkits/simready_pipeline/cli/start.py +++ b/embodichain/gen_sim/simready_pipeline/cli/start.py @@ -20,9 +20,9 @@ os.environ["PYOPENGL_PLATFORM"] = "egl" -from embodichain.toolkits.simready_pipeline.pipeline.ingest import ingest_one_asset -from embodichain.toolkits.simready_pipeline.io.json_store import JsonStore -from embodichain.toolkits.simready_pipeline.parser.base import ParserManager +from embodichain.gen_sim.simready_pipeline.pipeline.ingest import ingest_one_asset +from embodichain.gen_sim.simready_pipeline.io.json_store import JsonStore +from embodichain.gen_sim.simready_pipeline.parser.base import ParserManager def cli_ingest_single( @@ -57,7 +57,7 @@ def cli_ingest_single( def main(): parser = argparse.ArgumentParser( - description="embodichain.toolkits.simready_pipeline Asset Ingestion Pipeline" + description="embodichain.gen_sim.simready_pipeline Asset Ingestion Pipeline" ) 
parser.add_argument( diff --git a/embodichain/toolkits/simready_pipeline/core/__init__.py b/embodichain/gen_sim/simready_pipeline/configs/__init__.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/core/__init__.py rename to embodichain/gen_sim/simready_pipeline/configs/__init__.py diff --git a/embodichain/toolkits/simready_pipeline/configs/gen_config.json b/embodichain/gen_sim/simready_pipeline/configs/gen_config.json similarity index 100% rename from embodichain/toolkits/simready_pipeline/configs/gen_config.json rename to embodichain/gen_sim/simready_pipeline/configs/gen_config.json diff --git a/embodichain/toolkits/simready_pipeline/io/__init__.py b/embodichain/gen_sim/simready_pipeline/core/__init__.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/io/__init__.py rename to embodichain/gen_sim/simready_pipeline/core/__init__.py diff --git a/embodichain/toolkits/simready_pipeline/core/asset.py b/embodichain/gen_sim/simready_pipeline/core/asset.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/core/asset.py rename to embodichain/gen_sim/simready_pipeline/core/asset.py diff --git a/embodichain/toolkits/simready_pipeline/parser/__init__.py b/embodichain/gen_sim/simready_pipeline/io/__init__.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/parser/__init__.py rename to embodichain/gen_sim/simready_pipeline/io/__init__.py diff --git a/embodichain/toolkits/simready_pipeline/io/json_store.py b/embodichain/gen_sim/simready_pipeline/io/json_store.py similarity index 97% rename from embodichain/toolkits/simready_pipeline/io/json_store.py rename to embodichain/gen_sim/simready_pipeline/io/json_store.py index 379c6f97..65fee676 100644 --- a/embodichain/toolkits/simready_pipeline/io/json_store.py +++ b/embodichain/gen_sim/simready_pipeline/io/json_store.py @@ -18,7 +18,7 @@ from pathlib import Path from typing import Any, Optional -from 
embodichain.toolkits.simready_pipeline.core.asset import Asset +from embodichain.gen_sim.simready_pipeline.core.asset import Asset class JsonStore: diff --git a/embodichain/toolkits/simready_pipeline/pipeline/__init__.py b/embodichain/gen_sim/simready_pipeline/parser/__init__.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/pipeline/__init__.py rename to embodichain/gen_sim/simready_pipeline/parser/__init__.py diff --git a/embodichain/toolkits/simready_pipeline/parser/base.py b/embodichain/gen_sim/simready_pipeline/parser/base.py similarity index 84% rename from embodichain/toolkits/simready_pipeline/parser/base.py rename to embodichain/gen_sim/simready_pipeline/parser/base.py index 2940d2ec..9583bf7d 100644 --- a/embodichain/toolkits/simready_pipeline/parser/base.py +++ b/embodichain/gen_sim/simready_pipeline/parser/base.py @@ -16,7 +16,7 @@ from typing import Dict, List, Optional from abc import ABC, abstractmethod -from embodichain.toolkits.simready_pipeline.core.asset import Asset +from embodichain.gen_sim.simready_pipeline.core.asset import Asset from pathlib import Path @@ -36,11 +36,11 @@ def parse(self, asset: Asset, asset_root: Path) -> None: raise NotImplementedError -from embodichain.toolkits.simready_pipeline.parser.inspector import AssetInspector -from embodichain.toolkits.simready_pipeline.parser.geometry import GeometryParser -from embodichain.toolkits.simready_pipeline.parser.physics import PhysicsParser -from embodichain.toolkits.simready_pipeline.parser.usd import UsdParser -from embodichain.toolkits.simready_pipeline.parser.internal import InternalParser +from embodichain.gen_sim.simready_pipeline.parser.inspector import AssetInspector +from embodichain.gen_sim.simready_pipeline.parser.geometry import GeometryParser +from embodichain.gen_sim.simready_pipeline.parser.physics import PhysicsParser +from embodichain.gen_sim.simready_pipeline.parser.usd import UsdParser +from 
embodichain.gen_sim.simready_pipeline.parser.internal import InternalParser class ParserManager: diff --git a/embodichain/toolkits/simready_pipeline/parser/geometry.py b/embodichain/gen_sim/simready_pipeline/parser/geometry.py similarity index 96% rename from embodichain/toolkits/simready_pipeline/parser/geometry.py rename to embodichain/gen_sim/simready_pipeline/parser/geometry.py index 05a06dce..98fa4117 100644 --- a/embodichain/toolkits/simready_pipeline/parser/geometry.py +++ b/embodichain/gen_sim/simready_pipeline/parser/geometry.py @@ -22,9 +22,9 @@ import numpy as np import trimesh -from embodichain.toolkits.simready_pipeline.parser.base import AssetParser -from embodichain.toolkits.simready_pipeline.core.asset import Asset -from embodichain.toolkits.simready_pipeline.utils.geometry_utils import process_obj +from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser +from embodichain.gen_sim.simready_pipeline.core.asset import Asset +from embodichain.gen_sim.simready_pipeline.utils.geometry_utils import process_obj def _load_geometry_cleanup_config() -> dict: diff --git a/embodichain/toolkits/simready_pipeline/parser/inspector.py b/embodichain/gen_sim/simready_pipeline/parser/inspector.py similarity index 95% rename from embodichain/toolkits/simready_pipeline/parser/inspector.py rename to embodichain/gen_sim/simready_pipeline/parser/inspector.py index 59c3bcee..65e113d9 100644 --- a/embodichain/toolkits/simready_pipeline/parser/inspector.py +++ b/embodichain/gen_sim/simready_pipeline/parser/inspector.py @@ -15,8 +15,8 @@ # ---------------------------------------------------------------------------- from pathlib import Path -from embodichain.toolkits.simready_pipeline.core.asset import Asset -from embodichain.toolkits.simready_pipeline.parser.base import AssetParser +from embodichain.gen_sim.simready_pipeline.core.asset import Asset +from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser class AssetInspector(AssetParser): 
diff --git a/embodichain/toolkits/simready_pipeline/parser/internal.py b/embodichain/gen_sim/simready_pipeline/parser/internal.py similarity index 96% rename from embodichain/toolkits/simready_pipeline/parser/internal.py rename to embodichain/gen_sim/simready_pipeline/parser/internal.py index def8afde..fcd3bafd 100644 --- a/embodichain/toolkits/simready_pipeline/parser/internal.py +++ b/embodichain/gen_sim/simready_pipeline/parser/internal.py @@ -19,8 +19,8 @@ import pyrender from PIL import Image from pathlib import Path -from embodichain.toolkits.simready_pipeline.core.asset import Asset -from embodichain.toolkits.simready_pipeline.parser.base import AssetParser +from embodichain.gen_sim.simready_pipeline.core.asset import Asset +from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser class InternalParser(AssetParser): diff --git a/embodichain/toolkits/simready_pipeline/parser/physics.py b/embodichain/gen_sim/simready_pipeline/parser/physics.py similarity index 98% rename from embodichain/toolkits/simready_pipeline/parser/physics.py rename to embodichain/gen_sim/simready_pipeline/parser/physics.py index d9e3d043..13b80264 100644 --- a/embodichain/toolkits/simready_pipeline/parser/physics.py +++ b/embodichain/gen_sim/simready_pipeline/parser/physics.py @@ -25,9 +25,9 @@ from openai import AzureOpenAI -from embodichain.toolkits.simready_pipeline.core.asset import Asset -from embodichain.toolkits.simready_pipeline.parser.base import AssetParser -from embodichain.toolkits.simready_pipeline.utils.simready_utils import ( +from embodichain.gen_sim.simready_pipeline.core.asset import Asset +from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser +from embodichain.gen_sim.simready_pipeline.utils.simready_utils import ( process_mesh, delete_rendered_pngs, client, diff --git a/embodichain/toolkits/simready_pipeline/parser/usd.py b/embodichain/gen_sim/simready_pipeline/parser/usd.py similarity index 95% rename from 
embodichain/toolkits/simready_pipeline/parser/usd.py rename to embodichain/gen_sim/simready_pipeline/parser/usd.py index 69c86657..7c8488ba 100644 --- a/embodichain/toolkits/simready_pipeline/parser/usd.py +++ b/embodichain/gen_sim/simready_pipeline/parser/usd.py @@ -19,9 +19,9 @@ import numpy as np import trimesh -from embodichain.toolkits.simready_pipeline.parser.base import AssetParser -from embodichain.toolkits.simready_pipeline.core.asset import Asset -from embodichain.toolkits.simready_pipeline.utils.usd_utils import ( +from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser +from embodichain.gen_sim.simready_pipeline.core.asset import Asset +from embodichain.gen_sim.simready_pipeline.utils.usd_utils import ( convert_model_to_usd, DEFAULT_PHYSICS_PARAMS, ) diff --git a/embodichain/toolkits/simready_pipeline/utils/__init__.py b/embodichain/gen_sim/simready_pipeline/pipeline/__init__.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/utils/__init__.py rename to embodichain/gen_sim/simready_pipeline/pipeline/__init__.py diff --git a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py b/embodichain/gen_sim/simready_pipeline/pipeline/ingest.py similarity index 94% rename from embodichain/toolkits/simready_pipeline/pipeline/ingest.py rename to embodichain/gen_sim/simready_pipeline/pipeline/ingest.py index cd8285cf..b87a16d1 100644 --- a/embodichain/toolkits/simready_pipeline/pipeline/ingest.py +++ b/embodichain/gen_sim/simready_pipeline/pipeline/ingest.py @@ -25,16 +25,16 @@ import tempfile from typing import Iterable, Optional -from embodichain.toolkits.simready_pipeline.core.asset import Asset -from embodichain.toolkits.simready_pipeline.utils.ingest_utils import ( +from embodichain.gen_sim.simready_pipeline.core.asset import Asset +from embodichain.gen_sim.simready_pipeline.utils.ingest_utils import ( new_uuid, trimesh_parse_ingest, blender_parser_ingest, inject_semantic_from_config, inject_user_extra_info, ) 
-from embodichain.toolkits.simready_pipeline.io.json_store import JsonStore -from embodichain.toolkits.simready_pipeline.parser.base import ParserManager +from embodichain.gen_sim.simready_pipeline.io.json_store import JsonStore +from embodichain.gen_sim.simready_pipeline.parser.base import ParserManager def _load_ingest_config() -> dict: diff --git a/embodichain/gen_sim/simready_pipeline/utils/__init__.py b/embodichain/gen_sim/simready_pipeline/utils/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/gen_sim/simready_pipeline/utils/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/toolkits/simready_pipeline/utils/geometry_utils.py b/embodichain/gen_sim/simready_pipeline/utils/geometry_utils.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/utils/geometry_utils.py rename to embodichain/gen_sim/simready_pipeline/utils/geometry_utils.py diff --git a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py b/embodichain/gen_sim/simready_pipeline/utils/ingest_utils.py similarity index 99% rename from embodichain/toolkits/simready_pipeline/utils/ingest_utils.py rename to embodichain/gen_sim/simready_pipeline/utils/ingest_utils.py index c59440f9..bb5a80d8 100644 --- a/embodichain/toolkits/simready_pipeline/utils/ingest_utils.py +++ b/embodichain/gen_sim/simready_pipeline/utils/ingest_utils.py @@ -21,10 +21,10 @@ import json from pathlib import Path from typing import Union, Dict, Any -from embodichain.toolkits.simready_pipeline.utils.texture_utils import classify_visual +from embodichain.gen_sim.simready_pipeline.utils.texture_utils import classify_visual import hashlib import os -from embodichain.toolkits.simready_pipeline.core.asset import Asset +from embodichain.gen_sim.simready_pipeline.core.asset import Asset def new_uuid() -> str: diff --git a/embodichain/toolkits/simready_pipeline/utils/simready_utils.py b/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/utils/simready_utils.py rename to embodichain/gen_sim/simready_pipeline/utils/simready_utils.py diff --git a/embodichain/toolkits/simready_pipeline/utils/texture_utils.py b/embodichain/gen_sim/simready_pipeline/utils/texture_utils.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/utils/texture_utils.py rename to embodichain/gen_sim/simready_pipeline/utils/texture_utils.py diff --git 
a/embodichain/toolkits/simready_pipeline/utils/usd_utils.py b/embodichain/gen_sim/simready_pipeline/utils/usd_utils.py similarity index 100% rename from embodichain/toolkits/simready_pipeline/utils/usd_utils.py rename to embodichain/gen_sim/simready_pipeline/utils/usd_utils.py diff --git a/tests/toolkits/simready_pipeline/test_config.py b/tests/gen_sim/simready_pipeline/test_config.py similarity index 99% rename from tests/toolkits/simready_pipeline/test_config.py rename to tests/gen_sim/simready_pipeline/test_config.py index 426d9124..9e0d885f 100644 --- a/tests/toolkits/simready_pipeline/test_config.py +++ b/tests/gen_sim/simready_pipeline/test_config.py @@ -26,7 +26,7 @@ CONFIG_PATH = ( REPO_ROOT / "embodichain" - / "toolkits" + / "gen_sim" / "simready_pipeline" / "configs" / "gen_config.json" diff --git a/tests/toolkits/simready_pipeline/test_trimesh_ingest.py b/tests/gen_sim/simready_pipeline/test_trimesh_ingest.py similarity index 98% rename from tests/toolkits/simready_pipeline/test_trimesh_ingest.py rename to tests/gen_sim/simready_pipeline/test_trimesh_ingest.py index 0677f4f3..4a5e24bb 100644 --- a/tests/toolkits/simready_pipeline/test_trimesh_ingest.py +++ b/tests/gen_sim/simready_pipeline/test_trimesh_ingest.py @@ -42,7 +42,7 @@ def _import_ingest_utils(): if importlib.util.find_spec("bpy") is None: sys.modules.setdefault("bpy", types.SimpleNamespace()) return importlib.import_module( - "embodichain.toolkits.simready_pipeline.utils.ingest_utils" + "embodichain.gen_sim.simready_pipeline.utils.ingest_utils" ) From 2c975920bc242e7d461b2362dab70efc4c28f0cc Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 16:48:55 +0800 Subject: [PATCH 15/17] delete bpy test --- tests/gen_sim/simready_pipeline/test_trimesh_ingest.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/gen_sim/simready_pipeline/test_trimesh_ingest.py b/tests/gen_sim/simready_pipeline/test_trimesh_ingest.py index 4a5e24bb..7c20c677 100644 --- 
a/tests/gen_sim/simready_pipeline/test_trimesh_ingest.py +++ b/tests/gen_sim/simready_pipeline/test_trimesh_ingest.py @@ -17,9 +17,6 @@ from __future__ import annotations import importlib -import importlib.util -import sys -import types from pathlib import Path from typing import Any @@ -39,8 +36,6 @@ def _import_ingest_utils(): - if importlib.util.find_spec("bpy") is None: - sys.modules.setdefault("bpy", types.SimpleNamespace()) return importlib.import_module( "embodichain.gen_sim.simready_pipeline.utils.ingest_utils" ) From 07ee9203e0a867d3601827598f01378840413cb0 Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 18:09:28 +0800 Subject: [PATCH 16/17] add openai compatible api support and update the related docs and update CI test process to support gen_sim --- .github/workflows/main.yml | 10 +- .../generative_sim/simready_pipeline.md | 95 +++++++++++++++++-- docs/source/quick_start/install.md | 44 +++++++++ .../simready_pipeline/configs/gen_config.json | 4 +- .../simready_pipeline/parser/physics.py | 4 - .../simready_pipeline/utils/simready_utils.py | 37 +++----- 6 files changed, 155 insertions(+), 39 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 05cc2434..3540cfb9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -75,7 +75,10 @@ jobs: - name: Build docs shell: bash run: | - pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site + pip install -e ".[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ pip install -r docs/requirements.txt python3 docs/scripts/sync_readme.py cd ${GITHUB_WORKSPACE}/docs @@ -136,7 +139,10 @@ jobs: - uses: actions/checkout@v4 - name: Run tests run: | - pip install -e . 
--extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site + pip install -e ".[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ echo "Unit test Start" export HF_ENDPOINT=https://hf-mirror.com pytest tests diff --git a/docs/source/features/generative_sim/simready_pipeline.md b/docs/source/features/generative_sim/simready_pipeline.md index d0fe08da..e88eb6cf 100644 --- a/docs/source/features/generative_sim/simready_pipeline.md +++ b/docs/source/features/generative_sim/simready_pipeline.md @@ -23,13 +23,64 @@ python -m embodichain preview-asset \ ## Prerequisites -The full pipeline uses Blender, trimesh, pyrender, and an Azure OpenAI-compatible endpoint. Install EmbodiChain with the Blender package index enabled as described in the installation guide. +The full pipeline uses Blender, trimesh, pyrender, and an OpenAI-compatible multimodal chat completions endpoint. Install EmbodiChain with the `gensim` extra and enable both the EmbodiChain package index and Blender package index. 
-Set the LLM credentials before running the pipeline, or configure them in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`: +Install from PyPI with `uv`: ```bash -export AZURE_OPENAI_API_KEY="your-api-key" -export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/" +uv pip install "embodichain[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ +``` + +Install from source with `uv`: + +```bash +git clone https://github.com/DexForce/EmbodiChain.git +cd EmbodiChain +uv pip install -e ".[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ +``` + +Install from PyPI with `pip`: + +```bash +pip install "embodichain[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ +``` + +Install from source with `pip`: + +```bash +git clone https://github.com/DexForce/EmbodiChain.git +cd EmbodiChain +pip install -e ".[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ +``` + +Set the OpenAI-compatible LLM credentials before running the pipeline, or configure them in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`. Environment variables override the JSON config. 
+ +OpenAI API example: + +```bash +export OPENAI_API_KEY="your-openai-api-key" +export OPENAI_MODEL="gpt-4o" +export OPENAI_BASE_URL="https://api.openai.com/v1" +``` + +Gemini API example: + +```bash +export OPENAI_API_KEY="your-gemini-api-key" +export OPENAI_MODEL="gemini-3.5-flash" +export OPENAI_BASE_URL="https://generativelanguage.googleapis.com/v1beta/openai/" ``` ## Processing Flow @@ -159,16 +210,44 @@ This section controls source file discovery and the canonical output mesh name. ```json "llm": { - "azure_openai": { + "openai_compatible": { "api_key": "", "model": "gpt-4o", - "base_url": "", - "api_version": "2024-02-15-preview" + "base_url": "https://api.openai.com/v1", + "default_query": {} + } +} +``` + +This section configures the multimodal LLM used for object classification, orientation selection, dimension inference, semantic annotation, and physics inference. Any provider that supports the OpenAI-compatible chat completions API can be used by changing `api_key`, `model`, `base_url`, and optional `default_query` parameters. + +For Gemini, use the same config shape: + +```json +"llm": { + "openai_compatible": { + "api_key": "your-gemini-api-key", + "model": "gemini-3.5-flash", + "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/", + "default_query": {} } } ``` -This section configures the multimodal LLM used for object classification, orientation selection, dimension inference, semantic annotation, and physics inference. 
+For Azure-style OpenAI-compatible endpoints that require an API version query parameter, use `default_query`: + +```json +"llm": { + "openai_compatible": { + "api_key": "your-api-key", + "model": "gpt-4o", + "base_url": "https://dex-gpt4.openai.azure.com/openai/deployments/gpt-4o", + "default_query": { + "api-version": "2025-01-01-preview" + } + } +} +``` ## Default vs Simple Ingest diff --git a/docs/source/quick_start/install.md b/docs/source/quick_start/install.md index 49aed084..ae408f83 100644 --- a/docs/source/quick_start/install.md +++ b/docs/source/quick_start/install.md @@ -77,6 +77,50 @@ cd EmbodiChain pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site ``` +### Generative Simulation Dependencies + +If you want to use the generative simulation features, install EmbodiChain with the `gensim` extra. This installs the additional rendering and asset-processing dependencies, including `pyrender` and `bpy`. The `bpy` wheel is distributed from Blender's package index, so the Blender index must be included in the install command. 
+ +**Install from PyPI with `uv`:** + +```bash +uv pip install "embodichain[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ +``` + +**Install from source with `uv`:** + +```bash +git clone https://github.com/DexForce/EmbodiChain.git +cd EmbodiChain +uv pip install -e ".[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ +``` + +**Install from PyPI with `pip`:** + +```bash +pip install "embodichain[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ +``` + +**Install from source with `pip`:** + +```bash +git clone https://github.com/DexForce/EmbodiChain.git +cd EmbodiChain +pip install -e ".[gensim]" \ + --extra-index-url http://pyp.open3dv.site:2345/simple/ \ + --trusted-host pyp.open3dv.site \ + --extra-index-url https://download.blender.org/pypi/ +``` + ## Verify Installation Run the demo script to confirm everything is set up correctly: diff --git a/embodichain/gen_sim/simready_pipeline/configs/gen_config.json b/embodichain/gen_sim/simready_pipeline/configs/gen_config.json index 3e14a50d..5a2bf634 100644 --- a/embodichain/gen_sim/simready_pipeline/configs/gen_config.json +++ b/embodichain/gen_sim/simready_pipeline/configs/gen_config.json @@ -60,11 +60,11 @@ } }, "llm": { - "azure_openai": { + "openai_compatible": { "api_key": "", "model": "gpt-4o", "base_url": "", - "api_version": "2024-02-15-preview" + "default_query": {} } } } diff --git a/embodichain/gen_sim/simready_pipeline/parser/physics.py b/embodichain/gen_sim/simready_pipeline/parser/physics.py index 13b80264..7118cfbb 100644 --- a/embodichain/gen_sim/simready_pipeline/parser/physics.py +++ b/embodichain/gen_sim/simready_pipeline/parser/physics.py @@ -21,9 +21,6 @@ from copy 
import deepcopy from pathlib import Path from typing import Dict, Any, List -from urllib.parse import urlsplit, urlunsplit - -from openai import AzureOpenAI from embodichain.gen_sim.simready_pipeline.core.asset import Asset from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser @@ -348,7 +345,6 @@ def _call_LLM(self, description: str) -> Dict[str, Any]: resp = client.chat.completions.create( model=DEPLOYMENT, temperature=0.0, - response_format={"type": "json_object"}, messages=[ {"role": "system", "content": PHYSICS_SYSTEM_PROMPT}, {"role": "user", "content": user_prompt}, diff --git a/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py b/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py index 876e4901..73db0874 100644 --- a/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py +++ b/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py @@ -17,17 +17,17 @@ import argparse import base64 import json +import os import re from pathlib import Path import numpy as np import trimesh import pyrender from PIL import Image -from openai import AzureOpenAI +from openai import OpenAI import itertools from scipy.spatial import ConvexHull from typing import Dict, Any, List -from urllib.parse import urlsplit, urlunsplit def _load_gen_config() -> Dict[str, Any]: @@ -36,12 +36,17 @@ def _load_gen_config() -> Dict[str, Any]: raise FileNotFoundError(f"gen_config.json not found: {config_path}") with config_path.open("r", encoding="utf-8") as f: - cfg = json.load(f) + raw_cfg = json.load(f) - cfg = cfg.get("llm", {}).get("azure_openai", {}) - cfg.setdefault("api_version", "2024-02-15-preview") + cfg = raw_cfg.get("llm", {}).get("openai_compatible", {}) + cfg["api_key"] = os.getenv("OPENAI_API_KEY") or cfg.get("api_key", "") + cfg["model"] = os.getenv("OPENAI_MODEL") or cfg.get("model", "") + cfg["base_url"] = os.getenv("OPENAI_BASE_URL") or cfg.get("base_url", "") + cfg["default_query"] = cfg.get("default_query", {}) + if 
cfg["base_url"]: + cfg["base_url"] = cfg["base_url"].rstrip("/") - required = ["api_key", "model", "base_url", "api_version"] + required = ["api_key", "model", "base_url"] missing = [k for k in required if k not in cfg or not cfg[k]] if missing: raise ValueError(f"Missing required config keys: {missing}") @@ -49,28 +54,14 @@ def _load_gen_config() -> Dict[str, Any]: return cfg -def _normalize_azure_endpoint(base_url: str) -> str: - parsed = urlsplit(base_url) - path = parsed.path - - if "/openai/deployments/" in path: - path = path.split("/openai/deployments/")[0] - elif path.endswith("/chat/completions"): - path = path[: -len("/chat/completions")] - - return urlunsplit((parsed.scheme, parsed.netloc, path.rstrip("/"), "", "")) - - _GEN_CONFIG = _load_gen_config() DEPLOYMENT = _GEN_CONFIG["model"] -AZURE_ENDPOINT = _normalize_azure_endpoint(_GEN_CONFIG["base_url"]) - -client = AzureOpenAI( +client = OpenAI( api_key=_GEN_CONFIG["api_key"], - api_version=_GEN_CONFIG["api_version"], - azure_endpoint=AZURE_ENDPOINT, + base_url=_GEN_CONFIG["base_url"], + default_query=_GEN_CONFIG.get("default_query") or None, ) STRATEGY = None From 41f0019e2ad84af3e6dd0d37e4832f03c10e332e Mon Sep 17 00:00:00 2001 From: PengXuanchao Date: Thu, 21 May 2026 18:18:05 +0800 Subject: [PATCH 17/17] refine the params in doc and api format --- .../generative_sim/simready_pipeline.md | 63 ++----------------- 1 file changed, 4 insertions(+), 59 deletions(-) diff --git a/docs/source/features/generative_sim/simready_pipeline.md b/docs/source/features/generative_sim/simready_pipeline.md index e88eb6cf..58aa9cf1 100644 --- a/docs/source/features/generative_sim/simready_pipeline.md +++ b/docs/source/features/generative_sim/simready_pipeline.md @@ -65,9 +65,9 @@ pip install -e ".[gensim]" \ --extra-index-url https://download.blender.org/pypi/ ``` -Set the OpenAI-compatible LLM credentials before running the pipeline, or configure them in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`. 
Environment variables override the JSON config. +Set the OpenAI-compatible LLM API settings (for OpenAI, Gemini, Doubao, etc.) before running the pipeline, or configure them in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`. Environment variables override the JSON config. -OpenAI API example: +OpenAI-compatible API example: ```bash export OPENAI_API_KEY="your-openai-api-key" @@ -75,14 +75,6 @@ export OPENAI_MODEL="gpt-4o" export OPENAI_BASE_URL="https://api.openai.com/v1" ``` -Gemini API example: - -```bash -export OPENAI_API_KEY="your-gemini-api-key" -export OPENAI_MODEL="gemini-3.5-flash" -export OPENAI_BASE_URL="https://generativelanguage.googleapis.com/v1beta/openai/" -``` - ## Processing Flow The command above runs the full parser sequence: @@ -127,38 +119,12 @@ Use `asset_simready/asset_simready.obj` or `asset_usd/` for simulation preview a ## Configuration -Pipeline hyperparameters live in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`. - -### Ingest - -```json -"ingest": { - "canonical_asset_name": "asset.obj", - "unprocessed_formats": [".urdf", ".usd"], - "parseable_mesh_formats": [".glb", ".gltf", ".obj", ".ply", ".stl"] -} -``` - -This section controls source file discovery and the canonical output mesh name. +Pipeline hyperparameters live in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`. The main hyperparameters are as follows: ### Mesh Processing ```json "mesh_processing": { - "trimesh_ingest": { - "scene_mesh_strategy": "first", - "mtl_name": "asset.mtl", - "visual": { - "default_face_color": [128, 128, 128, 255], - "pbr_base_color_only": true - }, - "export": { - "include_normals": true, - "include_color": true, - "include_texture": true, - "write_texture": false - } - }, "blender_remesh_bake": { "remesh": { "voxel_size": 0.01, @@ -174,8 +140,6 @@ This section controls source file discovery and the canonical output mesh name. 
}, "bake": { "texture_size": 2048, - "diffuse_texture_name": "diffuse.png", - "normal_texture_name": "normal.png", "cage_extrusion_ratio": 0.05 } }, @@ -192,19 +156,13 @@ This section controls source file discovery and the canonical output mesh name. "collapse_triangulate": true } }, - "simready_finalize": { - "render_resolution": 1024 - } } ``` -`trimesh_ingest` controls the lightweight ingest path. It does not perform mesh decimation; it normalizes visual materials and exports OBJ/MTL files. - `blender_remesh_bake` controls the default ingest path when `--simple` is not provided. It remeshes the raw mesh, decimates it, unwraps UVs, and bakes textures. `blender_cleanup_decimate` controls the later geometry parser stage. It uses Blender mesh operators and the Blender Decimate modifier to clean and simplify the canonical mesh. -`simready_finalize` controls rendering used by the LLM-driven orientation and scale estimation stage. ### LLM @@ -221,19 +179,6 @@ This section controls source file discovery and the canonical output mesh name. This section configures the multimodal LLM used for object classification, orientation selection, dimension inference, semantic annotation, and physics inference. Any provider that supports the OpenAI-compatible chat completions API can be used by changing `api_key`, `model`, `base_url`, and optional `default_query` parameters. 
-For Gemini, use the same config shape: - -```json -"llm": { - "openai_compatible": { - "api_key": "your-gemini-api-key", - "model": "gemini-3.5-flash", - "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/", - "default_query": {} - } -} -``` - For Azure-style OpenAI-compatible endpoints that require an API version query parameter, use `default_query`: ```json @@ -241,7 +186,7 @@ For Azure-style OpenAI-compatible endpoints that require an API version query pa "openai_compatible": { "api_key": "your-api-key", "model": "gpt-4o", - "base_url": "https://dex-gpt4.openai.azure.com/openai/deployments/gpt-4o", + "base_url": "your_api", "default_query": { "api-version": "2025-01-01-preview" }