Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import time
import os
import httpx
from datetime import datetime, timezone
from typing import Optional
from typing import Optional, Union
from .types import Config, MaxunError


Expand Down Expand Up @@ -161,6 +162,41 @@ async def extract_with_llm(self, options: dict):
self.client.post("/extract/llm", json=options, timeout=300)
)

async def create_document_robot(
    self,
    file: Union[str, bytes],
    prompt: str,
    robot_name: Optional[str] = None,
    ollama_model: Optional[str] = None,
    file_name: Optional[str] = None,
    timeout: float = 120,
) -> dict:
    """Create a document-extraction robot from a PDF file path or bytes.

    Args:
        file: Filesystem path to a PDF, or the raw PDF content as bytes.
        prompt: Extraction prompt sent to the server.
        robot_name: Optional display name for the new robot.
        ollama_model: Optional model identifier, sent as ``ollamaModel``.
        file_name: Filename reported in the multipart upload. Defaults to
            the basename of ``file`` when it is a path, otherwise
            ``'document.pdf'``.
        timeout: Request timeout in seconds (default 120, matching the
            previous hard-coded value).

    Returns:
        The parsed JSON response body from the server.

    Raises:
        MaxunError: If the response body contains neither ``data`` nor
            ``robot``.
        OSError: If ``file`` is a path that cannot be read.
    """
    if isinstance(file, str):
        # A str argument is a filesystem path; read it fully into memory
        # so it can be attached to the multipart request.
        file_name = file_name or os.path.basename(file)
        with open(file, 'rb') as f:
            file_bytes = f.read()
    else:
        file_bytes = file
        file_name = file_name or 'document.pdf'

    data = {'prompt': prompt}
    # Only include optional form fields when the caller supplied them.
    if robot_name:
        data['robotName'] = robot_name
    if ollama_model:
        data['ollamaModel'] = ollama_model

    response = await self.client.post(
        '/robots/document',
        files={'file': (file_name, file_bytes, 'application/pdf')},
        data=data,
        timeout=timeout,
    )
    response.raise_for_status()
    body = response.json()
    # The server is expected to return the created robot under 'data'
    # or 'robot'; anything else is treated as a failure.
    if not body.get('data') and not body.get('robot'):
        raise MaxunError('Failed to create document robot')
    return body

async def create_crawl_robot(self, url: str, options: dict):
return await self._handle(
self.client.post("/crawl", json={"url": url, **options})
Expand Down