SDK
Python API Reference
OmniScout CLI Python API for programmatic use
Python API Reference
OmniScout CLI engines can be used programmatically from Python code. This guide covers the main APIs.
Installation
pip install omniscout
The Python package namespace is `omniscout`. For example: `from omniscout.client import DaemonClient`.
Search API
Basic Search
from omniscout.engines.search import ddg
# Search DuckDuckGo
results = ddg.search("python async patterns", limit=10)
for hit in results:
print(f"{hit.title}: {hit.url}")
print(f" {hit.snippet}\n")
Search with Reranking
from omniscout.engines.search import ddg, embed, rerank
# Search
query = "machine learning"
hits = ddg.search(query, limit=20)
# Embed query and results
query_embedding = embed.embed_texts([query])[0]
hit_embeddings = embed.embed_texts([h.snippet for h in hits])
# Rerank by similarity
ranked = rerank.rerank(query_embedding, hit_embeddings, hits)
for hit in ranked[:5]:
print(f"{hit.title} (score: {hit.score:.2f})")
Hybrid Search (Web + Local Index)
from omniscout.engines.search import pipeline
# Search combining DDG and local vector index
results = pipeline.search_hybrid(
query="robotics",
limit=10,
rerank=True
)
for hit in results:
print(f"{hit.title}: {hit.url}")
Extraction API
Extract URL
from omniscout.engines.extractor import extract_url
# Extract content from URL
result = extract_url("https://example.com")
print(f"Title: {result.title}")
print(f"Author: {result.author}")
print(f"Published: {result.published}")
print(f"Word count: {result.word_count}")
print(f"\n{result.content}") # Markdown content
Extract with Browser Rendering
from omniscout.engines.extractor import extract_url
# Use Chrome for JS-heavy sites
result = extract_url(
"https://example.com",
use_browser=True
)
print(result.content)
Extract HTML
from omniscout.engines.extractor import extract_html
html = """
<html>
<head><title>Example</title></head>
<body>
<article>
<h1>Article Title</h1>
<p>Article content...</p>
</article>
</body>
</html>
"""
result = extract_html(html, format="markdown")
print(result.content)
Crawler API
Crawl Multiple URLs
import asyncio
from omniscout.engines.crawler import crawl_many
async def main():
urls = [
"https://example.com/page1",
"https://example.com/page2",
"https://example.com/page3",
]
results = await crawl_many(
urls=urls,
max_concurrent=5,
throttle_seconds=1.0
)
for url, html in results.items():
print(f"{url}: {len(html)} bytes")
asyncio.run(main())
Crawl with Options
from omniscout.engines.crawler import crawl_many, CrawlOptions
async def main():
options = CrawlOptions(
max_concurrent=10,
throttle_seconds=0.5,
timeout_seconds=30,
use_browser_fallback=True
)
results = await crawl_many(
urls=["https://example.com", ...],
options=options
)
asyncio.run(main())
Research API
Run Research Pipeline
from omniscout.engines.research import run_research
# Run full research pipeline
report = run_research(
topic="state of local AI agents in 2026",
depth=1,
results=8
)
print(f"Topic: {report.topic}")
print(f"Summary: {report.summary}\n")
print("Sources:")
for source in report.sources:
print(f" - {source.title}: {source.url}")
print("\nTop Passages:")
for passage in report.passages[:5]:
print(f" {passage.text[:100]}...")
print(f" (from {passage.source_url}, score: {passage.score:.2f})\n")
Browser API
One-Shot Browser Operations
from omniscout.engines.browser import BrowserEngine
engine = BrowserEngine()
# Take screenshot (one-shot engine defaults to full_page=True)
result = engine.screenshot(
url="https://example.com",
output_path="screenshot.png",
full_page=True,
)
print(f"Screenshot saved: {result.output_path}")
# Generate PDF
result = engine.pdf(
url="https://example.com",
output_path="page.pdf"
)
print(f"PDF saved: {result.output_path}")
Persistent Browser Profiles
from omniscout.engines.browser import BrowserEngine
engine = BrowserEngine()
# Create profile
engine.create_profile("work")
# Use profile (cookies/login state persist)
result = engine.screenshot(
url="https://news.ycombinator.com",
profile="work",
headful=True # Show browser window
)
Long-Lived Sessions
from omniscout.engines.browser import BrowserEngine
from omniscout.store.sessions import SessionStore
engine = BrowserEngine()
store = SessionStore()
# Start session
session_info = engine.session_start(profile="default", headful=False)
print(f"Session ID: {session_info.id}")
print(f"CDP endpoint: {session_info.ws_endpoint}")
# Later, attach to session via daemon client
from omniscout.client import DaemonClient
client = DaemonClient()
result = client.navigate(
url="https://example.com",
session=session_info.id
)
Daemon Client API
Basic Commands
from omniscout.client import DaemonClient
client = DaemonClient()
# Navigate
result = client.navigate(
url="https://example.com",
session="default"
)
# Click element
result = client.click(
selector="button.submit",
session="default"
)
# Fill form
result = client.fill(
selector="input[name=search]",
value="query text",
session="default"
)
# Take screenshot
result = client.screenshot(
session="default",
output_path="screenshot.png"
)
# Get interactive elements
result = client.snapshot(session="default")
for ref in result.refs:
print(f"{ref.ref}: {ref.role} - {ref.name}")
Error Handling
from omniscout.client import DaemonClient, DaemonError
client = DaemonClient()
try:
result = client.navigate(
url="https://example.com",
session="nonexistent"
)
except DaemonError as e:
print(f"Error: {e}")
print(f"Kind: {e.kind}")
print(f"Action: {e.action}")
Custom Timeout
from omniscout.client import DaemonClient
client = DaemonClient(timeout=120.0) # 120 second timeout
result = client.navigate(
url="https://slow-site.com",
session="default"
)
Configuration API
Get Paths
from omniscout.config import get_paths
paths = get_paths()
print(f"Data dir: {paths.data}")
print(f"Config dir: {paths.config}")
print(f"Cache dir: {paths.cache}")
print(f"Profiles dir: {paths.profiles}")
print(f"Qdrant dir: {paths.qdrant}")
Get Settings
from omniscout.config import get_settings
settings = get_settings()
print(f"Default source: {settings.default_source}")
print(f"Search limit: {settings.search_limit}")
print(f"Embedding model: {settings.embedding_model}")
print(f"Browser: {settings.browser}")
print(f"Browser executable: {settings.browser_executable}")
Override Settings
import os
from omniscout.config import get_settings
# Via environment variables
os.environ["OMNISCOUT_DATA_DIR"] = "/custom/data"
os.environ["OMNISCOUT_CONFIG_DIR"] = "/custom/config"
# Settings are cached, so clear the cache before calling get_settings() again
from omniscout.config import _settings_cache
_settings_cache.clear()
settings = get_settings()
Models API
Search Models
from omniscout.models import SearchHit, SearchResponse
# Create search hit
hit = SearchHit(
url="https://example.com",
title="Example",
snippet="Example snippet",
source="ddg",
score=0.95,
rank=1
)
# Create search response
response = SearchResponse(
query="example",
source="ddg",
count=1,
hits=[hit]
)
# Serialize to JSON
import json
print(json.dumps(response.model_dump(), indent=2))
Extract Models
from omniscout.models import ExtractResult
from datetime import datetime
result = ExtractResult(
url="https://example.com",
title="Example Article",
author="John Doe",
published="2024-01-15",
site_name="Example",
content="# Example\n\nContent here...",
text="Example\n\nContent here...",
links=["https://example.com/related"],
word_count=150,
format="markdown",
cached=False,
fetched_at=datetime.utcnow()
)
print(result.model_dump_json(indent=2))
Research Models
from omniscout.models import (
ResearchReport,
ResearchSource,
ResearchPassage
)
source = ResearchSource(
url="https://example.com",
title="Example",
snippet="Example snippet"
)
passage = ResearchPassage(
text="Passage text here",
source_url="https://example.com",
score=0.92
)
report = ResearchReport(
topic="example topic",
summary="Summary of research",
sources=[source],
passages=[passage]
)
print(report.model_dump_json(indent=2))
Logging API
Configure Logging
from omniscout.logging import configure_logging, get_logger
# Configure logging (call once at startup)
configure_logging(verbose=True)
# Get logger for your module
log = get_logger("my_module")
log.info("Starting operation")
log.debug("Debug information")
log.warning("Warning message")
log.error("Error message")
Complete Example: Research Workflow
import asyncio
import json
from omniscout.engines.research import run_research
from omniscout.engines.extractor import extract_url
from omniscout.models import ResearchReport
async def research_workflow():
# Run research
print("Running research...")
report = run_research(
topic="local AI agents",
depth=1,
results=5
)
# Save report
with open("research.json", "w") as f:
json.dump(report.model_dump(), f, indent=2, default=str)
# Extract top source
if report.sources:
top_source = report.sources[0]
print(f"\nExtracting top source: {top_source.url}")
extract_result = extract_url(top_source.url)
print(f"Title: {extract_result.title}")
print(f"Word count: {extract_result.word_count}")
# Save extracted content
with open("extracted.md", "w") as f:
f.write(extract_result.content)
# Print summary
print(f"\nResearch Summary:")
print(report.summary)
print(f"\nTop Passages:")
for i, passage in enumerate(report.passages[:3], 1):
print(f"{i}. {passage.text[:100]}...")
asyncio.run(research_workflow())
Complete Example: Browser Automation
from omniscout.client import DaemonClient
import time
def browser_workflow():
client = DaemonClient()
# Navigate to page
print("Navigating to example.com...")
client.navigate(
url="https://example.com",
session="default",
wait_until="networkidle"
)
# Get interactive elements
print("Getting page elements...")
snapshot = client.snapshot(session="default")
print(f"Found {len(snapshot.refs)} interactive elements:")
for ref in snapshot.refs[:5]:
print(f" {ref.ref}: {ref.role} - {ref.name}")
# Take screenshot
print("Taking screenshot...")
client.screenshot(
session="default",
output_path="page.png"
)
# Scroll down
print("Scrolling...")
client.scroll(
direction="down",
amount=3,
session="default"
)
time.sleep(1)
# Take another screenshot
client.screenshot(
session="default",
output_path="page_scrolled.png"
)
print("Done!")
browser_workflow()
Type Hints
All APIs use Python type hints for IDE autocomplete and type checking:
from typing import Optional
from omniscout.models import SearchResponse, ExtractResult
def process_search(response: SearchResponse) -> None:
for hit in response.hits:
print(f"{hit.title}: {hit.url}")
def process_extract(result: ExtractResult) -> Optional[str]:
if result.word_count > 100:
return result.content
return None
Async APIs
Most I/O operations support async:
import asyncio
from omniscout.engines.crawler import crawl_many
async def main():
# Crawl multiple URLs concurrently
results = await crawl_many(
urls=["https://example.com/1", "https://example.com/2"],
max_concurrent=5
)
for url, html in results.items():
print(f"{url}: {len(html)} bytes")
asyncio.run(main())
Testing
Use the provided test fixtures for offline testing:
import pytest
from pathlib import Path
@pytest.fixture
def article_html():
fixture_path = Path(__file__).parent / "fixtures" / "article.html"
return fixture_path.read_text()
def test_extract(article_html):
from omniscout.engines.extractor import extract_html
result = extract_html(article_html)
assert result.title
assert result.word_count > 0
Performance Tips
- Reuse Clients: Create one `DaemonClient` and reuse it
- Batch Operations: Use `crawl_many()` for concurrent fetching
- Cache Results: Check `PageCache` before fetching
- Lazy Loading: Models and indices load on-demand
- Async: Use async APIs for I/O-bound operations
- Profiles: Reuse browser profiles to avoid re-login
Troubleshooting
Chrome Not Found
from omniscout.config import get_settings
settings = get_settings()
print(f"Browser: {settings.browser}")
print(f"Browser executable: {settings.browser_executable}")
Or from the CLI:
omniscout settings browsers
omniscout settings set browser edge
Daemon Connection Issues
from omniscout.client import DaemonClient
from omniscout.daemon import lifecycle
# Check daemon status
port = lifecycle.read_port()
print(f"Daemon port: {port}")
# Force restart
lifecycle.stop()
client = DaemonClient() # Auto-starts daemon
Embedding Model Download
The first search or research operation downloads the embedding model (~100 MB), which is then cached locally. To download it ahead of time:
from omniscout.engines.search import embed
# Pre-download model
embed.embed_texts(["test"])