Python SDK

The official Scrapebit SDK for Python.

Installation

pip install scrapebit

Quick Start

from scrapebit import Scrapebit

client = Scrapebit(api_key="YOUR_API_KEY")

# Scrape a webpage
result = client.content.scrape(
    url="https://example.com",
    extract={
        "title": "The page title",
        "description": "The meta description"
    }
)

print(result.data)

Configuration

client = Scrapebit(
    api_key="YOUR_API_KEY",
    base_url="https://api.scrapebit.com/v1",  # Optional
    timeout=30,     # Request timeout in seconds
    max_retries=3   # Number of retries on failure
)
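
If you prefer not to hard-code credentials, you can read the key from the environment instead. A minimal sketch, assuming the key is stored in a SCRAPEBIT_API_KEY environment variable (a conventional name, not one required by the SDK):

import os
from scrapebit import Scrapebit

# SCRAPEBIT_API_KEY is an assumed variable name; use whatever your
# deployment environment provides.
client = Scrapebit(api_key=os.environ["SCRAPEBIT_API_KEY"])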

Content API

Scrape URL

result = client.content.scrape(
    url="https://example.com/products",
    extract={
        "products": "List of all product names and prices"
    },
    pagination={
        "next_button_selector": ".load-more",
        "max_pages": 5
    }
)
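
Extracted fields come back on result.data.extracted (the same field used in the Price Monitoring example below). A sketch of consuming the paginated results, assuming the "products" field is returned as a list:

# The exact shape of "products" depends on your extract prompt;
# treat this loop as an assumption, not a guaranteed schema.
for product in result.data.extracted["products"]:
    print(product)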

Extract Data

result = client.content.extract(
    url="https://example.com/team",
    prompt="Extract all team members with names and roles"
)

Get Scraped Data

# List all
data_list = client.content.list(page=1, limit=20)

# Get specific
data = client.content.get("scrape_abc123")

# Delete
client.content.delete("scrape_abc123")

PDF API

Generate PDF

pdf = client.pdf.generate(
    url="https://example.com/report",
    format="a4",
    orientation="portrait"
)

print(pdf.data.pdf_url)
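
The generate call returns a hosted URL rather than raw bytes, so downloading the file is a separate step. A minimal sketch using only the standard library, assuming pdf_url is publicly fetchable:

import urllib.request

# Save the rendered PDF locally; assumes the URL requires no auth.
urllib.request.urlretrieve(pdf.data.pdf_url, "report.pdf")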

List PDFs

pdfs = client.pdf.list()

Screenshot API

Capture Screenshot

screenshot = client.screenshot.capture(
    url="https://example.com",
    full_page=True,
    type="png"
)

print(screenshot.data.image_url)
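
Screenshots are likewise returned as a hosted image_url, so saving one locally follows the same pattern as the PDF download sketch above (again assuming the URL is publicly fetchable):

import urllib.request

urllib.request.urlretrieve(screenshot.data.image_url, "screenshot.png")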

Mobile Screenshot

screenshot = client.screenshot.capture(
    url="https://example.com",
    viewport_preset="mobile"
)

Scheduling API

Create Schedule

schedule = client.schedules.create(
    name="Daily Price Check",
    type="scrape",
    url="https://shop.example.com/product",
    frequency="daily",
    time="09:00",
    timezone="America/New_York",
    config={
        "extract": {
            "price": "Current product price"
        }
    }
)

Manage Schedules

# List all
schedules = client.schedules.list()

# Pause
client.schedules.pause("schedule_abc123")

# Resume
client.schedules.resume("schedule_abc123")

# Delete
client.schedules.delete("schedule_abc123")
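
The management calls combine naturally with list() for bulk operations. A sketch, noting that the .data list and .id field here are assumptions about the response shape, not documented fields:

schedules = client.schedules.list()

# Pause every schedule, e.g. ahead of a maintenance window.
for schedule in schedules.data:
    client.schedules.pause(schedule.id)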

Deep Research API

Create Research Session

research = client.research.create(
    name="Competitor Analysis",
    data_sources=["scrape_abc123", "scrape_def456"]
)

Ask Questions

answer = client.research.ask(
    research_id="research_abc123",
    question="What are the main differences between competitors?"
)

print(answer.data.answer)
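
A session can answer any number of follow-up questions against the same data sources. A short sketch reusing the same session:

questions = [
    "Which competitor has the lowest prices?",
    "What features do all competitors share?",
]

for question in questions:
    answer = client.research.ask(
        research_id="research_abc123",
        question=question
    )
    print(answer.data.answer)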

User API

Get Profile

profile = client.user.profile()
print(profile.data.credits.balance)

Get Usage

usage = client.user.usage(period="month")
print(usage.data.summary.total_credits_used)
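
The profile endpoint makes it easy to guard long-running jobs against running out of credits. A sketch, with the threshold chosen arbitrarily:

def ensure_credits(minimum: int = 100) -> None:
    # Abort early if the remaining balance is below the threshold;
    # 100 is an arbitrary example value.
    profile = client.user.profile()
    if profile.data.credits.balance < minimum:
        raise RuntimeError("Insufficient Scrapebit credits for this job")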

Error Handling

from scrapebit import Scrapebit, ScrapebitError

try:
    result = client.content.scrape(url="https://example.com")
except ScrapebitError as e:
    print(f"API Error: {e.code} - {e.message}")

    if e.code == "insufficient_credits":
        # Handle insufficient credits
        pass
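
For transient failures you can layer a simple retry with exponential backoff on top of the client's built-in max_retries. A sketch, where the "rate_limited" error code is an assumption about the API's error codes:

import time

def scrape_with_backoff(url: str, attempts: int = 3):
    for attempt in range(attempts):
        try:
            return client.content.scrape(url=url)
        except ScrapebitError as e:
            # "rate_limited" is an assumed code; adjust to the codes
            # your account actually receives. Re-raise anything else,
            # or the final failed attempt.
            if e.code != "rate_limited" or attempt == attempts - 1:
                raise
            time.sleep(2 ** attempt)  # 1s, 2s, 4s, ...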

Async Support

import asyncio
from scrapebit import AsyncScrapebit

async def main():
    client = AsyncScrapebit(api_key="YOUR_API_KEY")

    result = await client.content.scrape(
        url="https://example.com"
    )

    print(result.data)

asyncio.run(main())
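
The async client pairs naturally with asyncio.gather for concurrent requests. A minimal sketch using the same scrape call:

import asyncio
from scrapebit import AsyncScrapebit

async def scrape_all(urls: list) -> list:
    client = AsyncScrapebit(api_key="YOUR_API_KEY")

    # Issue all requests concurrently and wait for every result.
    return await asyncio.gather(
        *(client.content.scrape(url=url) for url in urls)
    )

results = asyncio.run(scrape_all([
    "https://example.com/a",
    "https://example.com/b",
]))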

Examples

Price Monitoring

from datetime import datetime

def monitor_price(product_url: str) -> dict:
    result = client.content.scrape(
        url=product_url,
        extract={
            "price": "The current price",
            "in_stock": "Whether the item is in stock"
        }
    )

    return {
        "price": result.data.extracted["price"],
        "in_stock": result.data.extracted["in_stock"],
        "scraped_at": datetime.now()
    }

Bulk Scraping

from concurrent.futures import ThreadPoolExecutor

def bulk_scrape(urls: list) -> list:
    with ThreadPoolExecutor(max_workers=5) as executor:
        results = list(executor.map(
            lambda url: client.content.scrape(url=url),
            urls
        ))
    return results
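
With a thread pool, one failed URL raises when its result is consumed, which can abort the whole batch. A variant that captures per-URL errors instead, using the ScrapebitError type shown under Error Handling:

from concurrent.futures import ThreadPoolExecutor

from scrapebit import ScrapebitError

def safe_scrape(url: str):
    # Return the error instead of raising so one bad URL
    # does not abort the rest of the batch.
    try:
        return client.content.scrape(url=url)
    except ScrapebitError as e:
        return e

def bulk_scrape_safe(urls: list) -> list:
    with ThreadPoolExecutor(max_workers=5) as executor:
        return list(executor.map(safe_scrape, urls))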

Pandas Integration

import pandas as pd

# Scrape data
result = client.content.extract(
    url="https://example.com/products",
    prompt="Extract all products with name, price, and rating",
    format="json"
)

# Convert to DataFrame
df = pd.DataFrame(result.data.extracted)
print(df.head())

# Export to CSV
df.to_csv("products.csv", index=False)

Data Pipeline

import json

def scrape_and_store(url: str, output_file: str):
    # Scrape
    result = client.content.scrape(
        url=url,
        extract={"data": "All relevant data"}
    )

    # Process
    data = result.data.extracted

    # Store
    with open(output_file, "w") as f:
        json.dump(data, f, indent=2)

    return output_file