How to Build a Contract Clause Extractor API in Python
Build a production-ready contract clause extraction API using LegalGuard AI. Extract key clauses, obligations, dates, and risk terms from any contract in seconds.

Every contract contains dozens of clauses — termination rights, payment terms, liability caps, IP ownership, non-compete provisions — and understanding exactly what each clause commits you to is a time-intensive process even for experienced legal teams.
For developers building legal tech tools, contract management platforms, or compliance systems, the ability to programmatically extract, classify, and analyze contract clauses is a core capability. This guide shows you how to build a production-ready contract clause extractor API using LegalGuard AI.
What We're Building
A FastAPI microservice that accepts contract documents (PDF or plain text) and returns:
- Extracted clauses — organized by category (payment, termination, liability, IP, etc.)
- Key dates and deadlines — commencement dates, termination dates, notice periods, renewal windows
- Obligations per party — what each party must do, by when
- Risk flags — unusual terms, one-sided provisions, missing standard protections
- Structured JSON output — ready for downstream systems, databases, or dashboards
Why Clause Extraction Matters
According to the World Commerce & Contracting Association, companies lose 9.2% of annual revenue due to poor contract management. The bottleneck is almost always in the review phase: lawyers and contract managers spend hours manually reading contracts to find the terms that matter.
Automated clause extraction changes the economics. Instead of spending 4-8 hours reviewing a standard vendor agreement, a contract manager can get a structured extraction in under 30 seconds and focus human attention on the high-risk, non-standard terms.
Step 1: Set Up the LegalGuard AI Client
import httpx
import base64
from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional
# API key for LegalGuard AI; sent as the "X-RapidAPI-Key" request header.
# "YOUR_API_KEY" is a placeholder - substitute a real key before running.
LEGALGUARD_API_KEY = "YOUR_API_KEY"
# Base URL of the LegalGuard AI service; "/extract" is appended per request.
LEGALGUARD_BASE_URL = "https://apivult.com/legalguard/v1"
@dataclass
class ClauseExtractionResult:
    """Structured outcome of extracting one contract.

    Only ``contract_id`` and ``contract_type`` are required. Every other
    field defaults to an empty container, an empty string, or ``0.0``, and
    each instance gets its own fresh containers.
    """

    # Identifier supplied by the caller for this contract.
    contract_id: str
    # Contract type label (the extract functions use "unknown" when absent).
    contract_type: str
    # One dict per contracting party.
    parties: list[dict] = field(default_factory=list)
    # Clause dicts grouped under a category name.
    clauses: dict[str, list[dict]] = field(default_factory=dict)
    # One dict per extracted date or deadline.
    key_dates: list[dict] = field(default_factory=list)
    # Obligation strings grouped under a string key
    # (presumably the party name - confirm against the API response).
    obligations: dict[str, list[str]] = field(default_factory=dict)
    # One dict per flagged risk.
    risk_flags: list[dict] = field(default_factory=list)
    # Free-text summary of the contract.
    summary: str = ""
    # Confidence value reported for the extraction.
    confidence_score: float = 0.0
def _resolve_extraction_config(extraction_config: Optional[dict]) -> dict:
    """Return the caller's config, or the extract-everything default."""
    # A falsy config (None or an empty dict) selects the default.
    return extraction_config or {
        "clauses": True,
        "dates": True,
        "obligations": True,
        "risk_flags": True,
        "parties": True,
    }


def _request_extraction(
    payload: dict,
    contract_id: str,
    timeout: float,
) -> ClauseExtractionResult:
    """POST ``payload`` to the extract endpoint and wrap the JSON reply."""
    response = httpx.post(
        f"{LEGALGUARD_BASE_URL}/extract",
        headers={
            "X-RapidAPI-Key": LEGALGUARD_API_KEY,
            "Content-Type": "application/json",
        },
        json=payload,
        timeout=timeout,
    )
    # Turn 4xx/5xx replies into an exception instead of parsing an error
    # body as if it were an extraction.
    response.raise_for_status()
    data = response.json()
    # Keys missing from the reply fall back to empty values; note that the
    # reply's "confidence" key feeds the result's ``confidence_score``.
    return ClauseExtractionResult(
        contract_id=contract_id,
        contract_type=data.get("contract_type", "unknown"),
        parties=data.get("parties", []),
        clauses=data.get("clauses", {}),
        key_dates=data.get("key_dates", []),
        obligations=data.get("obligations", {}),
        risk_flags=data.get("risk_flags", []),
        summary=data.get("summary", ""),
        confidence_score=data.get("confidence", 0.0),
    )


def extract_clauses_from_text(
    text: str,
    contract_id: str,
    extraction_config: Optional[dict] = None,
) -> ClauseExtractionResult:
    """
    Extract clauses from contract text.

    Args:
        text: The full contract text.
        contract_id: Unique identifier for this contract; copied into the
            result unchanged.
        extraction_config: Optional config specifying which clauses to
            extract. When omitted (or empty), clauses, dates, obligations,
            risk flags and parties are all requested.

    Returns:
        ClauseExtractionResult with all extracted data.

    Raises:
        httpx.HTTPStatusError: The service answered with an error status.
        httpx.RequestError: The request could not be completed (for
            example, the 30-second timeout elapsed).
    """
    payload = {
        "text": text,
        "document_type": "contract",
        "extract": _resolve_extraction_config(extraction_config),
    }
    return _request_extraction(payload, contract_id, timeout=30)


def extract_clauses_from_pdf(
    file_path: str,
    contract_id: str,
    extraction_config: Optional[dict] = None,
) -> ClauseExtractionResult:
    """
    Extract clauses from a PDF contract file.

    Args:
        file_path: Path of the PDF to read.
        contract_id: Unique identifier for this contract; copied into the
            result unchanged.
        extraction_config: Optional config specifying which clauses to
            extract; same default as ``extract_clauses_from_text``.

    Returns:
        ClauseExtractionResult with all extracted data.

    Raises:
        OSError: The file could not be read.
        httpx.HTTPStatusError: The service answered with an error status.
        httpx.RequestError: The request could not be completed (for
            example, the 60-second timeout elapsed).
    """
    # The PDF travels inside the JSON body, so it is base64-encoded to text.
    pdf_b64 = base64.b64encode(Path(file_path).read_bytes()).decode("utf-8")
    payload = {
        "document": pdf_b64,
        "document_type": "contract_pdf",
        "extract": _resolve_extraction_config(extraction_config),
    }
    return _request_extraction(payload, contract_id, timeout=60)


# Step 2: Build the FastAPI Extraction Service
from fastapi import FastAPI, UploadFile, File, HTTPException, Form
from fastapi.responses import JSONResponse
import uvicorn
# Application object; the route decorators in this file register on it.
app = FastAPI(
    title="Contract Clause Extractor API",
    description="Extract and analyze contract clauses using LegalGuard AI",
    version="1.0.0",
)
@app.post("/extract/text")
async def extract_from_text(
    text: str = Form(...),
    contract_id: str = Form(default=""),
    include_risk_flags: bool = Form(default=True),
):
    """Extract clauses from raw contract text.

    Args:
        text: Contract text submitted as a form field.
        contract_id: Optional identifier; a random UUID4 string is used
            when it is empty.
        include_risk_flags: Forwarded as the ``risk_flags`` extraction
            option.

    Returns:
        The dict produced by ``format_response``.

    Raises:
        HTTPException: 400 when ``text`` is empty or whitespace-only;
            502 when the request to the extraction service fails.
    """
    import asyncio
    import uuid

    if not text.strip():
        raise HTTPException(status_code=400, detail="Contract text cannot be empty")
    if not contract_id:
        contract_id = str(uuid.uuid4())

    config = {
        "clauses": True,
        "dates": True,
        "obligations": True,
        "risk_flags": include_risk_flags,
        "parties": True,
    }
    try:
        # extract_clauses_from_text makes a blocking HTTP call; run it in a
        # worker thread so the event loop can keep serving other requests.
        result = await asyncio.to_thread(
            extract_clauses_from_text, text, contract_id, config
        )
    except httpx.HTTPError as exc:
        # Report an upstream failure as a gateway error, not a bare 500.
        raise HTTPException(
            status_code=502,
            detail="Contract extraction service request failed",
        ) from exc
    return format_response(result)
@app.post("/extract/pdf")
async def extract_from_pdf(
    file: UploadFile = File(...),
    contract_id: str = Form(default=""),
    include_risk_flags: bool = Form(default=True),
):
    """Extract clauses from an uploaded PDF contract.

    Args:
        file: The uploaded file; its name must end in ".pdf"
            (case-insensitive).
        contract_id: Optional identifier. When empty, the filename without
            its final extension is used, or a random UUID4 string if that
            is empty too.
        include_risk_flags: Forwarded as the ``risk_flags`` extraction
            option.

    Returns:
        The dict produced by ``format_response``.

    Raises:
        HTTPException: 400 when the filename is missing or not ".pdf", or
            when the upload is empty; 502 when the request to the
            extraction service fails.
    """
    import asyncio
    import os
    import tempfile
    import uuid

    # The filename can be absent on an upload; treat that as "not a PDF"
    # instead of failing on None.lower().
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    pdf_bytes = await file.read()
    if not pdf_bytes:
        raise HTTPException(status_code=400, detail="Uploaded PDF is empty")

    if not contract_id:
        # A bare ".pdf" filename leaves an empty stem, so fall back to a UUID.
        contract_id = filename.rsplit(".", 1)[0] or str(uuid.uuid4())

    config = {
        "clauses": True,
        "dates": True,
        "obligations": True,
        "risk_flags": include_risk_flags,
        "parties": True,
    }

    # extract_clauses_from_pdf takes a path, so spool the upload to disk.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    tmp_path = tmp.name
    try:
        with tmp:
            tmp.write(pdf_bytes)
        # The extraction makes a blocking HTTP call; keep it off the event loop.
        result = await asyncio.to_thread(
            extract_clauses_from_pdf, tmp_path, contract_id, config
        )
    except httpx.HTTPError as exc:
        raise HTTPException(
            status_code=502,
            detail="Contract extraction service request failed",
        ) from exc
    finally:
        # Runs on every path, including a failed write, so the temp file
        # never outlives the request.
        os.unlink(tmp_path)
    return format_response(result)
@app.post("/extract/batch")
async def extract_batch(contracts: list[dict]):
    """
    Batch extract clauses from multiple contracts.

    Each item in the list should have:
    - text: The contract text
    - contract_id: Unique identifier (a random UUID4 string is used when
      it is missing or empty)

    Items whose ``text`` is missing, not a string, or blank are not sent
    for extraction; their positions in the input list are reported under
    ``skipped``.

    Returns:
        ``{"results": [...], "count": <number of results>,
        "skipped": [<input indexes>]}``.

    Raises:
        HTTPException: 400 when more than 20 contracts are submitted;
            502 when a request to the extraction service fails.
    """
    import asyncio
    import uuid

    if len(contracts) > 20:
        raise HTTPException(status_code=400, detail="Maximum 20 contracts per batch")

    results = []
    skipped = []
    try:
        for index, contract in enumerate(contracts):
            text = contract.get("text", "")
            if not isinstance(text, str) or not text.strip():
                # Record the position so callers can see what was dropped.
                skipped.append(index)
                continue
            contract_id = contract.get("contract_id") or str(uuid.uuid4())
            # Each extraction is a blocking HTTP call; run it in a worker
            # thread. Items are processed one at a time, in input order.
            result = await asyncio.to_thread(
                extract_clauses_from_text, text, contract_id
            )
            results.append(format_response(result))
    except httpx.HTTPError as exc:
        raise HTTPException(
            status_code=502,
            detail="Contract extraction service request failed",
        ) from exc
    return {"results": results, "count": len(results), "skipped": skipped}
def format_response(result: ClauseExtractionResult) -> dict:
    """Turn an extraction result into the dict returned by the API.

    Besides copying the result's fields, two derived entries are added:
    ``risk_count`` (how many risk flags there are) and
    ``clause_categories`` (the category names found in ``clauses``).
    """
    flags = result.risk_flags
    clauses_by_category = result.clauses
    response = {
        "contract_id": result.contract_id,
        "contract_type": result.contract_type,
        "confidence_score": result.confidence_score,
        "summary": result.summary,
        "parties": result.parties,
        "key_dates": result.key_dates,
        "clauses": clauses_by_category,
        "obligations": result.obligations,
        "risk_flags": flags,
        "risk_count": len(flags),
        "clause_categories": list(clauses_by_category.keys()),
    }
    return response


# Step 3: Add a Clause Search Endpoint
Once you've extracted clauses, you often need to search across them — for example, finding all contracts that have a specific liability cap or termination clause:
from typing import Optional
# In-memory clause store (replace with a database in production).
# Maps contract_id -> extraction result. Written by the /store/{contract_id}
# endpoint and read by /search; being a plain dict, its contents do not
# survive a process restart.
clause_store: dict[str, ClauseExtractionResult] = {}
@app.post("/store/{contract_id}")
async def store_extraction(contract_id: str, text: str = Form(...)):
    """Extract and store clauses for later search.

    Args:
        contract_id: Path parameter used as the key in ``clause_store``;
            an existing entry under the same key is overwritten.
        text: Contract text submitted as a form field.

    Returns:
        ``{"stored": True, "contract_id": contract_id}``.

    Raises:
        HTTPException: 400 when ``text`` is empty or whitespace-only;
            502 when the request to the extraction service fails.
    """
    import asyncio

    if not text.strip():
        raise HTTPException(status_code=400, detail="Contract text cannot be empty")
    try:
        # The extraction makes a blocking HTTP call; run it in a worker
        # thread so the event loop can keep serving other requests.
        result = await asyncio.to_thread(
            extract_clauses_from_text, text, contract_id
        )
    except httpx.HTTPError as exc:
        raise HTTPException(
            status_code=502,
            detail="Contract extraction service request failed",
        ) from exc
    clause_store[contract_id] = result
    return {"stored": True, "contract_id": contract_id}
@app.get("/search")
async def search_clauses(
    clause_type: Optional[str] = None,
    keyword: Optional[str] = None,
    has_risk_flag: Optional[bool] = None,
):
    """
    Search stored contract clauses.

    Args:
        clause_type: Filter by clause category (e.g., "termination",
            "liability"); the category name must match exactly.
        keyword: Case-insensitive substring to look for in each clause's
            "text" value.
        has_risk_flag: Filter contracts with/without risk flags.

    Returns:
        ``{"results": [...], "count": <number of contracts>}``. Each result
        carries at most three matching clauses plus ``total_matches``, the
        number of clauses that matched before truncation. Contracts with no
        matching clause are omitted.
    """
    # Lower-case the keyword once rather than once per stored contract.
    keyword_lower = keyword.lower() if keyword else None

    results = []
    for contract_id, extraction in clause_store.items():
        # Filter by risk flags: skip when presence of flags differs from
        # what was asked for.
        if has_risk_flag is not None and bool(extraction.risk_flags) != has_risk_flag:
            continue

        # Filter by clause type
        if clause_type:
            if clause_type not in extraction.clauses:
                continue
            matching_clauses = extraction.clauses[clause_type]
        else:
            # No category filter: consider every clause of the contract.
            matching_clauses = [
                clause
                for clauses in extraction.clauses.values()
                for clause in clauses
            ]

        # Filter by keyword
        if keyword_lower is not None:
            # Clauses that are not dicts, or whose "text" is missing, None
            # or not a string, simply do not match instead of raising.
            matching_clauses = [
                clause
                for clause in matching_clauses
                if isinstance(clause, dict)
                and keyword_lower in str(clause.get("text") or "").lower()
            ]

        if matching_clauses:
            results.append({
                "contract_id": contract_id,
                "contract_type": extraction.contract_type,
                "matching_clauses": matching_clauses[:3],  # Limit to top 3 per contract
                "total_matches": len(matching_clauses),
                "risk_flag_count": len(extraction.risk_flags),
            })

    return {
        "results": results,
        "count": len(results),
    }


# Step 4: Common Clause Categories to Extract
When configuring your extraction, these are the clause categories most commonly needed:
# Extraction config covering the clause categories and risk checks most
# commonly needed for commercial contracts.
STANDARD_CLAUSE_CONFIG = {
    "clauses": {
        "categories": [
            # Invoice due dates, late fees, payment methods
            "payment_terms",
            # How and when either party can exit the contract
            "termination",
            # Liability caps and exclusions
            "limitation_of_liability",
            # IP ownership and licensing terms
            "intellectual_property",
            # NDA provisions and exceptions
            "confidentiality",
            # What each party warrants
            "warranties",
            # Who indemnifies whom for what
            "indemnification",
            # Arbitration, jurisdiction, governing law
            "dispute_resolution",
            # Auto-renewal clauses and opt-out windows
            "renewal_and_extension",
            # Events excusing non-performance
            "force_majeure",
            # Whether the contract can be transferred
            "assignment",
            # Required notice periods and methods
            "notice",
        ],
    },
    "dates": True,
    "obligations": True,
    "risk_flags": {
        "enabled": True,
        "check_for": [
            # No liability cap
            "unlimited_liability",
            # Only one party can terminate
            "one_sided_termination",
            # Auto-renews with no opt-out window
            "auto_renewal_no_notice",
            # Assigns all IP broadly
            "broad_ip_assignment",
            # Individual liability for corporate obligations
            "personal_guarantee",
            # One party can change terms unilaterally
            "unilateral_modification",
        ],
    },
}
# Use in extraction
# (contract_text is not defined in this snippet; it is assumed to hold the
# contract's full text.)
result = extract_clauses_from_text(
    text=contract_text,
    contract_id="vendor-agreement-2026-001",
    extraction_config=STANDARD_CLAUSE_CONFIG,
)
# Example Output Structure
Here's what a typical extraction response looks like:
{
"contract_id": "vendor-agreement-2026-001",
"contract_type": "software_license_agreement",
"confidence_score": 0.94,
"parties": [
{"name": "Acme Corp", "role": "licensor"},
{"name": "TechStartup Inc", "role": "licensee"}
],
"key_dates": [
{"label": "effective_date", "date": "2026-03-01"},
{"label": "initial_term_end", "date": "2027-02-28"},
{"label": "renewal_notice_deadline", "date": "2026-11-30"},
{"label": "payment_due", "recurring": "net_30"}
],
"clauses": {
"termination": [
{
"text": "Either party may terminate this Agreement for convenience...",
"notice_required_days": 30,
"for_cause": true,
"for_convenience": true
}
],
"limitation_of_liability": [
{
"text": "In no event shall either party's liability exceed...",
"cap_type": "fees_paid",
"cap_multiplier": 12,
"excludes": ["consequential", "indirect", "punitive"]
}
]
},
"risk_flags": [
{
"type": "auto_renewal_no_notice",
"severity": "medium",
"clause": "renewal_and_extension",
"description": "Contract auto-renews without a notice window — review renewal_notice_deadline"
}
]
}
Running the Service
# Install dependencies
pip install fastapi uvicorn python-multipart httpx
# Run the extraction service
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
# Test with curl
curl -X POST http://localhost:8000/extract/text \
-F "text=This Software License Agreement ('Agreement') is entered into..." \
-F "contract_id=test-001"
What This Enables
With this clause extractor in place, you can build:
- Contract management dashboards — see all your contracts' key dates and obligations in one view
- Renewal reminder systems — automatically alert 90/60/30 days before renewal windows close
- Compliance monitoring — flag any new contracts that lack required provisions
- M&A due diligence tools — quickly extract and compare key terms across hundreds of target company contracts
- Legal review prioritization — automatically surface the contracts with the most risk flags for attorney review
Getting Started
Get access to the LegalGuard AI API at apivult.com. The free tier supports 20 contract extractions per month — enough to validate the extraction quality against your specific contract types before scaling up.
Pro tier adds bulk processing, custom clause category definitions, and cross-contract obligation tracking for enterprise legal teams.