Semantic Search Example
Complete example of using semantic search to find documents by meaning.
Overview
This example demonstrates:
- Performing semantic search queries
- Filtering search results
- Using enhanced search (Pro+)
- Displaying and ranking results
Python Example
import requests
from typing import Optional, Dict, Any, List
class ArchivusSearch:
    """Small client for the Archivus search endpoints.

    Every request carries a bearer token and the tenant subdomain header
    built in ``__init__``.
    """

    def __init__(self, api_key: str, tenant: str, timeout: float = 30.0):
        """Store credentials and pre-build the request headers.

        Args:
            api_key: API key sent as a ``Bearer`` token.
            tenant: Tenant subdomain sent in ``X-Tenant-Subdomain``.
            timeout: Seconds to wait for the server before ``requests``
                gives up. Without a timeout, ``requests`` waits forever
                on a stalled connection.
        """
        self.api_key = api_key
        self.tenant = tenant
        self.base_url = "https://api.archivus.app/api/v1"
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "X-Tenant-Subdomain": tenant
        }

    def search(
        self,
        query: str,
        mode: str = "semantic",
        limit: int = 20,
        folder_id: Optional[str] = None,
        tag_id: Optional[str] = None,
        date_start: Optional[str] = None,
        date_end: Optional[str] = None
    ) -> Dict[str, Any]:
        """Perform semantic or keyword search.

        Args:
            query: Search text, sent as the ``q`` query parameter.
            mode: Search mode, sent as ``mode``.
            limit: Maximum number of results, sent as ``limit``.
            folder_id: Optional folder filter; omitted when falsy.
            tag_id: Optional tag filter; omitted when falsy.
            date_start: Optional start date filter; omitted when falsy.
            date_end: Optional end date filter; omitted when falsy.

        Returns:
            The decoded JSON response body.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = f"{self.base_url}/search"
        params = {
            "q": query,
            "mode": mode,
            "limit": limit
        }
        # Only send the filters the caller actually supplied.
        optional_filters = {
            "folder_id": folder_id,
            "tag_id": tag_id,
            "date_start": date_start,
            "date_end": date_end,
        }
        params.update(
            {name: value for name, value in optional_filters.items() if value}
        )
        response = requests.get(
            url, headers=self.headers, params=params, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def enhanced_search(
        self,
        query: str,
        filters: Optional[Dict[str, Any]] = None,
        min_score: float = 0.7
    ) -> Dict[str, Any]:
        """Perform enhanced AI search (Pro+).

        Args:
            query: Search text, sent as ``query`` in the JSON body.
            filters: Optional filter mapping; omitted from the body when
                empty or ``None``.
            min_score: Score threshold, sent as ``min_score``.

        Returns:
            The decoded JSON response body.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = f"{self.base_url}/search/enhanced"
        data = {
            "query": query,
            "min_score": min_score
        }
        if filters:
            data["filters"] = filters
        response = requests.post(
            url, headers=self.headers, json=data, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def display_results(self, results: Dict[str, Any]):
        """Display search results in a readable format.

        Args:
            results: Response mapping with ``results``, ``total`` and
                ``mode`` keys. Missing or ``None`` values fall back to an
                empty list, ``0`` and ``"unknown"`` respectively.
        """
        # `or` (rather than a .get default) also covers keys that are
        # present but null in the JSON payload.
        search_results = results.get("results") or []
        total = results.get("total") or 0
        mode = results.get("mode") or "unknown"
        print(f"\nFound {total} documents (mode: {mode})")
        print("=" * 60)
        for i, result in enumerate(search_results, 1):
            doc = result["document"]
            score = result["score"]
            relevance = result["relevance"]
            print(f"\n{i}. {doc['filename']}")
            print(f" Score: {score:.2f} ({relevance})")
            ai_summary = doc.get("ai_summary")
            if ai_summary:
                # Keep the listing compact: cap summaries at 100 characters.
                if len(ai_summary) > 100:
                    summary = ai_summary[:100] + "..."
                else:
                    summary = ai_summary
                print(f" Summary: {summary}")
            if result.get("highlight"):
                print(f" Match: {result['highlight']}")
# Usage Example
def main():
    """Run the three example searches and print each result set."""
    client = ArchivusSearch(api_key="YOUR_API_KEY", tenant="your-tenant")

    # Basic semantic search
    print("=== Basic Semantic Search ===")
    basic_hits = client.search(
        "contracts expiring in Q4 2025",
        mode="semantic",
        limit=10,
    )
    client.display_results(basic_hits)

    # Filtered search
    print("\n=== Filtered Search ===")
    filtered_hits = client.search(
        "employee benefits",
        mode="semantic",
        limit=10,
        folder_id="folder_hr",
        date_start="2025-01-01",
    )
    client.display_results(filtered_hits)

    # Enhanced search (Pro+)
    print("\n=== Enhanced Search ===")
    enhanced_filters = {
        "tags": ["contract"],
        "ai_categories": ["legal"],
        "date_range": {"start": "2025-01-01", "end": "2025-12-31"},
    }
    enhanced_hits = client.enhanced_search(
        "Find all contracts with renewal clauses",
        filters=enhanced_filters,
        min_score=0.7,
    )
    client.display_results(enhanced_hits)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
JavaScript Example
/**
 * Small client for the Archivus search endpoints.
 * Every request carries the bearer token and tenant subdomain header.
 */
class ArchivusSearch {
  /**
   * @param {string} apiKey - API key sent as a Bearer token.
   * @param {string} tenant - Tenant subdomain sent in X-Tenant-Subdomain.
   */
  constructor(apiKey, tenant) {
    this.apiKey = apiKey;
    this.tenant = tenant;
    this.baseURL = 'https://api.archivus.app/api/v1';
    this.headers = {
      Authorization: `Bearer ${apiKey}`,
      'X-Tenant-Subdomain': tenant,
    };
  }

  /**
   * Perform a semantic or keyword search.
   * @param {string} query - Search text, sent as the `q` parameter.
   * @param {object} [options] - mode, limit, folderId, tagId, dateStart, dateEnd.
   * @returns {Promise<object>} Decoded JSON response body.
   * @throws {Error} When the response status is not OK.
   */
  async search(query, options = {}) {
    const { mode = 'semantic', limit = 20, folderId, tagId, dateStart, dateEnd } = options;

    const endpoint = new URL(`${this.baseURL}/search`);
    const { searchParams } = endpoint;
    searchParams.set('q', query);
    searchParams.set('mode', mode);
    searchParams.set('limit', limit);

    // Optional filters are only sent when the caller supplied a truthy value.
    const optionalFilters = [
      ['folder_id', folderId],
      ['tag_id', tagId],
      ['date_start', dateStart],
      ['date_end', dateEnd],
    ];
    for (const [name, value] of optionalFilters) {
      if (value) {
        searchParams.set(name, value);
      }
    }

    const response = await fetch(endpoint, { headers: this.headers });
    if (response.ok) {
      return response.json();
    }
    throw new Error(`Search failed: ${response.statusText}`);
  }

  /**
   * Perform enhanced AI search (Pro+).
   * @param {string} query - Search text.
   * @param {object} [filters] - Filter object sent as-is in the JSON body.
   * @param {number} [minScore] - Score threshold, sent as `min_score`.
   * @returns {Promise<object>} Decoded JSON response body.
   * @throws {Error} When the response status is not OK.
   */
  async enhancedSearch(query, filters = {}, minScore = 0.7) {
    const endpoint = `${this.baseURL}/search/enhanced`;
    const requestHeaders = { ...this.headers, 'Content-Type': 'application/json' };
    const payload = JSON.stringify({ query, filters, min_score: minScore });

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: requestHeaders,
      body: payload,
    });
    if (response.ok) {
      return response.json();
    }
    throw new Error(`Enhanced search failed: ${response.statusText}`);
  }

  /**
   * Print search results in a readable format.
   * @param {object} results - Response with `results`, `total` and `mode`.
   */
  displayResults(results) {
    const hits = results.results || [];
    const total = results.total || 0;
    const mode = results.mode || 'unknown';

    console.log(`\nFound ${total} documents (mode: ${mode})`);
    console.log('='.repeat(60));

    hits.forEach((hit, index) => {
      const doc = hit.document;
      console.log(`\n${index + 1}. ${doc.filename}`);
      console.log(` Score: ${hit.score.toFixed(2)} (${hit.relevance})`);

      if (doc.ai_summary) {
        // Keep the listing compact: cap summaries at 100 characters.
        let summary = doc.ai_summary;
        if (summary.length > 100) {
          summary = summary.substring(0, 100) + '...';
        }
        console.log(` Summary: ${summary}`);
      }

      if (hit.highlight) {
        console.log(` Match: ${hit.highlight}`);
      }
    });
  }
}
// Usage Example
/** Run the three example searches and print each result set. */
async function main() {
  const client = new ArchivusSearch('YOUR_API_KEY', 'your-tenant');

  // Basic semantic search
  console.log('=== Basic Semantic Search ===');
  const basicOptions = { mode: 'semantic', limit: 10 };
  const basicHits = await client.search('contracts expiring in Q4 2025', basicOptions);
  client.displayResults(basicHits);

  // Filtered search
  console.log('\n=== Filtered Search ===');
  const filteredOptions = {
    mode: 'semantic',
    folderId: 'folder_hr',
    dateStart: '2025-01-01',
    limit: 10,
  };
  const filteredHits = await client.search('employee benefits', filteredOptions);
  client.displayResults(filteredHits);

  // Enhanced search (Pro+)
  console.log('\n=== Enhanced Search ===');
  const enhancedFilters = {
    tags: ['contract'],
    ai_categories: ['legal'],
    date_range: { start: '2025-01-01', end: '2025-12-31' },
  };
  const enhancedHits = await client.enhancedSearch(
    'Find all contracts with renewal clauses',
    enhancedFilters,
    0.7
  );
  client.displayResults(enhancedHits);
}

main();
Real-World Use Cases
Find Expiring Contracts
# Find all contracts expiring in the next 90 days
# (assumes `search` is an ArchivusSearch instance, as in the Python example)
from datetime import datetime, timedelta

# Upper bound of the search window, formatted as YYYY-MM-DD
window_end = datetime.now() + timedelta(days=90)
end_date = window_end.strftime("%Y-%m-%d")

results = search.search(
    "contracts expiring soon",
    mode="semantic",
    tag_id="tag_contract",
    date_end=end_date,
)

# Report each matching contract by filename
for hit in results["results"]:
    print(f"Contract: {hit['document']['filename']} expires soon")
Find Related Documents
# Find documents similar to a specific document
# NOTE(review): `client` is not defined in this example; presumably it is an
# Archivus API client exposing get_document() - confirm before running.
document_id = "doc_abc123"
document = client.get_document(document_id)

# Seed the query with the first 100 characters of the document's AI summary
summary_excerpt = document["ai_summary"][:100]
query = f"documents similar to {summary_excerpt}"

results = search.search(query=query, mode="semantic")
total_matches = results["total"]
print(f"Found {total_matches} similar documents")
Search by Concept
# Find documents about a concept without exact keywords
concept_query = "employee health insurance benefits"
results = search.search(query=concept_query, mode="semantic")

# This will find documents about:
# - Health insurance
# - Employee benefits
# - Medical coverage
# - Healthcare plans
# etc.
Best Practices
Query Optimization
- Use natural language - “Find contracts expiring soon” vs “contract expire”
- Be specific - “Q4 2025 service contracts” vs “contracts”
- Combine with filters - Use semantic search + date/tag filters
Performance
- Limit results - Use the `limit` parameter (default: 20)
- Use filters - Narrow search scope for faster results
- Cache results - Cache search results when possible
Result Handling
- Check scores - Filter by relevance score if needed
- Display highlights - Show matching text to users
- Handle empty results - Provide helpful messages
Next Steps
- Chat Integration - Ask questions about search results
- Search API - Complete search API reference
- Semantic Search Guide - Deep dive into semantic search
Questions? Check the FAQ or contact support@ubiship.com