Semantic Search Example

Complete example of using semantic search to find documents by meaning.


Overview

This example demonstrates:

  • Performing semantic search queries
  • Filtering search results
  • Using enhanced search (Pro+)
  • Displaying and ranking results

Python Example

import requests
from typing import Optional, Dict, Any, List

class ArchivusSearch:
    """Small client for the Archivus search endpoints.

    Wraps the standard search endpoint (``GET {base_url}/search``) and the
    enhanced search endpoint (``POST {base_url}/search/enhanced``), and
    provides a helper that prints a response payload to stdout.
    """

    def __init__(self, api_key: str, tenant: str, timeout: float = 30.0):
        """Store credentials and build the headers sent with every request.

        Args:
            api_key: API key, sent as a bearer token.
            tenant: Tenant subdomain, sent in ``X-Tenant-Subdomain``.
            timeout: Seconds ``requests`` waits on the server before giving
                up. ``requests`` applies no timeout by default, so without
                this a stalled connection would block the caller forever.
        """
        self.api_key = api_key
        self.tenant = tenant
        self.timeout = timeout
        self.base_url = "https://api.archivus.app/api/v1"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "X-Tenant-Subdomain": tenant,
        }

    def search(
        self,
        query: str,
        mode: str = "semantic",
        limit: int = 20,
        folder_id: Optional[str] = None,
        tag_id: Optional[str] = None,
        date_start: Optional[str] = None,
        date_end: Optional[str] = None
    ) -> Dict[str, Any]:
        """Perform semantic or keyword search.

        Args:
            query: Search text, sent as the ``q`` query parameter.
            mode: Search mode, sent as ``mode`` (defaults to ``"semantic"``).
            limit: Value sent as the ``limit`` query parameter.
            folder_id: Optional ``folder_id`` filter.
            tag_id: Optional ``tag_id`` filter.
            date_start: Optional ``date_start`` filter.
            date_end: Optional ``date_end`` filter.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If the server does not answer within
                ``self.timeout`` seconds.
        """
        params: Dict[str, Any] = {"q": query, "mode": mode, "limit": limit}

        optional_filters = {
            "folder_id": folder_id,
            "tag_id": tag_id,
            "date_start": date_start,
            "date_end": date_end,
        }
        # Only forward the filters the caller actually supplied.
        params.update(
            {name: value for name, value in optional_filters.items() if value}
        )

        response = requests.get(
            f"{self.base_url}/search",
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def enhanced_search(
        self,
        query: str,
        filters: Optional[Dict[str, Any]] = None,
        min_score: float = 0.7
    ) -> Dict[str, Any]:
        """Perform enhanced AI search (Pro+).

        Args:
            query: Search text, sent as ``query`` in the JSON body.
            filters: Optional filter mapping; included in the body only when
                non-empty.
            min_score: Value sent as ``min_score`` in the JSON body.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If the server does not answer within
                ``self.timeout`` seconds.
        """
        payload: Dict[str, Any] = {"query": query, "min_score": min_score}
        if filters:
            payload["filters"] = filters

        response = requests.post(
            f"{self.base_url}/search/enhanced",
            headers=self.headers,
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def display_results(self, results: Dict[str, Any]) -> None:
        """Print a search response in a readable format.

        Reads ``results``, ``total`` and ``mode`` from the payload (each with
        a fallback when absent). For every hit it prints the filename, the
        score with its relevance label, an AI summary truncated to 100
        characters when present, and the highlight when present.

        Args:
            results: Response payload as returned by ``search`` or
                ``enhanced_search``.
        """
        hits = results.get("results", [])
        total = results.get("total", 0)
        mode = results.get("mode", "unknown")

        print(f"\nFound {total} documents (mode: {mode})")
        print("=" * 60)

        for position, hit in enumerate(hits, 1):
            doc = hit["document"]
            score = hit["score"]
            relevance = hit["relevance"]

            print(f"\n{position}. {doc['filename']}")
            print(f"   Score: {score:.2f} ({relevance})")

            summary = doc.get("ai_summary")
            if summary:
                # Keep long summaries to a single readable line.
                if len(summary) > 100:
                    summary = summary[:100] + "..."
                print(f"   Summary: {summary}")
            if hit.get("highlight"):
                print(f"   Match: {hit['highlight']}")


# Usage Example
def main():
    """Run three demo queries (basic, filtered, enhanced) and print each result set."""
    client = ArchivusSearch(api_key="YOUR_API_KEY", tenant="your-tenant")

    # 1) Plain semantic query without any filters.
    print("=== Basic Semantic Search ===")
    basic_hits = client.search(
        query="contracts expiring in Q4 2025", mode="semantic", limit=10
    )
    client.display_results(basic_hits)

    # 2) Same endpoint, this time passing folder_id and date_start filters.
    print("\n=== Filtered Search ===")
    filtered_hits = client.search(
        query="employee benefits",
        mode="semantic",
        limit=10,
        folder_id="folder_hr",
        date_start="2025-01-01",
    )
    client.display_results(filtered_hits)

    # 3) Enhanced endpoint (Pro+) with a structured filter payload.
    print("\n=== Enhanced Search ===")
    enhanced_filters = {
        "tags": ["contract"],
        "ai_categories": ["legal"],
        "date_range": {"start": "2025-01-01", "end": "2025-12-31"},
    }
    enhanced_hits = client.enhanced_search(
        query="Find all contracts with renewal clauses",
        filters=enhanced_filters,
        min_score=0.7,
    )
    client.display_results(enhanced_hits)


# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

JavaScript Example

/**
 * Small client for the Archivus search endpoints.
 *
 * Wraps the standard search endpoint (GET {baseURL}/search) and the enhanced
 * search endpoint (POST {baseURL}/search/enhanced), and provides a helper
 * that prints a response payload to the console.
 */
class ArchivusSearch {
  /**
   * @param {string} apiKey - API key, sent as a bearer token.
   * @param {string} tenant - Tenant subdomain, sent in X-Tenant-Subdomain.
   */
  constructor(apiKey, tenant) {
    this.apiKey = apiKey;
    this.tenant = tenant;
    this.baseURL = 'https://api.archivus.app/api/v1';
    this.headers = {
      'Authorization': `Bearer ${apiKey}`,
      'X-Tenant-Subdomain': tenant
    };
  }

  /**
   * Perform semantic or keyword search.
   *
   * @param {string} query - Search text, sent as the `q` query parameter.
   * @param {object} [options]
   * @param {string} [options.mode='semantic'] - Sent as `mode`.
   * @param {number} [options.limit=20] - Sent as `limit`.
   * @param {?string} [options.folderId] - Optional `folder_id` filter.
   * @param {?string} [options.tagId] - Optional `tag_id` filter.
   * @param {?string} [options.dateStart] - Optional `date_start` filter.
   * @param {?string} [options.dateEnd] - Optional `date_end` filter.
   * @returns {Promise<object>} The decoded JSON body of the response.
   * @throws {Error} When the response status is not OK.
   */
  async search(query, options = {}) {
    const {
      mode = 'semantic',
      limit = 20,
      folderId = null,
      tagId = null,
      dateStart = null,
      dateEnd = null
    } = options;

    const url = new URL(`${this.baseURL}/search`);
    url.searchParams.set('q', query);
    url.searchParams.set('mode', mode);
    url.searchParams.set('limit', String(limit));

    // Only forward the filters the caller actually supplied.
    if (folderId) url.searchParams.set('folder_id', folderId);
    if (tagId) url.searchParams.set('tag_id', tagId);
    if (dateStart) url.searchParams.set('date_start', dateStart);
    if (dateEnd) url.searchParams.set('date_end', dateEnd);

    const response = await fetch(url, { headers: this.headers });
    if (!response.ok) {
      // statusText is empty over HTTP/2, so always include the numeric status.
      throw new Error(
        `Search failed: ${response.status} ${response.statusText}`.trim()
      );
    }

    return response.json();
  }

  /**
   * Perform enhanced AI search (Pro+).
   *
   * @param {string} query - Search text, sent as `query` in the JSON body.
   * @param {object} [filters={}] - Filter object, sent as `filters`.
   * @param {number} [minScore=0.7] - Sent as `min_score`.
   * @returns {Promise<object>} The decoded JSON body of the response.
   * @throws {Error} When the response status is not OK.
   */
  async enhancedSearch(query, filters = {}, minScore = 0.7) {
    const response = await fetch(`${this.baseURL}/search/enhanced`, {
      method: 'POST',
      headers: { ...this.headers, 'Content-Type': 'application/json' },
      body: JSON.stringify({ query, filters, min_score: minScore })
    });

    if (!response.ok) {
      // statusText is empty over HTTP/2, so always include the numeric status.
      throw new Error(
        `Enhanced search failed: ${response.status} ${response.statusText}`.trim()
      );
    }

    return response.json();
  }

  /**
   * Print a search response in a readable format.
   *
   * Reads `results`, `total` and `mode` from the payload (each with a
   * fallback when absent). For every hit it logs the filename, the score
   * with its relevance label, an AI summary truncated to 100 characters when
   * present, and the highlight when present.
   *
   * @param {object} results - Response payload from search/enhancedSearch.
   */
  displayResults(results) {
    const searchResults = results.results || [];
    const total = results.total || 0;
    const mode = results.mode || 'unknown';

    console.log(`\nFound ${total} documents (mode: ${mode})`);
    console.log('='.repeat(60));

    searchResults.forEach((result, i) => {
      const doc = result.document;
      const score = result.score;
      const relevance = result.relevance;

      console.log(`\n${i + 1}. ${doc.filename}`);
      console.log(`   Score: ${score.toFixed(2)} (${relevance})`);
      if (doc.ai_summary) {
        // Keep long summaries to a single readable line.
        const summary = doc.ai_summary.length > 100
          ? doc.ai_summary.substring(0, 100) + '...'
          : doc.ai_summary;
        console.log(`   Summary: ${summary}`);
      }
      if (result.highlight) {
        console.log(`   Match: ${result.highlight}`);
      }
    });
  }
}

// Usage Example
/**
 * Run three demo queries (basic, filtered, enhanced) and print each result set.
 */
async function main() {
  const client = new ArchivusSearch('YOUR_API_KEY', 'your-tenant');

  // 1) Plain semantic query without any filters.
  console.log('=== Basic Semantic Search ===');
  const basicOptions = { mode: 'semantic', limit: 10 };
  const results = await client.search('contracts expiring in Q4 2025', basicOptions);
  client.displayResults(results);

  // 2) Same endpoint, this time passing folderId and dateStart filters.
  console.log('\n=== Filtered Search ===');
  const filteredOptions = {
    mode: 'semantic',
    folderId: 'folder_hr',
    dateStart: '2025-01-01',
    limit: 10
  };
  const filteredResults = await client.search('employee benefits', filteredOptions);
  client.displayResults(filteredResults);

  // 3) Enhanced endpoint (Pro+) with a structured filter payload.
  console.log('\n=== Enhanced Search ===');
  const enhancedFilters = {
    tags: ['contract'],
    ai_categories: ['legal'],
    date_range: { start: '2025-01-01', end: '2025-12-31' }
  };
  const enhancedResults = await client.enhancedSearch(
    'Find all contracts with renewal clauses',
    enhancedFilters,
    0.7
  );
  client.displayResults(enhancedResults);
}

// Start the demo. main() is async, so this call returns a promise that is
// neither awaited nor given a rejection handler here.
main();

Real-World Use Cases

Find Expiring Contracts

# Build a cutoff date 90 days from now and pass it as the date_end filter
from datetime import datetime, timedelta

window_end = datetime.now() + timedelta(days=90)
end_date = window_end.strftime("%Y-%m-%d")

results = search.search(
    query="contracts expiring soon",
    mode="semantic",
    tag_id="tag_contract",
    date_end=end_date,
)

# Report every returned contract by filename
for hit in results["results"]:
    print(f"Contract: {hit['document']['filename']} expires soon")

Find Similar Documents

# Find documents similar to a specific document
document_id = "doc_abc123"
document = client.get_document(document_id)  # assumes a dict-like document — confirm against the client

# ai_summary is treated as optional elsewhere in this example (display_results
# guards it with .get), so only build the similarity query when it is present.
summary = document.get("ai_summary")
if summary:
    # Use the first 100 characters of the summary as the semantic query text
    query = f"documents similar to {summary[:100]}"
    results = search.search(query, mode="semantic")
    print(f"Found {results.get('total', 0)} similar documents")
else:
    print(f"Document {document_id} has no AI summary to search with")

Search by Concept

# Semantic mode searches by meaning, so the query needs no exact keywords
concept_query = "employee health insurance benefits"
results = search.search(query=concept_query, mode="semantic")

# Topics this query is meant to surface:
# - Health insurance
# - Employee benefits
# - Medical coverage
# - Healthcare plans
# etc.

Best Practices

Query Optimization

  1. Use natural language - “Find contracts expiring soon” vs “contract expire”
  2. Be specific - “Q4 2025 service contracts” vs “contracts”
  3. Combine with filters - Use semantic search + date/tag filters

Performance

  1. Limit results - Use limit parameter (default: 20)
  2. Use filters - Narrow search scope for faster results
  3. Cache results - Cache search results when possible

Result Handling

  1. Check scores - Filter by relevance score if needed
  2. Display highlights - Show matching text to users
  3. Handle empty results - Provide helpful messages

Next Steps


Questions? Check the FAQ or contact support@ubiship.com