Documentation
Document Types
Types for document upload, processing, and search operations
DocumentUploadResult
Result of a single document upload (returned by upload_one())
@dataclass(frozen=True)
class DocumentUploadResult:
    document_id: str       # Unique document identifier
    filename: str          # Original filename
    object_key: str        # Storage object key (S3 path)
    content_type: str      # MIME type
    size_bytes: int        # File size in bytes
    text_extraction_status: DocumentProcessingStatus = DocumentProcessingStatus.PENDING  # Processing status
    embedding_status: Optional[str] = None
    page_count: Optional[int] = None              # Pages (for PDFs)
    chunk_count: Optional[int] = None             # Text chunks created
    extracted_text_preview: Optional[str] = None
    created_at: Optional[datetime] = None
    processing_error: Optional[str] = None        # Error message if failed
    processing_error_type: Optional[str] = None   # Error classification
    processing_is_retryable: Optional[bool] = None  # Whether retry may succeed

    # Properties
    is_failed: bool     # text_extraction_status == FAILED
    is_completed: bool  # text_extraction_status == COMPLETED
    is_pending: bool    # text_extraction_status in (PENDING, PROCESSING)

    def to_dict(self, exclude_none: bool = True) -> dict[str, Any]: ...
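Example: inspecting a single upload result. upload_one() and the fields used below come from this reference; the client object and the documents.upload_one() call path are illustrative assumptions.

result = client.documents.upload_one("reports/q3.pdf")  # hypothetical call path

print(result.document_id, result.size_bytes)
if result.is_failed:
    # processing_error_type and processing_is_retryable classify the failure
    print(f"failed: {result.processing_error} "
          f"(retryable: {result.processing_is_retryable})")
elif result.is_pending:
    print("still processing")
else:
    print(f"done: {result.page_count} pages, {result.chunk_count} chunks")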
BatchDocumentUploadResults

List of DocumentUploadResult with batch helpers (returned by upload())

class BatchDocumentUploadResults(list):
    # Properties
    has_failures: bool    # True if any processing failed
    failed_count: int     # Number of failed results
    succeeded_count: int  # Number of successful results
    pending_count: int    # Number still pending

    # Methods
    def failed(self) -> list[DocumentUploadResult]: ...
    def succeeded(self) -> list[DocumentUploadResult]: ...
    def pending(self) -> list[DocumentUploadResult]: ...
    def retryable(self) -> list[DocumentUploadResult]: ...
    def raise_on_failures(self) -> BatchDocumentUploadResults: ...
    def summary(self) -> str: ...  # e.g. "3 succeeded, 1 failed (1 retryable)"
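Example: handling a batch upload with the helpers above. The documents.upload() call path is an assumption; the helper methods and properties are documented here.

results = client.documents.upload(["a.pdf", "b.docx", "c.txt"])
print(results.summary())  # e.g. "3 succeeded, 1 failed (1 retryable)"

if results.has_failures:
    # Retry only the failures classified as retryable
    retry_paths = [r.filename for r in results.retryable()]
    results = client.documents.upload(retry_paths)

# Alternatively, fail fast: raises on any failure, otherwise returns the list
results.raise_on_failures()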
DocumentItem

Document summary in list responses

@dataclass(frozen=True)
class DocumentItem:
    id: str            # Unique document identifier
    filename: str      # Original uploaded filename
    content_type: str  # MIME type
    size_bytes: int    # File size
    text_extraction_status: str  # pending | processing | completed | failed
    title: Optional[str] = None
    page_count: Optional[int] = None  # Pages (for PDFs)
    embedding_status: Optional[str] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    folder_id: Optional[str] = None
    tags: Optional[list[str]] = None
DocumentDetails

Full document details (returned by get())

@dataclass(frozen=True)
class DocumentDetails:
    id: str
    filename: str
    content_type: str
    size_bytes: int
    text_extraction_status: str
    title: Optional[str] = None
    page_count: Optional[int] = None
    embedding_status: Optional[str] = None
    chunk_count: Optional[int] = None             # Number of text chunks
    extracted_text_preview: Optional[str] = None  # Preview of extracted text
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    folder_id: Optional[str] = None
    tags: Optional[list[str]] = None
    object_key: Optional[str] = None
    download_url: Optional[str] = None            # Presigned download URL
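Example: fetching details and downloading the original file. The documents.get() call path is an assumption; the download uses only the standard library.

details = client.documents.get("doc_123")  # hypothetical call path
print(details.title or details.filename, details.chunk_count)

# download_url, when present, is a short-lived presigned URL; use it promptly
if details.download_url:
    import urllib.request
    with urllib.request.urlopen(details.download_url) as resp:
        data = resp.read()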
DocumentList

Paginated list of documents

@dataclass(frozen=True)
class DocumentList:
    documents: list[DocumentItem]  # Document summaries
    total_count: int               # Total matching query
    page: int                      # Current page number (1-based)
    page_size: int                 # Items per page
    has_more: bool                 # More pages available
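Example: walking every page of a listing. The documents.list() call path and its parameters are assumptions; the pagination fields are from DocumentList above.

page = 1
while True:
    listing = client.documents.list(page=page, page_size=50)
    for doc in listing.documents:
        print(doc.id, doc.filename, doc.text_extraction_status)
    if not listing.has_more:
        break
    page += 1  # page numbers are 1-based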
DocumentChunk

Individual text chunk from a document

@dataclass(frozen=True)
class DocumentChunk:
    id: str           # Chunk identifier
    document_id: str  # Parent document ID
    content: str      # Text content
    chunk_index: int  # Position in document (0-based)
    page_numbers: Optional[list[int]] = None  # Pages this chunk spans
    token_count: Optional[int] = None
    embedding: Optional[list[float]] = None   # Vector embedding (if requested)

DocumentChunksResponse

Response containing all chunks for a document

@dataclass(frozen=True)
class DocumentChunksResponse:
    document_id: str             # Parent document ID
    chunks: list[DocumentChunk]  # All chunks
    total_chunks: int            # Total number of chunks
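Example: reassembling a document's text from its chunks. The documents.get_chunks() call path is an assumption; chunk_index ordering is documented above.

resp = client.documents.get_chunks("doc_123")
assert len(resp.chunks) == resp.total_chunks

# chunk_index is 0-based, so sorting by it restores document order
full_text = "\n".join(
    c.content for c in sorted(resp.chunks, key=lambda c: c.chunk_index)
)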
DocumentSearchResult

Single search result (chunk with relevance score)

@dataclass(frozen=True)
class DocumentSearchResult:
    chunk_id: str           # Chunk identifier
    document_id: str        # Parent document ID
    document_filename: str  # Filename of the document
    content: str            # Matching text content
    score: float            # Similarity score (0-1)
    page_numbers: Optional[list[int]] = None  # Pages this chunk spans
    chunk_index: Optional[int] = None         # Position in document

DocumentSearchResponse

Complete search response

@dataclass(frozen=True)
class DocumentSearchResponse:
    results: list[DocumentSearchResult]  # Matching chunks
    total_count: int                     # Total matches
    search_time_ms: int                  # Search time in milliseconds
    query: str                           # The search query used
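Example: running a search and printing scored hits. The documents.search() call path and its limit parameter are assumptions; the response fields are from the types above.

resp = client.documents.search("quarterly revenue", limit=5)
print(f"{resp.total_count} matches in {resp.search_time_ms} ms for {resp.query!r}")

for hit in resp.results:
    # score is a 0-1 similarity; higher means more relevant
    pages = hit.page_numbers or []
    print(f"{hit.score:.2f}  {hit.document_filename}  pages={pages}")
    print(f"    {hit.content[:80]}")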
DocumentStatusResult

Document processing status (from get_status() or wait_for_processing())

@dataclass(frozen=True)
class DocumentStatusResult:
    document_id: str
    text_extraction_status: str  # pending | processing | completed | failed
    embedding_status: Optional[str] = None
    page_count: Optional[int] = None
    chunk_count: Optional[int] = None
    error_message: Optional[str] = None

    # Properties
    is_completed: bool   # text_extraction_status == "completed"
    is_failed: bool      # text_extraction_status == "failed"
    is_processing: bool  # text_extraction_status in ("pending", "processing")
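Example: a simple polling loop over get_status(). In practice wait_for_processing() presumably wraps a loop like this; the client call path is an assumption.

import time

status = client.documents.get_status("doc_123")
while status.is_processing:  # covers both "pending" and "processing"
    time.sleep(2)            # fixed interval; exponential backoff is a reasonable upgrade
    status = client.documents.get_status("doc_123")

if status.is_failed:
    raise RuntimeError(status.error_message)
print(status.page_count, status.chunk_count)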
DocumentQuotaCheck

Quota check response for document uploads

@dataclass(frozen=True)
class DocumentQuotaCheck:
    can_proceed: bool         # Whether upload can proceed
    document_count: int       # Current document count
    document_limit: int       # Maximum allowed
    storage_used_bytes: int   # Storage currently used
    storage_limit_bytes: int  # Storage limit
    message: str = ""

    # Properties
    documents_remaining: int      # document_limit - document_count
    storage_remaining_bytes: int  # storage_limit_bytes - storage_used_bytes
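Example: a pre-flight quota check before uploading. The documents.check_quota() call path is an assumption; the fields and derived properties are from DocumentQuotaCheck above.

quota = client.documents.check_quota()
if not quota.can_proceed:
    raise RuntimeError(quota.message or "document quota exceeded")

print(f"{quota.documents_remaining} uploads remaining, "
      f"{quota.storage_remaining_bytes / 1_000_000:.1f} MB free")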
DocumentPresignedUploadResult

Presigned URL response for low-level upload workflow

@dataclass(frozen=True)
class DocumentPresignedUploadResult:
    upload_url: str  # Presigned URL for S3 upload
    object_key: str  # S3 object key
    expires_at: Optional[datetime] = None            # URL expiration
    upload_headers: Optional[dict[str, str]] = None  # Headers for upload request

DocumentConfirmResult

Upload confirmation response

@dataclass(frozen=True)
class DocumentConfirmResult:
    document_id: str   # Created document ID
    filename: str      # Original filename
    status: str        # Processing status
    message: str = ""  # Confirmation message
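Example: the low-level upload workflow end to end, sketched with standard-library HTTP. The create_presigned_upload() and confirm_upload() call paths are assumptions; the PUT to the presigned URL is the usual S3 pattern.

import urllib.request

presigned = client.documents.create_presigned_upload("big.pdf")  # hypothetical

with open("big.pdf", "rb") as f:
    req = urllib.request.Request(
        presigned.upload_url,
        data=f.read(),
        method="PUT",
        headers=presigned.upload_headers or {},
    )
    urllib.request.urlopen(req)

# Tell the service the object is in place so processing can start
confirm = client.documents.confirm_upload(presigned.object_key)  # hypothetical
print(confirm.document_id, confirm.status)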
DocumentDeleteResult

Single document deletion result

@dataclass(frozen=True)
class DocumentDeleteResult:
    id: str            # Document identifier
    status: str        # Deletion status (deleted | failed)
    message: str = ""  # Additional information
    deleted_at: Optional[datetime] = None  # Deletion timestamp

DocumentBatchDeleteResponse

Response for batch document deletion

@dataclass(frozen=True)
class DocumentBatchDeleteResponse:
    deleted: list[DocumentDeleteResult]  # Successfully deleted
    failed: list[DocumentDeleteResult]   # Failed to delete
    summary: dict[str, int]              # {total, deleted, failed}
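Example: batch deletion with per-item error reporting. The documents.delete_many() call path is an assumption; the response shape is from DocumentBatchDeleteResponse above.

resp = client.documents.delete_many(["doc_1", "doc_2", "doc_3"])
print(resp.summary)  # e.g. {"total": 3, "deleted": 2, "failed": 1}

for failure in resp.failed:
    print(f"could not delete {failure.id}: {failure.message}")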
DocumentProcessingStatus

Enum for document processing status

class DocumentProcessingStatus(str, Enum):
    PENDING = "pending"        # Uploaded, not yet started
    PROCESSING = "processing"  # Text extraction in progress
    COMPLETED = "completed"    # Processing completed
    FAILED = "failed"          # Processing failed

DocumentProcessingErrorType

Classification of document processing errors for retry strategies

class DocumentProcessingErrorType(str, Enum):
    TIMEOUT = "timeout"                        # Processing timed out (retryable)
    EXTRACTION_ERROR = "extraction_error"      # Text extraction failed (may be retryable)
    UNSUPPORTED_FORMAT = "unsupported_format"  # Format not supported (permanent)
    CORRUPT_FILE = "corrupt_file"              # File is corrupted (permanent)
    PASSWORD_PROTECTED = "password_protected"  # Requires password (permanent)
    TOO_LARGE = "too_large"                    # Exceeds size limits (permanent)
    EMBEDDING_ERROR = "embedding_error"        # Embedding generation failed (retryable)
    RESOURCE_LIMIT = "resource_limit"          # Quota exceeded (permanent)
    UNKNOWN = "unknown"                        # Unclassified (not retryable)
DocumentProcessingFailure

Details about a failed document processing operation with error classification

@dataclass(frozen=True)
class DocumentProcessingFailure:
    document_id: str    # ID of the document that failed
    error_message: str  # Human-readable error message
    error_type: DocumentProcessingErrorType  # Classification of the error
    is_retryable: bool  # Whether the operation can be retried
    filename: Optional[str] = None  # Original filename

    # Static and class methods
    @staticmethod
    def classify_error(error_msg: str) -> tuple[DocumentProcessingErrorType, bool]: ...

    @classmethod
    def from_error_message(
        cls,
        document_id: str,
        error_msg: str,
        filename: Optional[str] = None,
    ) -> DocumentProcessingFailure: ...
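Example: classifying a raw error message and branching on retryability. from_error_message() is documented above; the documents.reprocess() call is a hypothetical retry hook, and the error string is made up for illustration.

failure = DocumentProcessingFailure.from_error_message(
    document_id="doc_123",
    error_msg="text extraction timed out after 300s",
    filename="scan.pdf",
)

if failure.is_retryable:
    # e.g. TIMEOUT or EMBEDDING_ERROR; a retry may succeed
    client.documents.reprocess(failure.document_id)  # hypothetical
else:
    # e.g. UNSUPPORTED_FORMAT or CORRUPT_FILE; retrying will fail again
    print(f"{failure.filename}: {failure.error_type.value} (permanent)")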