Managing Files
List, search, and manage your uploaded files
File Access
Access your uploaded images along with their AI-generated descriptions, tags, visible text, and processing history.
List Files
Get a paginated list of your files
from scopix import Scopix

async with Scopix(api_key="scopix_...") as client:
    # List recent files
    files = await client.files.list(limit=20)

    print(f"Total files: {files.total_count}")
    print(f"Has more: {files.has_more}")

    for f in files.items:
        # Summary items expose lightweight fields like filename, media_type,
        # created_at, description_status — not upload_description.
        print(f"{f.title or f.filename} ({f.media_type}) — {f.description_status}")
Search Files
Search by description content
# Search in descriptions and metadata
files = await client.files.list(
    search="damaged electrical equipment",
    search_mode="all"  # all | metadata | visible_text
)

for f in files.items:
    # To read the AI description, fetch details:
    details = await client.files.get(f.id)
    print(f"{details.title}: {details.upload_description}")
Filter by Tags
Find files with specific tags
# Files must have ALL specified tags
files = await client.files.list(
    tags=["priority", "reviewed"]
)
Filter by Description Status
Filter files by whether they have descriptions
# Only files with completed descriptions
files = await client.files.list(has_description=True)

# Only files without descriptions (pending or failed)
files = await client.files.list(has_description=False)
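If you need to block until a pending description finishes, one option is to poll files.get(). A minimal sketch: the status values ("pending", "completed", "failed") are assumptions inferred from the filter semantics above, and wait_for_description is a hypothetical helper, not an SDK method.

import asyncio

async def wait_for_description(client, file_id: str, poll_seconds: float = 5.0):
    # Re-fetch details until description_status leaves "pending" (assumed value)
    while True:
        details = await client.files.get(file_id=file_id)
        if details.description_status != "pending":
            return details
        await asyncio.sleep(poll_seconds)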
Filter by Date
Get files from a date range
from datetime import datetime, timedelta
# Files from the last week
files = await client.files.list(
    date_from=datetime.now() - timedelta(days=7),
    date_to=datetime.now()
)

Sorting
Control result ordering
files = await client.files.list(
    sort_by="content_created_at",  # created_at | content_created_at | title | size_bytes
    sort_order="desc"  # desc | asc
)

Pagination
Paginate through large result sets
offset = 0
limit = 50
all_files = []

while True:
    files = await client.files.list(
        limit=limit,
        offset=offset
    )
    all_files.extend(files.items)

    if not files.has_more:
        break

    offset += limit

print(f"Retrieved {len(all_files)} total files")

Auto-Pagination
Use list_all() to automatically iterate through all pages
# Iterate through all files automatically. Each item is a
# UserFileSummary (ImageFileSummary / VideoFileSummary / ...).
async for file in client.files.list_all(
    search="damaged",
    tags=["priority"]
):
    print(f"{file.title or file.filename} ({file.media_type})")

Get File Details
Get complete information for a single file
from scopix import ImageFileDetails

details = await client.files.get(file_id="abc123...")

# Shared fields work for any media type.
print(f"Title: {details.title}")
print(f"Size: {details.size_bytes} bytes")

# Image-specific fields require narrowing.
if isinstance(details, ImageFileDetails):
    print(f"Format: {details.format}")
    print(f"Visible Text: {details.visible_text}")

    # OCR text regions with bounding boxes
    for region in details.text_regions or []:
        bbox = region.get("bounding_box", {})
        print(f"  '{region['text']}'"
              f" at ({bbox.get('x_min', 0):.2f}, {bbox.get('y_min', 0):.2f})"
              f" → ({bbox.get('x_max', 0):.2f}, {bbox.get('y_max', 0):.2f})")

    # All AI descriptions attached to this image
    for desc in details.full_descriptions or []:
        print(f"\nDescription:")
        print(f"  Content: {desc.description}")
        print(f"  Processing time: {desc.processing_time_ms}ms")

    # Image URLs
    print(f"\nFull URL: {details.full_url}")
    print(f"Thumbnail: {details.thumbnail_url}")
    print(f"Medium: {details.medium_url}")

Update File Metadata
Change title, tags, or user description
# Update title
result = await client.files.update(
    file_id="abc123...",
    title="Damaged Pole #42"
)

# Update tags
result = await client.files.update(
    file_id="abc123...",
    tags=["reviewed", "priority", "damage"]
)

# Set a custom user description (overrides the AI-generated one in display)
result = await client.files.update(
    file_id="abc123...",
    user_description="Manual inspection photo showing corrosion on north face"
)

# Reset user description back to AI-generated
result = await client.files.update(
    file_id="abc123...",
    user_description=None
)

print(f"Updated at: {result.updated_at}")
Delete a File
Soft-delete a file (recoverable for 30 days)
result = await client.files.delete(file_id="abc123...")
print(f"Deleted: {result.message}")print(f"Deleted at: {result.deleted_at}")Batch Delete Files
Delete multiple files in one operation
result = await client.files.batch_delete(
    file_ids=["id1", "id2", "id3"]  # Max 100 files, no duplicates
)

print(f"Deleted: {result.summary['deleted']}")
print(f"Skipped: {result.summary['skipped']}")  # Files still processing
print(f"Failed: {result.summary['failed']}")

# Access individual results
for item in result.deleted:
    print(f"Deleted {item.id} at {item.deleted_at}")
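To delete more than 100 files, split the IDs into chunks and issue one batch_delete per chunk. A minimal sketch built on the call above: chunked_delete is a hypothetical helper, and the de-duplication step respects the no-duplicates rule.

async def chunked_delete(client, file_ids: list[str]) -> None:
    # Drop duplicates while preserving order (duplicates are rejected per call)
    unique_ids = list(dict.fromkeys(file_ids))
    # 100 is the documented per-call maximum
    for start in range(0, len(unique_ids), 100):
        batch = unique_ids[start:start + 100]
        result = await client.files.batch_delete(file_ids=batch)
        print(f"Batch of {len(batch)}: {result.summary['deleted']} deleted")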
Get Image Variants
Get URLs for different image sizes
# Available variants: tiny_64, small_256, medium_512, medium_750,
# large_1024, legend_annotated, architectural_design_annotated, original
url = await client.files.get_variant(
    file_id="abc123...",
    variant_type="medium_750"
)

print(f"Variant URL: {url}")
Download Original File
Download the full original file as bytes
content = await client.files.download(file_id="abc123...")
with open("downloaded.jpg", "wb") as f: f.write(content)Trigger Variants
Trigger Variants
Manually (re)queue generation of image variants
result = await client.files.trigger_variants(file_id="abc123...")
# Response: success, message, task_id, current_status, image_id, skipped_duplicate
print(f"Task ID: {result['task_id']}")
print(f"Current status: {result['current_status']}")
if result.get("skipped_duplicate"):
    print("Variant generation was already in progress — reused existing task")
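Variant generation runs asynchronously, so to see where it stands later you can re-read the file's details, which carry variant_status and variant_count (see ImageFileDetails below). A sketch; the exact status values are not documented here.

from scopix import ImageFileDetails

details = await client.files.get(file_id="abc123...")
if isinstance(details, ImageFileDetails):
    # variant_status / variant_count reflect the current state of generation
    print(f"Variant status: {details.variant_status}")
    print(f"Variants available: {details.variant_count}")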
Filter by IDs
Retrieve specific files by their IDs
files = await client.files.list(
    ids=["id1", "id2", "id3"],  # Max 500 IDs
)
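list(ids=...) returns lightweight summaries. When you need full details for a known set of files, you can fetch them concurrently with asyncio.gather; a sketch, under the assumption that a handful of parallel requests is acceptable to the API's rate limits.

import asyncio

ids = ["id1", "id2", "id3"]
details = await asyncio.gather(
    *(client.files.get(file_id=i) for i in ids)
)
for d in details:
    print(d.title, d.media_type)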
UserFile Type
UserFile is a TypeAlias — a union of four concrete dataclasses discriminated by media_type. It is not itself a class, so it has no fields of its own.
# Declared in scopix.types.files
UserFile: TypeAlias = Union[ImageFile, VideoFile, DocumentFile, LinkFile]
# list() returns a FileList whose .items are UserFileSummary — the
# lightweight tier, NOT UserFile. Narrow with isinstance:
from scopix import ImageFileSummary, VideoFileSummary

result = await client.files.list(limit=20)
for f in result.items:
    if isinstance(f, ImageFileSummary):
        print(f"image {f.id}: {f.dimensions} {f.description_status}")
    elif isinstance(f, VideoFileSummary):
        print(f"video {f.id}: {f.scene_count} scenes")

Tiers: *Summary (list views — minimal fields), *File (card/preview tier, includes AI output like upload_description), and *FileDetails (full detail). upload_description, user_description, description_error, and domain extraction fields live on the *File and *FileDetails tiers — not on *Summary.
UserFileDetails Type
Like UserFile, UserFileDetails is a TypeAlias — a union of four concrete detail dataclasses returned by client.files.get(). Narrow with isinstance (or match) to access media-specific fields.
# Declared in scopix.types.files
UserFileDetails: TypeAlias = Union[
    ImageFileDetails,
    VideoFileDetails,
    DocumentFileDetails,
    LinkFileDetails,
]

file = await client.files.get(file_id)
# show() and play() below are placeholders for your own rendering logic.
match file:
    case ImageFileDetails():
        show(file.full_url, file.dimensions)
    case VideoFileDetails():
        play(file.video_url)
    case DocumentFileDetails():
        print(file.page_count, file.text_extraction_status)
    case LinkFileDetails():
        print(file.source_url, file.crawl_status)

ImageFileDetails
@dataclass(frozen=True)
class ImageFileDetails:
    # Required
    id: str
    size_bytes: int
    content_type: str
    hash: str
    created_at: datetime
    upload_method: str  # "STREAMING" | "presigned" | "multipart"
    has_full_description: bool

    # Discriminator
    media_type: Literal["image"] = "image"

    # Common metadata
    title: Optional[str]
    filename: Optional[str]
    original_filename: Optional[str]
    tags: list[str]
    content_created_at: Optional[datetime]
    updated_at: Optional[datetime]
    content_category: Optional[str]

    # Description / OCR
    upload_description: Optional[str]
    user_description: Optional[str]
    description_status: Optional[str]
    description_error: Optional[str]
    confidence_score: Optional[float]
    visible_text: Optional[str]
    text_regions: Optional[list[dict[str, Any]]]
    description_generated_at: Optional[datetime]
    full_descriptions: list[FullDescription]
    processing_history: list[ProcessingHistory]
    formatted_document: Optional[str]

    # Image-specific
    dimensions: dict[str, int]  # {"width": …, "height": …}
    format: str
    full_url: Optional[str]
    medium_url: Optional[str]
    thumbnail_url: Optional[str]
    original_url: Optional[str]
    blur_hash: Optional[str]
    variant_status: Optional[str]
    variant_count: Optional[int]

    # Schema / parent
    custom_schema_id: Optional[str]
    parent_document_id: Optional[str]

    # Domain-specific extractions (populated by content_category)
    # legend_data, architectural_design_data, ce_plan_data,
    # layout_region_data, real_estate_data, mining_data,
    # construction_data, facility_assessment_data, technical_diagram_data,
    # pid_data, pfd_data, mls_compliance_check,
    # marketplace_compliance_check, schedule_data, robotics_data,
    # custom_schema_data, extraction_corrections

VideoFileDetails
@dataclass(frozen=True)
class VideoFileDetails:
    # Required
    id: str
    size_bytes: int
    content_type: str
    hash: str
    created_at: datetime
    upload_method: str
    has_full_description: bool
    media_type: Literal["video"] = "video"

    # Common metadata (same as image)
    title: Optional[str]
    filename: Optional[str]
    original_filename: Optional[str]
    tags: list[str]
    content_created_at: Optional[datetime]
    updated_at: Optional[datetime]
    content_category: Optional[str]

    # Description
    upload_description: Optional[str]
    user_description: Optional[str]
    description_status: Optional[str]
    description_error: Optional[str]
    confidence_score: Optional[float]

    # Video-specific
    dimensions: Optional[dict[str, int]]
    format: Optional[str]
    thumbnail_url: Optional[str]
    blur_hash: Optional[str]
    video_url: Optional[str]
    video_metadata: Optional[dict[str, Any]]
    video_analysis_status: Optional[str]
    video_analysis_job_id: Optional[str]
    scene_count: Optional[int]
    has_audio_transcript: Optional[bool]
    analysis_skipped: bool
    analysis_skip_reason: Optional[str]
    credits_required_for_analysis: Optional[int]
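Putting the video analysis fields to use: a minimal sketch that distinguishes skipped analysis from completed analysis. It assumes VideoFileDetails is importable from the top-level scopix package, as ImageFileDetails is above.

from scopix import VideoFileDetails

details = await client.files.get(file_id="abc123...")
if isinstance(details, VideoFileDetails):
    if details.analysis_skipped:
        # Skipped analyses explain themselves via skip reason and credit cost
        print(f"Skipped: {details.analysis_skip_reason}")
        print(f"Credits required: {details.credits_required_for_analysis}")
    else:
        print(f"Analysis status: {details.video_analysis_status}")
        print(f"Scenes: {details.scene_count}")
        print(f"Audio transcript: {details.has_audio_transcript}")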
DocumentFileDetails

@dataclass(frozen=True)
class DocumentFileDetails:
    # Required
    id: str
    size_bytes: int
    content_type: str
    hash: str
    created_at: datetime
    upload_method: str
    has_full_description: bool
    media_type: Literal["document"] = "document"

    # Common metadata
    title: Optional[str]
    filename: Optional[str]
    original_filename: Optional[str]
    tags: list[str]
    content_created_at: Optional[datetime]
    updated_at: Optional[datetime]
    content_category: Optional[str]
    thumbnail_url: Optional[str]

    # Description
    upload_description: Optional[str]
    user_description: Optional[str]
    description_status: Optional[str]
    description_error: Optional[str]
    confidence_score: Optional[float]

    # Document-specific
    document_type: Optional[str]
    page_count: Optional[int]
    text_extraction_status: Optional[str]
    digitization_status: Optional[str]
    chunk_count: Optional[int]
    document_url: Optional[str]

LinkFileDetails
@dataclass(frozen=True)
class LinkFileDetails:
    # Required
    id: str
    size_bytes: int
    content_type: str
    hash: str
    created_at: datetime
    upload_method: str
    has_full_description: bool
    media_type: Literal["link"] = "link"

    # Common metadata
    title: Optional[str]
    filename: Optional[str]
    original_filename: Optional[str]
    tags: list[str]
    content_created_at: Optional[datetime]
    updated_at: Optional[datetime]
    content_category: Optional[str]
    thumbnail_url: Optional[str]

    # Description
    upload_description: Optional[str]
    user_description: Optional[str]
    description_status: Optional[str]
    description_error: Optional[str]
    confidence_score: Optional[float]

    # Link-specific
    source_url: Optional[str]
    domain: Optional[str]
    og_metadata: Optional[dict[str, Any]]
    favicon_url: Optional[str]
    page_title: Optional[str]
    crawl_status: Optional[str]
    crawl_error: Optional[str]
    crawled_at: Optional[datetime]
    extracted_images: Optional[dict[str, Any]]
    extracted_images_count: Optional[int]

Domain extractions (legend_data, architectural_design_data, and siblings on ImageFileDetails) are populated only when the file was uploaded with a matching content_category. See the types reference for field-level shapes.
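A quick way to check whether a domain extraction landed: narrow to ImageFileDetails and test the field for None. A sketch; legend_data is just one of the extraction fields listed above, and a matching content_category at upload time is assumed.

from scopix import ImageFileDetails

details = await client.files.get(file_id="abc123...")
if isinstance(details, ImageFileDetails) and details.legend_data is not None:
    # Populated only because the upload's content_category matched
    print(details.legend_data)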

