Documentation

Managing Files

List, search, and manage your uploaded files

File Access

Access your uploaded images along with their AI-generated descriptions, tags, visible text, and processing history.

List Files

Get a paginated list of your files

python
async with Scopix(api_key="scopix_...") as client:
    # List recent files
    files = await client.files.list(limit=20)
    print(f"Total files: {files.total_count}")
    print(f"Has more: {files.has_more}")

    for f in files.items:
        # Summary items expose lightweight fields like filename, media_type,
        # created_at, description_status — not upload_description.
        print(f"{f.title or f.filename} ({f.media_type}) — {f.description_status}")

Search Files

Search by description content

python
# Search in descriptions and metadata
files = await client.files.list(
    search="damaged electrical equipment",
    search_mode="all"  # all | metadata | visible_text
)
for f in files.items:
    # To read the AI description, fetch details:
    details = await client.files.get(f.id)
    print(f"{details.title}: {details.upload_description}")

Filter by Tags

Find files with specific tags

python
# Files must have ALL specified tags
files = await client.files.list(
    tags=["priority", "reviewed"]
)

Filter by Description Status

Filter files by whether they have descriptions

python
# Only files with completed descriptions
files = await client.files.list(has_description=True)
# Only files without descriptions (pending or failed)
files = await client.files.list(has_description=False)

Filter by Date

Get files from a date range

python
from datetime import datetime, timedelta

# Files from the last week
files = await client.files.list(
    date_from=datetime.now() - timedelta(days=7),
    date_to=datetime.now()
)

Sorting

Control result ordering

python
files = await client.files.list(
    sort_by="content_created_at",  # created_at | content_created_at | title | size_bytes
    sort_order="desc"  # desc | asc
)

Pagination

Paginate through large result sets

python
offset = 0
limit = 50
all_files = []

while True:
    files = await client.files.list(
        limit=limit,
        offset=offset
    )
    all_files.extend(files.items)
    if not files.has_more:
        break
    offset += limit

print(f"Retrieved {len(all_files)} total files")

Auto-Pagination

Use list_all() to automatically iterate through all pages

python
# Iterate through all files automatically. Each item is a
# UserFileSummary (ImageFileSummary / VideoFileSummary / ...).
async for file in client.files.list_all(
    search="damaged",
    tags=["priority"]
):
    print(f"{file.title or file.filename} ({file.media_type})")

Get File Details

Get complete information for a single file

python
from scopix import ImageFileDetails

details = await client.files.get(file_id="abc123...")

# Shared fields work for any media type.
print(f"Title: {details.title}")
print(f"Size: {details.size_bytes} bytes")

# Image-specific fields require narrowing.
if isinstance(details, ImageFileDetails):
    print(f"Format: {details.format}")
    print(f"Visible Text: {details.visible_text}")

    # OCR text regions with bounding boxes
    for region in details.text_regions or []:
        bbox = region.get("bounding_box", {})
        print(f" '{region['text']}'"
              f" at ({bbox.get('x_min', 0):.2f}, {bbox.get('y_min', 0):.2f})"
              f" → ({bbox.get('x_max', 0):.2f}, {bbox.get('y_max', 0):.2f})")

    # All AI descriptions attached to this image
    for desc in details.full_descriptions or []:
        print(f"\nDescription:")
        print(f" Content: {desc.description}")
        print(f" Processing time: {desc.processing_time_ms}ms")

    # Image URLs
    print(f"\nFull URL: {details.full_url}")
    print(f"Thumbnail: {details.thumbnail_url}")
    print(f"Medium: {details.medium_url}")

Update File Metadata

Change title, tags, or user description

python
# Update title
result = await client.files.update(
    file_id="abc123...",
    title="Damaged Pole #42"
)

# Update tags
result = await client.files.update(
    file_id="abc123...",
    tags=["reviewed", "priority", "damage"]
)

# Set a custom user description (overrides the AI-generated one in display)
result = await client.files.update(
    file_id="abc123...",
    user_description="Manual inspection photo showing corrosion on north face"
)

# Reset user description back to AI-generated
result = await client.files.update(
    file_id="abc123...",
    user_description=None
)

print(f"Updated at: {result.updated_at}")

Delete a File

Soft-delete a file (recoverable for 30 days)

python
result = await client.files.delete(file_id="abc123...")
print(f"Deleted: {result.message}")
print(f"Deleted at: {result.deleted_at}")

Batch Delete Files

Delete multiple files in one operation

python
result = await client.files.batch_delete(
    file_ids=["id1", "id2", "id3"]  # Max 100 files, no duplicates
)

print(f"Deleted: {result.summary['deleted']}")
print(f"Skipped: {result.summary['skipped']}")  # Files still processing
print(f"Failed: {result.summary['failed']}")

# Access individual results
for item in result.deleted:
    print(f"Deleted {item.id} at {item.deleted_at}")

Get Image Variants

Get URLs for different image sizes

python
# Available variants: tiny_64, small_256, medium_512, medium_750, large_1024,
# legend_annotated, architectural_design_annotated, original
url = await client.files.get_variant(
    file_id="abc123...",
    variant_type="medium_750"
)
print(f"Variant URL: {url}")

Download Original File

Download the full original file as bytes

python
content = await client.files.download(file_id="abc123...")

with open("downloaded.jpg", "wb") as f:
    f.write(content)

Trigger Variants

Manually (re)queue generation of image variants

python
result = await client.files.trigger_variants(file_id="abc123...")

# Response: success, message, task_id, current_status, image_id, skipped_duplicate
print(f"Task ID: {result['task_id']}")
print(f"Current status: {result['current_status']}")

if result.get("skipped_duplicate"):
    print("Variant generation was already in progress — reused existing task")

Filter by IDs

Retrieve specific files by their IDs

python
files = await client.files.list(
    ids=["id1", "id2", "id3"],  # Max 500 IDs
)

UserFile Type

UserFile is a TypeAlias — a union of four concrete dataclasses discriminated by media_type. It is not itself a class, so it has no fields of its own.

python
# Declared in scopix.types.files
UserFile: TypeAlias = Union[ImageFile, VideoFile, DocumentFile, LinkFile]

# list() returns a FileList whose .items are UserFileSummary — the
# lightweight tier, NOT UserFile. Narrow with isinstance:
from scopix import ImageFileSummary, VideoFileSummary

result = await client.files.list(limit=20)
for f in result.items:
    if isinstance(f, ImageFileSummary):
        print(f"image {f.id}: {f.dimensions} {f.description_status}")
    elif isinstance(f, VideoFileSummary):
        print(f"video {f.id}: {f.scene_count} scenes")

Tiers: *Summary (list views — minimal fields), *File (card/preview tier, includes AI output like upload_description), and *FileDetails (full detail). upload_description, user_description, description_error, and domain extraction fields live on the *File and *FileDetails tiers — not on *Summary.

UserFileDetails Type

Like UserFile, UserFileDetails is a TypeAlias — a union of four concrete detail dataclasses returned by client.files.get(). Narrow with isinstance (or match) to access media-specific fields.

python
# Declared in scopix.types.files
UserFileDetails: TypeAlias = Union[
    ImageFileDetails, VideoFileDetails, DocumentFileDetails, LinkFileDetails,
]

file = await client.files.get(file_id)
match file:
    case ImageFileDetails():
        show(file.full_url, file.dimensions)
    case VideoFileDetails():
        play(file.video_url)
    case DocumentFileDetails():
        print(file.page_count, file.text_extraction_status)
    case LinkFileDetails():
        print(file.source_url, file.crawl_status)

ImageFileDetails

python
@dataclass(frozen=True)
class ImageFileDetails:
    # Required
    id: str
    size_bytes: int
    content_type: str
    hash: str
    created_at: datetime
    upload_method: str  # "STREAMING" | "presigned" | "multipart"
    has_full_description: bool
    # Discriminator
    media_type: Literal["image"] = "image"
    # Common metadata
    title: Optional[str]
    filename: Optional[str]
    original_filename: Optional[str]
    tags: list[str]
    content_created_at: Optional[datetime]
    updated_at: Optional[datetime]
    content_category: Optional[str]
    # Description / OCR
    upload_description: Optional[str]
    user_description: Optional[str]
    description_status: Optional[str]
    description_error: Optional[str]
    confidence_score: Optional[float]
    visible_text: Optional[str]
    text_regions: Optional[list[dict[str, Any]]]
    description_generated_at: Optional[datetime]
    full_descriptions: list[FullDescription]
    processing_history: list[ProcessingHistory]
    formatted_document: Optional[str]
    # Image-specific
    dimensions: dict[str, int]  # {"width": …, "height": …}
    format: str
    full_url: Optional[str]
    medium_url: Optional[str]
    thumbnail_url: Optional[str]
    original_url: Optional[str]
    blur_hash: Optional[str]
    variant_status: Optional[str]
    variant_count: Optional[int]
    # Schema / parent
    custom_schema_id: Optional[str]
    parent_document_id: Optional[str]
    # Domain-specific extractions (populated by content_category)
    # legend_data, architectural_design_data, ce_plan_data,
    # layout_region_data, real_estate_data, mining_data,
    # construction_data, facility_assessment_data, technical_diagram_data,
    # pid_data, pfd_data, mls_compliance_check,
    # marketplace_compliance_check, schedule_data, robotics_data,
    # custom_schema_data, extraction_corrections

VideoFileDetails

python
@dataclass(frozen=True)
class VideoFileDetails:
    # Required
    id: str
    size_bytes: int
    content_type: str
    hash: str
    created_at: datetime
    upload_method: str
    has_full_description: bool
    media_type: Literal["video"] = "video"
    # Common metadata (same as image)
    title: Optional[str]
    filename: Optional[str]
    original_filename: Optional[str]
    tags: list[str]
    content_created_at: Optional[datetime]
    updated_at: Optional[datetime]
    content_category: Optional[str]
    # Description
    upload_description: Optional[str]
    user_description: Optional[str]
    description_status: Optional[str]
    description_error: Optional[str]
    confidence_score: Optional[float]
    # Video-specific
    dimensions: Optional[dict[str, int]]
    format: Optional[str]
    thumbnail_url: Optional[str]
    blur_hash: Optional[str]
    video_url: Optional[str]
    video_metadata: Optional[dict[str, Any]]
    video_analysis_status: Optional[str]
    video_analysis_job_id: Optional[str]
    scene_count: Optional[int]
    has_audio_transcript: Optional[bool]
    analysis_skipped: bool
    analysis_skip_reason: Optional[str]
    credits_required_for_analysis: Optional[int]

DocumentFileDetails

python
@dataclass(frozen=True)
class DocumentFileDetails:
    # Required
    id: str
    size_bytes: int
    content_type: str
    hash: str
    created_at: datetime
    upload_method: str
    has_full_description: bool
    media_type: Literal["document"] = "document"
    # Common metadata
    title: Optional[str]
    filename: Optional[str]
    original_filename: Optional[str]
    tags: list[str]
    content_created_at: Optional[datetime]
    updated_at: Optional[datetime]
    content_category: Optional[str]
    thumbnail_url: Optional[str]
    # Description
    upload_description: Optional[str]
    user_description: Optional[str]
    description_status: Optional[str]
    description_error: Optional[str]
    confidence_score: Optional[float]
    # Document-specific
    document_type: Optional[str]
    page_count: Optional[int]
    text_extraction_status: Optional[str]
    digitization_status: Optional[str]
    chunk_count: Optional[int]
    document_url: Optional[str]

LinkFileDetails

python
@dataclass(frozen=True)
class LinkFileDetails:
    # Required
    id: str
    size_bytes: int
    content_type: str
    hash: str
    created_at: datetime
    upload_method: str
    has_full_description: bool
    media_type: Literal["link"] = "link"
    # Common metadata
    title: Optional[str]
    filename: Optional[str]
    original_filename: Optional[str]
    tags: list[str]
    content_created_at: Optional[datetime]
    updated_at: Optional[datetime]
    content_category: Optional[str]
    thumbnail_url: Optional[str]
    # Description
    upload_description: Optional[str]
    user_description: Optional[str]
    description_status: Optional[str]
    description_error: Optional[str]
    confidence_score: Optional[float]
    # Link-specific
    source_url: Optional[str]
    domain: Optional[str]
    og_metadata: Optional[dict[str, Any]]
    favicon_url: Optional[str]
    page_title: Optional[str]
    crawl_status: Optional[str]
    crawl_error: Optional[str]
    crawled_at: Optional[datetime]
    extracted_images: Optional[dict[str, Any]]
    extracted_images_count: Optional[int]

Domain extractions (legend_data, architectural_design_data, and siblings on ImageFileDetails) are populated only when the file was uploaded with a matching content_category. See the types reference for field-level shapes.