Video
Analyze video content using multimodal models, enabling video understanding, scene analysis, and temporal reasoning.
Prerequisites
pip install apertis
Get your API Key from Apertis
Basic Video Analysis
from apertis import Apertis
def main():
    """Ask the model to describe a video that is referenced by URL."""
    # NOTE(review): Apertis() is built without arguments - presumably the SDK
    # picks up the API key from the environment; confirm against the SDK docs.
    client = Apertis()

    prompt_part = {"type": "text", "text": "Describe what happens in this video."}
    video_part = {
        "type": "video_url",
        "video_url": {"url": "https://example.com/video.mp4"},
    }
    user_message = {"role": "user", "content": [prompt_part, video_part]}

    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[user_message],
    )

    # Print the text of the first completion choice.
    first_choice = response.choices[0]
    print(first_choice.message.content)


if __name__ == "__main__":
    main()
Local Video (Base64)
import base64
from apertis import Apertis
def encode_video(video_path: str) -> str:
    """Read a video file and return its contents as a base64 string.

    Args:
        video_path: Filesystem path of the video file to encode.

    Returns:
        The file's bytes encoded with the standard base64 alphabet, as text.
        An empty file yields an empty string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The whole file is read in one call, so memory use grows with file size.
    with open(video_path, "rb") as video_file:
        raw_bytes = video_file.read()
    # base64 output is plain ASCII, so decoding it as UTF-8 cannot fail.
    return base64.standard_b64encode(raw_bytes).decode("utf-8")
def main():
    """Send a local video to the model inline, as a base64 data URI.

    The MIME type placed in the data URI is derived from the file extension
    so that non-MP4 files (MOV, WebM, ...) are not mislabeled. When the
    extension is unknown or does not map to a video type, ``video/mp4`` is
    used, which is what this example always sent before.
    """
    # Imported here so the example stays self-contained.
    import mimetypes

    client = Apertis()

    video_path = "path/to/your/video.mp4"
    base64_video = encode_video(video_path)

    guessed_type, _ = mimetypes.guess_type(video_path)
    if guessed_type and guessed_type.startswith("video/"):
        mime_type = guessed_type
    else:
        mime_type = "video/mp4"

    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is happening in this video?"},
                    {
                        "type": "video_url",
                        "video_url": {
                            "url": f"data:{mime_type};base64,{base64_video}",
                        },
                    },
                ],
            },
        ],
    )

    # Print the text of the first completion choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()
Video Question Answering
from apertis import Apertis
def main():
    """Ask a series of questions about one video, one request per question.

    Each question is sent in its own request together with the video URL;
    no conversation history is carried from one question to the next.
    """
    client = Apertis()

    video_url = "https://example.com/cooking-video.mp4"
    questions = [
        "What dish is being prepared?",
        "What ingredients are used?",
        "How many steps are in this recipe?",
        "What cooking techniques are demonstrated?",
    ]

    for question in questions:
        user_message = {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "video_url", "video_url": {"url": video_url}},
            ],
        }
        response = client.chat.completions.create(
            model="gemini-3-pro-preview",
            messages=[user_message],
        )
        answer = response.choices[0].message.content

        print(f"Q: {question}")
        print(f"A: {answer}\n")


if __name__ == "__main__":
    main()
Temporal Analysis
from apertis import Apertis
def main():
    """Request a timeline-style analysis of a video referenced by URL."""
    # Joined with newlines so the prompt text does not depend on how this
    # source file happens to be indented.
    prompt = "\n".join(
        [
            "Analyze this video and provide:",
            "1. A timeline of key events with approximate timestamps",
            "2. Scene transitions",
            "3. Any significant actions or changes",
        ]
    )
    video_part = {
        "type": "video_url",
        "video_url": {"url": "https://example.com/event-video.mp4"},
    }

    client = Apertis()
    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt}, video_part],
            },
        ],
    )

    # Print the text of the first completion choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()
Video with Audio Analysis
from apertis import Apertis
def main():
    """Ask the model about both the visual and the audio content of a video."""
    # Joined with newlines so the prompt text does not depend on how this
    # source file happens to be indented.
    prompt = "\n".join(
        [
            "Analyze both the visual and audio content of this video:",
            "1. What is being shown visually?",
            "2. What is being said or what sounds are present?",
            "3. How do the audio and video relate to each other?",
        ]
    )
    video_part = {
        "type": "video_url",
        "video_url": {"url": "https://example.com/presentation.mp4"},
    }

    client = Apertis()
    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt}, video_part],
            },
        ],
    )

    # Print the text of the first completion choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()
Video Comparison
from apertis import Apertis
def main():
    """Send two videos in a single user message and ask for a comparison."""
    client = Apertis()

    # The text part comes first, followed by one video part per URL, in order.
    video_urls = [
        "https://example.com/video1.mp4",
        "https://example.com/video2.mp4",
    ]
    content = [
        {
            "type": "text",
            "text": "Compare these two videos. What are the similarities and differences?",
        },
    ]
    content.extend(
        {"type": "video_url", "video_url": {"url": url}} for url in video_urls
    )

    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[{"role": "user", "content": content}],
    )

    # Print the text of the first completion choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()
Streaming Video Analysis
from apertis import Apertis
def main():
    """Stream a scene-by-scene breakdown of a video and print it as it arrives.

    The request is made with ``stream=True``; each chunk's text delta is
    written to stdout immediately, and a final newline is printed once the
    stream is exhausted.
    """
    client = Apertis()

    stream = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Provide a detailed scene-by-scene breakdown of this video.",
                    },
                    {
                        "type": "video_url",
                        "video_url": {"url": "https://example.com/movie-clip.mp4"},
                    },
                ],
            },
        ],
        stream=True,
    )

    for chunk in stream:
        # OpenAI-compatible streams can emit chunks whose `choices` list is
        # empty (for example a trailing usage-only chunk); indexing [0] on
        # those would raise IndexError, so skip them.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        # Deltas without text (None or "") carry nothing to print.
        if text:
            # flush=True so partial output shows up as soon as it is received.
            print(text, end="", flush=True)
    print()


if __name__ == "__main__":
    main()
Action Recognition
from apertis import Apertis
def main():
    """Ask the model to list the actions performed in a video."""
    # Joined with newlines so the prompt text does not depend on how this
    # source file happens to be indented.
    prompt = "\n".join(
        [
            "Identify all actions performed in this video.",
            "For each action, provide:",
            "- Action name",
            "- Who/what is performing it",
            "- Approximate duration or timestamp",
        ]
    )
    video_part = {
        "type": "video_url",
        "video_url": {"url": "https://example.com/sports.mp4"},
    }

    client = Apertis()
    response = client.chat.completions.create(
        model="gemini-3-pro-preview",
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt}, video_part],
            },
        ],
    )

    # Print the text of the first completion choice.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()
Supported Models
Video analysis is available on:
| Provider | Models |
|---|---|
| Google | gemini-3-pro-preview, gemini-2.5-flash, gemini-2.0-flash |
| OpenAI | gpt-4.1 (limited video support) |
API Reference
Video URL Object
| Field | Type | Description |
|---|---|---|
| `url` | `str` | Video URL or base64 data URI |
Supported Formats
- MP4
- MOV
- AVI
- MKV
- WebM
Limitations
- Maximum video length varies by model
- Large videos may be sampled or truncated
- Processing time increases with video length
- Some models may not support audio track analysis