OpenAI Realtime

With the OpenAI Realtime integration, you can have AI agents join calls running on Stream edge infrastructure. The integration uses WebRTC for both the agent and the users and guarantees the best quality in poor network conditions.

Make sure to check our OpenAI Realtime integration tutorials to get an initial working setup with AI.

Quickstart

This is what an integration with OpenAI looks like: on your backend, you use one of the Stream SDKs to have OpenAI join a call hosted on Stream. After that, you can use the OpenAI Realtime API directly to handle events and to pass instructions to the AI agent.

Connecting an AI Agent

The following example shows how to connect an AI agent to your Stream application:

// Initialize the SDK with API credentials.
// An ES module import is used (instead of require) because the top-level
// `await` below is only valid in ES modules, not in CommonJS scripts.
import { StreamClient } from "@stream-io/node-sdk";

const streamClient = new StreamClient("your-api-key", "your-api-secret");

// Create a call object
const call = streamClient.video.call("default", "call-id");

// Connect the OpenAI agent to the call
const openAiApiKey = "your-openai-api-key";

// The awaited result is the realtime client used to control the agent
const realtimeClient = await streamClient.video.connectOpenAi({
  call,
  openAiApiKey,
  agentUserId: "lucy",
  model: "gpt-4o-realtime-preview",
});

// Update the session with instructions for the AI agent
realtimeClient.updateSession({
  instructions:
    "You are a helpful assistant that can answer questions and help with tasks.",
});

# Make sure you have the Stream package with OpenAI integration installed:
# pip install getstream[openai-realtime]

import asyncio
from getstream import Stream

async def main():
    """Connect an OpenAI agent to a Stream call and give it instructions."""
    # Initialize the SDK with API credentials
    client = Stream(api_key="your-api-key", api_secret="your-api-secret")

    # Create a call object
    call = client.video.call("default", "call-id")

    # Connect the OpenAI agent to the call
    openai_api_key = "your-openai-api-key"
    instructions = "You are a helpful assistant that can answer questions and help with tasks."

    # The agent stays connected only inside this context block and
    # disconnects as soon as the block is left
    async with call.connect_openai(
        openai_api_key, "assistant", model="gpt-4o-realtime-preview"
    ) as connection:
        # Update the session with instructions for the AI agent
        await connection.session.update(session={"instructions": instructions})

if __name__ == "__main__":
    asyncio.run(main())

// OpenAI agent connection for Golang SDK coming soon, reach out to support if you want to know more

// OpenAI agent connection for Java SDK coming soon, reach out to support if you want to know more

A full sample Node.js integration project is available on GitHub here.

AI Agent Configuration

The connectOpenAi method returns an instance of RealtimeClient from the OpenAI Realtime API. The following example demonstrates how to:

Change the AI agent’s voice
Pass custom instructions
Add a function tool for submitting support tickets
Send an item and trigger a generation

// After connecting the OpenAI agent as shown above
const realtimeClient = await streamClient.video.connectOpenAi({
  call,
  openAiApiKey,
  agentUserId: "support-agent",
});

// Change the voice to 'alloy'
realtimeClient.updateSession({ voice: "alloy" });

// Set detailed instructions for the AI agent
const instructions =
  "You are a friendly customer support agent named Nova. \n" +
  "  Help customers with questions and create support tickets when needed.";
realtimeClient.updateSession({ instructions });

// Definition of the function tool for submitting support tickets
const supportTicketTool = {
  name: "submit_support_ticket",
  description: "Creates a support ticket in the customer service system",
  parameters: {
    type: "object",
    properties: {
      customer_name: { type: "string", description: "Customer name" },
      email: { type: "string", description: "Customer email" },
      issue: { type: "string", description: "Issue description" },
    },
    required: ["customer_name", "email", "issue"],
  },
};

// Handler registered together with the tool definition
async function submitSupportTicket({ customer_name, email, issue }) {
  // Call your ticketing system API
  const { id } = await createTicketInSystem(customer_name, email, issue);
  return { success: true, ticket_id: id };
}

// Add the function tool and its handler to the agent
realtimeClient.addTool(supportTicketTool, submitSupportTicket);

// Send a message to trigger a generation
const greeting = { type: "input_text", text: "How are you?" };
realtimeClient.sendUserMessageContent([greeting]);

# Make sure you have the Stream package with OpenAI integration installed:
# pip install getstream[openai-realtime]

import asyncio
import aiohttp
from getstream import Stream

async def main():
    """Connect a support agent, configure it, and trigger a first generation.

    Covers the four steps of this section: change the voice, pass custom
    instructions, add a function tool, and send an item to trigger a
    generation.
    """
    # Initialize the SDK with API credentials
    client = Stream(
        api_key="your-api-key", 
        api_secret="your-api-secret"
    )

    # Create a call object
    call = client.video.call("default", "call-id")

    # Connect the OpenAI agent to the call
    openai_api_key = "your-openai-api-key"

    # Use the context manager to handle the connection
    async with call.connect_openai(openai_api_key, "support-agent") as connection:
        # Change the voice to 'alloy'
        await connection.session.update(session={"voice": "alloy"})

        # Set detailed instructions for the AI agent
        await connection.session.update(
            session={
                "instructions": """You are a friendly customer support agent named Nova.
                Help customers with questions and create tickets when needed."""
            }
        )

        # Add a function tool for submitting support tickets. This only
        # declares the tool in the session; handling the resulting function
        # call is not shown in this example.
        await connection.session.update(session={"tools": [
            {
                "type": "function",
                "name": "submit_support_ticket",
                "description": "Creates a support ticket",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "customer_name": {
                            "type": "string",
                            "description": "Customer name"
                        },
                        "email": {
                            "type": "string",
                            "description": "Customer email"
                        },
                        "issue": {
                            "type": "string",
                            "description": "Issue description"
                        },
                    },
                    "required": ["customer_name", "email", "issue"],
                },
            },
        ]})

        # Send an item from the user...
        await connection.conversation.item.create(
            item={
                "type": "message",
                "role": "user",
                "content": [
                    {
                        "type": "input_text",
                        "text": "How are you?"
                    }
                ]
            }
        )

        # ...and trigger a generation
        await connection.response.create()

if __name__ == "__main__":
    asyncio.run(main())

OpenAI events

The OpenAI Realtime API provides an event system that allows you to handle various events during conversations. When using the Stream SDK with OpenAI integration, you have access to the events from the OpenAI Realtime API.

Handling Events

Here are some basic examples of handling events:

// Log every conversation update together with its event payload
realtimeClient.on("conversation.updated", (event) => {
  console.log("received conversation.updated", event);
});

// The payload is destructured, so log `item`: no `event` binding exists in
// this handler and referencing it would throw a ReferenceError
realtimeClient.on("conversation.item.completed", ({ item }) => {
  console.log("received conversation.item.completed", item);
});

// Log the error value that is passed to the handler
realtimeClient.on("error", (error) => {
  console.log("received error", error);
});

# Fragment: run this inside an async function, with `call` and
# `openai_api_key` set up as in the connection examples.
async with call.connect_openai(openai_api_key, "lucy") as connection:
    # Process events using an async iterator
    async for event in connection:
        # Handle different event types by dispatching on event.type
        if event.type == "conversation.updated":
            print(f"conversation.updated: {event}")

        elif event.type == "conversation.item.completed":
            print(f"conversation.item.completed: {event}")

        elif event.type == "error":
            print(f"error: {event}")

Sending Events

You can also send events. The full list of events is available in the OpenAI Realtime API documentation.

// Send a text message from the user
const userMessage = {
  type: "input_text",
  text: "What can you help me with today?",
};
realtimeClient.sendUserMessageContent([userMessage]);

# Inside an async function with an active connection:
# Send a text message from the user
await connection.conversation.item.create(
    item={
        "type": "message",
        "role": "user",
        "content": [
            {
                "type": "input_text",
                "text": "What can you help me with today?"
            }
        ]
    }
)


# Generate a response from the AI
await connection.response.create()

Stream Video Events

In addition to OpenAI events, the realtime client also receives events from Stream Video. This allows you to handle both AI-related events and video call events in the same event loop.

The following Stream Video events are available in the realtime client:

call.session_participant_joined - Triggered when a participant joins the call
call.session_participant_left - Triggered when a participant leaves the call
custom - Custom events that can be sent by participants
call.reaction_new - Triggered when a participant sends a reaction
call.session_ended - Triggered when the call session ends
call.ended - Triggered when the call ends

You can handle these events in the same event loop as OpenAI events:

// Listen for Stream Video events
realtimeClient.on("call.session_participant_joined", ({ participant }) => {
  console.log(`Participant joined: ${participant.user_id}`);
});

realtimeClient.on("call.session_participant_left", ({ participant }) => {
  console.log(`Participant left: ${participant.user_id}`);
});

// The event payload is not needed when the call ends
realtimeClient.on("call.ended", () => {
  console.log("Call ended, disconnecting...");
  // Clean up resources
});

# Make sure you have the Stream package with OpenAI integration installed:
# pip install getstream[openai-realtime]

import asyncio
from getstream import Stream

def participant_user_id(event):
    """Return the user id of the event's participant, or "unknown" if missing."""
    participant = getattr(event, "participant", {})
    return getattr(participant, "user_id", "unknown")

async def main():
    """Handle OpenAI and Stream Video events from a single event loop."""
    # Initialize the SDK with API credentials
    client = Stream(
        api_key="your-api-key",
        api_secret="your-api-secret",
    )

    # Create a call object
    call = client.video.call("default", "call-id")

    # Connect the OpenAI agent to the call
    openai_api_key = "your-openai-api-key"

    # Use the context manager to handle the connection
    async with call.connect_openai(openai_api_key, "assistant") as connection:
        # Process both OpenAI and Stream Video events
        async for event in connection:
            # Handle OpenAI events
            if event.type == "conversation.updated":
                print(f"OpenAI update: {event}")

            # Handle Stream Video events
            elif event.type == "call.session_participant_joined":
                print(f"Participant joined: {participant_user_id(event)}")

            elif event.type == "call.session_participant_left":
                print(f"Participant left: {participant_user_id(event)}")
                # Stop processing events once a participant has left
                break

            elif event.type == "call.ended":
                print("Call ended, exiting event loop")
                break

if __name__ == "__main__":
    asyncio.run(main())

Stats

Overview