
Build a Voice-Controlled GitHub Agent in Python (MCP + Vision Agents)

4 min read
Amos G.
Published December 23, 2025

Turn any GitHub repo into a voice assistant: ask about branches, open issues, create pull requests, list contributors—all via natural conversation. 

Powered by OpenAI's Realtime API for low-latency voice, GitHub's Model Context Protocol (MCP) for secure repo actions, and Vision Agents for seamless orchestration.

In the demo, the agent understands spoken repo names (even when spelled out), checks branch counts, and answers follow-up questions about open PRs.

Here's how to build it yourself in under three minutes.

What You'll Build

[Diagram: Voice-Controlled GitHub Agent in Python]
  • A voice-controlled GitHub assistant that can read and act on any public (or private) repository

  • Supports queries like: "How many branches in getstream/vision-agents?", "List open issues", "Create a PR", "Who contributed most?"

  • Real-time voice interaction with natural turn-taking

  • Secure GitHub access via personal access token and MCP

The Stack

Requirements (API Keys & Tokens)

You'll need:

  • OpenAI API key (for Realtime API voice model)

  • Stream API key & secret (low-latency WebRTC)

  • GitHub Personal Access Token (with repo scope for private repos)
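The script in Step 2 calls load_dotenv(), so instead of exporting these in your shell you can keep them in a .env file next to main.py. A minimal sketch with placeholder values (the variable names match what the code reads):

```shell
# .env — loaded by load_dotenv() in main.py
OPENAI_API_KEY=sk-...
STREAM_API_KEY=...
STREAM_API_SECRET=...
GITHUB_PAT=ghp_...
```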

Step 1: Set Up the Project

```shell
uv init github-voice-agent
cd github-voice-agent

# Install Vision Agents and the OpenAI Python plugin
uv add "vision-agents[getstream, openai]"
```

Step 2: Full Working Code (main.py)

```python
import logging
import os

from dotenv import load_dotenv

from vision_agents.core.agents import Agent, AgentLauncher
from vision_agents.core import cli
from vision_agents.core.mcp import MCPServerRemote
from vision_agents.plugins.openai.openai_realtime import Realtime
from vision_agents.plugins import getstream
from vision_agents.core.events import CallSessionParticipantJoinedEvent
from vision_agents.core.edge.types import User

# Load environment variables from .env file
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def create_agent(**kwargs) -> Agent:
    """Demonstrate OpenAI Realtime with GitHub MCP server integration."""
    # Get GitHub PAT from environment
    github_pat = os.getenv("GITHUB_PAT")
    if not github_pat:
        logger.error("GITHUB_PAT environment variable not found!")
        logger.error("Please set GITHUB_PAT in your .env file or environment")
        raise ValueError("GITHUB_PAT environment variable not found")

    # Check OpenAI API key from environment
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        logger.error("OPENAI_API_KEY environment variable not found!")
        logger.error("Please set OPENAI_API_KEY in your .env file or environment")
        raise ValueError("OPENAI_API_KEY environment variable not found")

    # Create GitHub MCP server
    github_server = MCPServerRemote(
        url="https://api.githubcopilot.com/mcp/",
        headers={"Authorization": f"Bearer {github_pat}"},
        timeout=10.0,  # Shorter connection timeout
        session_timeout=300.0,
    )

    # Create OpenAI Realtime LLM (uses OPENAI_API_KEY from environment)
    llm = Realtime(model="gpt-4o-realtime-preview-2024-12-17")

    # Create real edge transport and agent user
    edge = getstream.Edge()
    agent_user = User(name="GitHub AI Assistant", id="github-agent")

    # Create agent with GitHub MCP server and OpenAI Realtime LLM
    agent = Agent(
        edge=edge,
        llm=llm,
        agent_user=agent_user,
        instructions=(
            "You are a helpful AI assistant with access to GitHub via MCP server. "
            "You can help with GitHub operations like creating issues, managing "
            "pull requests, searching repositories, and more. Keep responses "
            "conversational and helpful. When you need to perform GitHub "
            "operations, use the available MCP tools."
        ),
        processors=[],
        mcp_servers=[github_server],
    )

    logger.info("Agent created with OpenAI Realtime and GitHub MCP server")
    logger.info(f"GitHub server: {github_server}")
    return agent


async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
    try:
        # Set up event handler for when participants join
        @agent.subscribe
        async def on_participant_joined(event: CallSessionParticipantJoinedEvent):
            # Check MCP tools after connection
            available_functions = agent.llm.get_available_functions()
            mcp_functions = [
                f for f in available_functions if f["name"].startswith("mcp_")
            ]
            logger.info(
                f"✅ Found {len(mcp_functions)} MCP tools available for function calling"
            )
            await agent.say(
                f"Hello {event.participant.user.name}! I'm your GitHub AI assistant "
                f"powered by OpenAI Realtime. I have access to {len(mcp_functions)} "
                "GitHub tools and can help you with repositories, issues, pull "
                "requests, and more through voice commands!"
            )

        # Ensure the agent user is created
        await agent.create_user()

        # Create a call
        call = await agent.create_call(call_type, call_id)

        # Have the agent join the call/room
        logger.info("🎤 Agent joining call...")
        with await agent.join(call):
            logger.info(
                "✅ Agent is now live with OpenAI Realtime! You can talk to it in the browser."
            )
            logger.info("Try asking:")
            logger.info("  - 'What repositories do I have?'")
            logger.info("  - 'Create a new issue in my repository'")
            logger.info("  - 'Search for issues with the label bug'")
            logger.info("  - 'Show me recent pull requests'")
            logger.info("")
            logger.info(
                "The agent will use OpenAI Realtime's real-time function calling to interact with GitHub!"
            )

            # Run until the call ends
            await agent.finish()
    except Exception as e:
        logger.error(f"Error with OpenAI Realtime GitHub MCP demo: {e}")
        logger.error("Make sure your GITHUB_PAT and OPENAI_API_KEY are valid")
        import traceback

        traceback.print_exc()

    # Clean up
    await agent.close()
    logger.info("Demo completed!")


if __name__ == "__main__":
    cli(AgentLauncher(create_agent=create_agent, join_call=join_call))
```

Step 3: Set Credentials & Run

```shell
export OPENAI_API_KEY=sk-...
export STREAM_API_KEY=...
export STREAM_API_SECRET=...
export GITHUB_PAT=ghp_...

uv run main.py
```
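A quick way to avoid a failed startup is to check every credential before launching. A small preflight sketch (the helper name `missing_env` is my own, but the variable names match what main.py reads, including `GITHUB_PAT`):

```python
import os

# Environment variables the demo script reads
REQUIRED = ("OPENAI_API_KEY", "STREAM_API_KEY", "STREAM_API_SECRET", "GITHUB_PAT")


def missing_env(required=REQUIRED, env=os.environ):
    """Return the names of required variables that are unset or empty."""
    return [name for name in required if not env.get(name)]


if __name__ == "__main__":
    missing = missing_env()
    if missing:
        raise SystemExit(f"Missing credentials: {', '.join(missing)}")
    print("All credentials present; ready to run.")
```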

A browser window opens with a video call UI. Join the call, allow mic access, and start talking:

Agent: "Could you spell out the repository name for me?"
You: "V-I-S-I-O-N-A-G-E-N-T-S"
Agent: "The getstream/vision-agents repo has nine branches."
You: "How many open pull requests?"
Agent: "There are X open pull requests..."

Why This Stack is Powerful  

Vision Agents delivers sub-second voice latency with built-in reasoning plus MCP integration in under 100 lines of code. OpenAI's Realtime API handles voice streaming (speech-to-text and text-to-speech) and turn detection. GitHub MCP provides secure, structured repo access without brittle API wrappers. 
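Part of why so little glue code is needed: tool discovery in the demo boils down to a prefix filter over the functions the LLM exposes. A minimal sketch of that step (the dict shape mirrors what `get_available_functions()` returns in the listing above; the tool names here are illustrative):

```python
def mcp_tools(available_functions):
    """Keep only the functions contributed by the MCP server (name-prefixed 'mcp_')."""
    return [f for f in available_functions if f["name"].startswith("mcp_")]


functions = [
    {"name": "mcp_list_branches"},
    {"name": "mcp_create_issue"},
    {"name": "end_call"},  # built-in agent function, not an MCP tool
]
print([f["name"] for f in mcp_tools(functions)])
# → ['mcp_list_branches', 'mcp_create_issue']
```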

For reference, here's a standalone variant of the same demo that skips the AgentLauncher CLI, runs via asyncio.run(), and opens the demo UI automatically:

```python
"""GitHub MCP Demo - Demonstrate function calling with OpenAI Realtime and GitHub MCP.

This demo shows how OpenAI Realtime can use GitHub MCP tools for real-time function calling
during live conversations. The agent can interact with GitHub repositories, issues, and more
using voice commands through the OpenAI Realtime API.
"""

import asyncio
import logging
import os
from uuid import uuid4

from dotenv import load_dotenv

from vision_agents.core.agents import Agent
from vision_agents.core.mcp import MCPServerRemote
from vision_agents.plugins.openai.openai_realtime import Realtime
from vision_agents.plugins import getstream
from vision_agents.core.events import CallSessionParticipantJoinedEvent
from vision_agents.core.edge.types import User

# Load environment variables from .env file
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Silence noisy media/transport loggers
logging.getLogger("vision_agents.core.utils.video_forwarder").setLevel(logging.CRITICAL)
logging.getLogger("aiortc.mediastreams").setLevel(logging.CRITICAL)
logging.getLogger("aiortc.codecs.vpx").setLevel(logging.CRITICAL)
logging.getLogger("vision_agents.plugins.openai.rtc_manager").setLevel(logging.CRITICAL)


async def start_agent():
    """Demonstrate GitHub MCP server integration."""

    # Get GitHub PAT from environment
    github_pat = os.getenv("GITHUB_PAT")
    if not github_pat:
        logger.error("GITHUB_PAT environment variable not found!")
        logger.error("Please set GITHUB_PAT in your .env file or environment")
        return

    # Create GitHub MCP server
    github_server = MCPServerRemote(
        url="https://api.githubcopilot.com/mcp/",
        headers={"Authorization": f"Bearer {github_pat}"},
        timeout=10.0,  # Shorter connection timeout
        session_timeout=300.0,
    )

    # Get OpenAI API key from environment
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        logger.error("OPENAI_API_KEY environment variable not found!")
        logger.error("Please set OPENAI_API_KEY in your .env file or environment")
        return

    # Get Stream API credentials from environment
    stream_api_key = os.getenv("STREAM_API_KEY")
    stream_api_secret = os.getenv("STREAM_API_SECRET")
    if not stream_api_key or not stream_api_secret:
        logger.error("STREAM_API_KEY and STREAM_API_SECRET environment variables not found!")
        logger.error("Please set STREAM_API_KEY and STREAM_API_SECRET in your .env file or environment")
        return

    # Create OpenAI Realtime LLM (supports function calling with MCP)
    llm = Realtime(model="gpt-4o-realtime-preview-2024-12-17")

    # Create real edge transport and agent user
    edge = getstream.Edge()
    agent_user = User(name="GitHub AI Assistant", id="github-agent")

    # Create agent with GitHub MCP server and OpenAI Realtime LLM
    agent = Agent(
        edge=edge,
        llm=llm,
        agent_user=agent_user,
        instructions="""You are a helpful AI assistant with access to GitHub via MCP server.

IMPORTANT: Before calling any GitHub MCP tools, you MUST gather all required information from the user:
- For repository operations: Ask for the repository owner (username/org) and repository name
- For issue operations: Ask for owner, repo, and issue number
- For PR operations: Ask for owner, repo, and PR number

Never call MCP tools with missing parameters. If you don't have all required information, ask the user for it first.

Examples:
User: "Show me my repositories"
You: "I'd be happy to help! What's your GitHub username or organization name?"

User: "Create an issue"
You: "Sure! I'll need a few details. What's the repository owner and name? Also, what should the issue title and description be?"

Keep responses conversational, friendly, and helpful. Always confirm actions before executing them.""",
        processors=[],
        mcp_servers=[github_server],
    )

    logger.info("Agent created with OpenAI Realtime and GitHub MCP server")
    logger.info(f"GitHub server: {github_server}")

    try:
        # Set up event handler for when participants join
        @agent.subscribe
        async def on_participant_joined(event: CallSessionParticipantJoinedEvent):
            # Check MCP tools after connection
            available_functions = agent.llm.get_available_functions()
            mcp_functions = [
                f for f in available_functions if f["name"].startswith("mcp_")
            ]
            logger.info(
                f"✅ Found {len(mcp_functions)} MCP tools available for function calling"
            )
            await agent.say(
                f"Hello {event.participant.user.name}! I'm your GitHub AI assistant "
                f"powered by OpenAI Realtime. I have access to {len(mcp_functions)} "
                "GitHub tools and can help you with repositories, issues, pull "
                "requests, and more through voice commands!"
            )

        # Create a call
        call = agent.edge.client.video.call("default", str(uuid4()))

        # Have the agent join the call/room
        logger.info("🎤 Agent joining call...")
        with await agent.join(call):
            # Open the demo UI
            logger.info("🌐 Opening browser with demo UI...")
            await agent.edge.open_demo(call)
            logger.info(
                "✅ Agent is now live with OpenAI Realtime! You can talk to it in the browser."
            )
            logger.info("Try asking:")
            logger.info("  - 'What repositories do I have?'")
            logger.info("  - 'Create a new issue in my repository'")
            logger.info("  - 'Search for issues with the label bug'")
            logger.info("  - 'Show me recent pull requests'")
            logger.info("")
            logger.info(
                "The agent will use OpenAI Realtime's real-time function calling to interact with GitHub!"
            )

            # Run until the call ends
            await agent.finish()

    except Exception as e:
        logger.error(f"Error with OpenAI Realtime GitHub MCP demo: {e}")
        logger.error("Make sure your GITHUB_PAT and OPENAI_API_KEY are valid")
        import traceback

        traceback.print_exc()

    # Clean up
    await agent.close()
    logger.info("Demo completed!")


if __name__ == "__main__":
    asyncio.run(start_agent())
```
Resources:

  • Vision Agents GitHub

  • Vision Agents Docs

  • OpenAI Realtime API

  • Stream Video

Try it out yourself! 

What did you make your GitHub agent do: triage issues, onboard new contributors, or something more creative? 🤖
