Multimodal Integration with OpenAI

Nov 14 2024 · Python 3.12, OpenAI 1.52, JupyterLab, Visual Studio Code

Lesson 05: Building a Multimodal AI App

An Introductory Demo of Gradio

Episode complete

Play next episode

Next

Heads up... You’re accessing parts of this content for free, with some sections shown as obfuscated text.

Heads up... You’re accessing parts of this content for free, with some sections shown as obfuscated text.

Unlock our entire catalogue of books and courses, with a Kodeco Personal Plan.

Unlock now

You’ll start by building several simple Gradio apps to prepare for the multimodal AI app you’ll build later. The first one takes a name and a time of day as inputs and returns a greeting message.

# Install the required libraries
!pip install openai requests python-dotenv matplotlib librosa
  ipyaudioworklet gradio Pillow

# Load the OpenAI client class
from openai import OpenAI

# Load environment variables from the .env file.
# Make sure OPENAI_API_KEY=... exists in .env
from dotenv import load_dotenv

# Runs before OpenAI() below, which is created without an explicit key
# (presumably it picks up OPENAI_API_KEY from the environment -- confirm).
load_dotenv()

# Create the OpenAI connection object
client = OpenAI()

# Import the Gradio library; it is referred to as `gr` from here on
import gradio as gr

# Define a simple function that takes a name and a time of day as inputs
def greet(name, greeting_time):
    """Return a greeting such as ``"Good morning, Ada!"``.

    ``name`` is the person to greet and ``greeting_time`` is the
    time-of-day word placed right after "Good". Both must be strings.
    """
    pieces = ("Good ", greeting_time, ", ", name, "!")
    return "".join(pieces)

# Wrap greet() in a Gradio UI. The input components are listed in the same
# order as greet()'s parameters: the name first, then the time of day.
demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Text(),  # name
        gr.Dropdown(["morning", "evening", "night"]),  # time of day
    ],
    outputs=[gr.Text()],  # the greeting text
)

# Start the app
demo.launch()
# Define a function that returns a greeting message and a
# hard-coded image URL
def greet(name, greeting_time):
    """Build a greeting and pair it with a hard-coded image URL.

    Args:
        name: Name of the person to greet.
        greeting_time: Time-of-day word placed after "Good"
            (for example "morning").

    Returns:
        A ``(greeting, image_url)`` tuple, in that order.
    """
    greeting = f"Good {greeting_time}, {name}!"
    # The URL has to be one unbroken string. Adjacent literals inside the
    # parentheses are concatenated, so the lines can stay short.
    image_url = (
        "https://upload.wikimedia.org/wikipedia/commons/d/d6"
        "/An_Oberoi_Hotel_employee_doing_Namaste%2C_New_Delhi.jpg"
    )
    return (greeting, image_url)

# Wrap greet() in a Gradio UI. Inputs follow greet()'s parameter order
# (name, time of day); outputs follow the order of the tuple it returns
# (greeting text, image).
demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Text(),  # name
        gr.Dropdown(["morning", "evening", "night"]),  # time of day
    ],
    outputs=[
        gr.Text(),  # greeting text
        gr.Image(),  # image shown from the returned URL
    ],
)

# Start the app
demo.launch()
# Define a function that returns a greeting message,
# an image URL, and an audio file path
def greet(name, greeting_time, audio_path):
    """Build a greeting, attach an image URL, and pass the audio through.

    Args:
        name: Name of the person to greet.
        greeting_time: Time-of-day word placed after "Good"
            (for example "morning").
        audio_path: Path of an audio file; returned unchanged.

    Returns:
        A ``(greeting, image_url, audio_path)`` tuple, in that order.
    """
    greeting = f"Good {greeting_time}, {name}!"
    # The URL has to be one unbroken string. Adjacent literals inside the
    # parentheses are concatenated, so the lines can stay short.
    image_url = (
        "https://upload.wikimedia.org/wikipedia/commons/d/d6"
        "/An_Oberoi_Hotel_employee_doing_Namaste%2C_New_Delhi.jpg"
    )
    return (greeting, image_url, audio_path)

# Wrap greet() in a Gradio UI. Inputs follow greet()'s parameter order
# (name, time of day, audio path); outputs follow the order of the tuple
# it returns (greeting text, image, audio).
demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Text(),  # name
        gr.Dropdown(["morning", "evening", "night"]),  # time of day
        # Microphone recording, handed to greet() as a file path
        gr.Audio(sources=["microphone"], type="filepath"),
    ],
    outputs=[
        gr.Text(),  # greeting text
        gr.Image(),  # image shown from the returned URL
        gr.Audio(type="filepath"),  # audio returned by greet()
    ],
)

# Start the app
demo.launch()
# Define a function that returns a greeting message, an image URL,
# and an audio file path
def greet(name, greeting_time, audio_path):
    """Build a greeting, attach an image URL, and pass the audio through.

    Args:
        name: Name of the person to greet.
        greeting_time: Time-of-day word placed after "Good"
            (for example "morning").
        audio_path: Path of an audio file; returned unchanged.

    Returns:
        A ``(greeting, image_url, audio_path)`` tuple, in that order.
    """
    greeting = f"Good {greeting_time}, {name}!"
    # The URL has to be one unbroken string. Adjacent literals inside the
    # parentheses are concatenated, so the lines can stay short.
    image_url = (
        "https://upload.wikimedia.org/wikipedia/commons/d/d6"
        "/An_Oberoi_Hotel_employee_doing_Namaste%2C_New_Delhi.jpg"
    )
    return (greeting, image_url, audio_path)

# Wrap greet() in a Gradio UI with a title and a description. Inputs follow
# greet()'s parameter order (name, time of day, audio path); outputs follow
# the order of the tuple it returns (greeting text, image, audio).
demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Text(),  # name
        gr.Dropdown(["morning", "evening", "night"]),  # time of day
        # Microphone recording, handed to greet() as a file path
        gr.Audio(sources=["microphone"], type="filepath"),
    ],
    outputs=[
        gr.Text(),  # greeting text
        gr.Image(),  # image shown from the returned URL
        gr.Audio(type="filepath"),  # audio returned by greet()
    ],
    title="Greeting App",
    description="This is a billion-dollar greeting app.",
)

# Start the app
demo.launch()
See forum comments
Cinema mode · Download course materials from GitHub
Previous: Introduction to Gradio Next: Generating Situational Prompts & Images