# File: app.py
import streamlit as st
import speech_recognition as sr
import openai
import pandas as pd
import docx
import PyPDF2
from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, AudioDataStream, ResultReason
import tempfile
import os
import base64
# Service credentials.
# They are read from the environment so that secrets never live in the source
# file; each value falls back to an empty string when the variable is unset.
#   OPENAI_API_KEY - OpenAI API key
#   SPEECH_KEY     - Azure Speech resource key
#   SPEECH_REGION  - Azure Speech resource region (e.g. "eastus")
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
azure_speech_key = os.environ.get("SPEECH_KEY", "")
azure_service_region = os.environ.get("SPEECH_REGION", "")
# Function to capture voice input
def capture_voice():
    """Record one utterance from the microphone and transcribe it.

    Progress messages and errors are shown in the Streamlit page.

    Returns:
        The recognized text converted to lower case, or None when the
        microphone cannot be opened, the speech cannot be understood, or the
        recognition request fails.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.Microphone() as source:
            st.write("Listening...")
            audio = recognizer.listen(source)
    except OSError as e:
        # Opening the input device fails with OSError when no usable
        # microphone is present; report it instead of crashing the page.
        st.error(f"Could not access the microphone: {e}")
        return None

    try:
        st.write("Recognizing...")
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.error("Sorry, I could not understand the audio.")
        return None
    except sr.RequestError:
        st.error("Could not request results; check your network connection.")
        return None

    st.write(f"You said: {text}")
    return text.lower()  # Convert to lower case for easier comparison
# Function to query OpenAI's GPT-3
def query_chat_model(prompt):
    """Ask the gpt-3.5-turbo chat model to answer *prompt*.

    The request carries a fixed system message plus the prompt as the user
    message and caps the reply at 150 tokens.

    Returns:
        The stripped text of the first choice on success; otherwise a short
        fallback message (the error is also shown in the Streamlit page).
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            max_tokens=150,
        )
        first_choice = completion.choices[0]
        return first_choice.message['content'].strip()
    except openai.error.RateLimitError:
        st.error("Rate limit exceeded. Please wait and try again later.")
        return "Rate limit exceeded. Please try again later."
    except Exception as err:
        st.error(f"Error querying the OpenAI API: {err}")
        return "An error occurred while querying the OpenAI API."
# Function to read content from an Excel file
def read_excel_file(file):
    """Load an Excel workbook into a DataFrame using the openpyxl engine.

    Returns:
        The DataFrame, or None if reading fails (the error is shown in the
        Streamlit page).
    """
    try:
        return pd.read_excel(file, engine='openpyxl')
    except Exception as err:
        st.error(f"Error reading the Excel file: {err}")
    return None
# Function to read content from a PDF file
def read_pdf_file(file):
    """Extract the text of every page of a PDF and concatenate it.

    Returns:
        The combined page text, or None if the PDF cannot be read (the error
        is shown in the Streamlit page).
    """
    try:
        reader = PyPDF2.PdfReader(file)
        # Join once instead of growing a string page by page; ``or ""`` keeps
        # a page that yields no text from aborting the whole document.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        st.error(f"Error reading the PDF file: {e}")
        return None
# Function to read content from a Word file
def read_word_file(file):
    """Return the text of a Word document, one paragraph per line.

    Every paragraph's text is followed by a newline character.

    Returns:
        The document text, or None if the document cannot be read (the error
        is shown in the Streamlit page).
    """
    try:
        document = docx.Document(file)
        return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
    except Exception as err:
        st.error(f"Error reading the Word file: {err}")
    return None
# Function to read content from a TXT file
def read_txt_file(file):
    """Read a binary file object and decode its content as UTF-8 text.

    Args:
        file: An object whose ``read()`` returns bytes.

    Returns:
        The decoded text, or None if reading or decoding fails (the error is
        shown in the Streamlit page).
    """
    try:
        # "utf-8-sig" decodes plain UTF-8 identically but also drops a leading
        # byte-order mark, which would otherwise end up as a stray U+FEFF at
        # the start of the content.
        return file.read().decode("utf-8-sig")
    except Exception as e:
        st.error(f"Error reading the TXT file: {e}")
        return None
# Function to generate a response using GPT-3 and the provided content
def generate_response(query, content):
    """Answer *query* with the chat model, supplying *content* as the data.

    Both values are interpolated into a single prompt that is passed to
    ``query_chat_model``; its return value is returned unchanged.
    """
    return query_chat_model(
        f"Using the following data: {content}, answer the question: {query}"
    )
# Function to convert text to speech and get audio file path
def text_to_speech(text):
    """Synthesize *text* with Azure Speech and save the audio as a WAV file.

    The "en-US-JennyNeural" voice is used and the result is written to
    ``output.wav`` in the system temporary directory.

    Returns:
        The path of the WAV file, or None when *text* is empty or synthesis
        fails (the error is shown in the Streamlit page).
    """
    if not text:
        st.error("No text to speak.")
        return None

    try:
        config = SpeechConfig(subscription=azure_speech_key, region=azure_service_region)
        config.speech_synthesis_voice_name = "en-US-JennyNeural"
        speaker = SpeechSynthesizer(speech_config=config, audio_config=None)
        outcome = speaker.speak_text_async(text).get()

        # Anything other than a completed synthesis is treated as a failure.
        if outcome.reason != ResultReason.SynthesizingAudioCompleted:
            st.error("Error during text-to-speech conversion")
            return None

        stream = AudioDataStream(outcome)
        wav_path = os.path.join(tempfile.gettempdir(), "output.wav")
        stream.save_to_wav_file(wav_path)
        return wav_path
    except Exception as err:
        st.error(f"Error in text-to-speech conversion: {err}")
        return None
# Function to play audio automatically
def play_audio(file_path):
    """Embed a WAV file in the page as an auto-playing HTML audio element.

    The file is read, base64-encoded and inlined as a ``data:`` URI. Any
    failure is shown in the Streamlit page instead of being raised.

    Args:
        file_path: Path of the WAV file to play.
    """
    try:
        # The context manager closes the file handle even if reading fails.
        with open(file_path, 'rb') as audio_file:
            audio_bytes = audio_file.read()
        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
        audio_html = f'<audio autoplay="true" controls><source src="data:audio/wav;base64,{audio_base64}" type="audio/wav"></audio>'
        st.markdown(audio_html, unsafe_allow_html=True)
    except Exception as e:
        st.error(f"Error playing audio: {e}")
# Main function to integrate all functionalities
def main():
    """Render the page: voice capture, file upload, and the spoken answer.

    Flow of a single script run:
      1. The "Speak" button records a question and stores it in
         ``st.session_state.voice_input``.
      2. An uploaded file is parsed and its text stored in
         ``st.session_state.file_content``.
      3. When both a question and file content are available, the model is
         queried and the answer is written and played once, inside the
         response container at the top of the page.
    """
    st.title("Voice Controlled GPT-3 with File Data")
    response_container = st.container()
    upload_container = st.container()

    with response_container:
        if st.button("Speak"):
            voice_input = capture_voice()
            if voice_input:
                st.session_state.voice_input = voice_input
            elif "voice_input" in st.session_state:
                # Recognition failed: forget the previous question so that
                # neither "None" nor a stale question is answered this run.
                del st.session_state["voice_input"]

    with upload_container:
        uploaded_file = st.file_uploader(
            "Upload a file", type=["xlsx", "pdf", "docx", "txt"]
        )
        if uploaded_file is not None:
            _load_uploaded_file(uploaded_file)

    if "voice_input" in st.session_state and "file_content" in st.session_state:
        response_text = generate_response(
            st.session_state.voice_input, st.session_state.file_content
        )
        st.session_state.response_text = response_text
        # The answer is rendered and synthesized exactly once per run, so the
        # audio is not played twice.
        with response_container:
            st.write(f"Response: {response_text}")
            audio_file_path = text_to_speech(response_text)
            if audio_file_path:
                play_audio(audio_file_path)


def _load_uploaded_file(uploaded_file):
    """Parse an uploaded file, store its text in session state and preview it."""
    # Compare the extension case-insensitively so e.g. "REPORT.PDF" is handled.
    file_type = uploaded_file.name.split('.')[-1].lower()

    if file_type == "xlsx":
        df = read_excel_file(uploaded_file)
        if df is not None:
            st.session_state.file_content = df.to_string(index=False)
            st.write("Excel Data:")
            st.dataframe(df)
        return

    # Text-based formats share one path: reader function plus preview heading.
    text_readers = {
        "pdf": (read_pdf_file, "PDF Content:"),
        "docx": (read_word_file, "Word Document Content:"),
        "txt": (read_txt_file, "Text File Content:"),
    }
    if file_type not in text_readers:
        return
    reader, heading = text_readers[file_type]
    file_content = reader(uploaded_file)
    if file_content:
        st.session_state.file_content = file_content
        st.write(heading)
        st.write(file_content)
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
# This is working now; the voice output is no longer repeated.