# Pastebin
# Paste #28300: No description
# < previous paste - next paste >
# Pasted by Anonymous Coward
import json
import os
import time

import requests
import speech_recognition as sr
def listen_for_speech():
    """
    Record one utterance from the microphone and transcribe it to text.

    The microphone is first calibrated against ambient noise, and only then
    is the user prompted to speak, so that the calibration window does not
    swallow the beginning of the utterance.

    Returns:
        str or None: The transcribed text, or None when the audio could not
        be understood or the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()

    # Hold the microphone only while capturing; it is released before the
    # (potentially slow) network call to the recognition service.
    with sr.Microphone() as source:
        # Calibrate on silence *before* asking the user to talk; speech
        # during calibration inflates the energy threshold.
        print("Calibrating for ambient noise...")
        recognizer.adjust_for_ambient_noise(source)
        print("Listening... Speak now.")
        audio = recognizer.listen(source)

    print("Processing speech...")
    try:
        # Google's free web speech API, as exposed by speech_recognition.
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        # The service answered but could not make out any words.
        print("Could not understand audio")
        return None
    except sr.RequestError as e:
        # The service was unreachable or rejected the request.
        print(f"Error with the speech recognition service; {e}")
        return None

    print(f"You said: {text}")
    return text
def query_gemini(current_sentence, prefix_string, api_key):
    """
    Send ``prefix_string`` followed by ``current_sentence`` to the Gemini model.

    Args:
        current_sentence (str): The current sentence to process.
        prefix_string (str): The instruction placed before the sentence.
        api_key (str): Your Gemini API key.

    Returns:
        str: The text of the first candidate returned by the model. If the
        request fails, the service answers with a non-200 status, or the
        response does not contain any candidate text, a string starting
        with "Error:" is returned instead (no exception is raised).
    """
    prompt = f"{prefix_string} {current_sentence}"

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
    # The key travels in a header rather than the query string so that it
    # does not end up in URLs embedded in logs or exception messages.
    headers = {"x-goog-api-key": api_key}
    payload = {
        "contents": [{"parts": [{"text": prompt}]}]
    }

    try:
        # json= serialises the payload and sets the Content-Type header;
        # the timeout keeps a stalled connection from blocking forever.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
    except requests.RequestException as exc:
        return f"Error: request failed, {exc}"

    if response.status_code == 200:
        try:
            return response.json()["candidates"][0]["content"]["parts"][0]["text"]
        except (ValueError, LookupError, TypeError):
            # Body was not JSON, or it had no candidate text (for example a
            # blocked prompt). Fall through and report the raw response.
            pass

    return f"Error: {response.status_code}, {response.text}"
def main():
    """
    Run the interactive loop: listen, send the transcript to Gemini, print.

    The Gemini API key is read from the ``GEMINI_API_KEY`` environment
    variable. Secrets must not be embedded in source code; any key that was
    ever stored in a source file should be treated as compromised and rotated.

    Raises:
        SystemExit: If ``GEMINI_API_KEY`` is not set or is empty.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise SystemExit(
            "Set the GEMINI_API_KEY environment variable to your Gemini API key."
        )

    # The instruction placed before the transcribed speech.
    prefix_string = "Respond concisely to this statement:"

    while True:
        current_sentence = listen_for_speech()

        if not current_sentence:
            print("No speech detected. Try again.")
            # Brief pause so repeated failures do not spin the loop.
            time.sleep(1)
            continue

        print("Sending to Gemini...")
        gemini_response = query_gemini(current_sentence, prefix_string, api_key)
        print(f"\nGemini response: {gemini_response}\n")

        print("Press Enter to listen again or type 'exit' to quit: ")
        try:
            user_input = input()
        except EOFError:
            # stdin was closed (e.g. piped input ended); stop cleanly.
            break
        if user_input.strip().lower() == 'exit':
            break
# Run the interactive loop only when executed as a script, not on import.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is the natural way to leave the listen loop; exit quietly
        # instead of dumping a traceback.
        print("\nInterrupted. Exiting.")
# New Paste
# Go to most recent paste.