"""Voice front-end for Gemini.

Records a sentence from the microphone, transcribes it with Google's speech
recognition service, sends the text to the Gemini API, and prints the reply.
"""

import json
import os
import time

import requests
import speech_recognition as sr

# REST endpoint of the model used for every query.
GEMINI_URL = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "gemini-1.5-flash:generateContent"
)

# Upper bound (seconds) on a single HTTP round trip, so a stalled connection
# cannot freeze the listen/respond loop forever.
REQUEST_TIMEOUT_SECONDS = 30

# Name of the environment variable that main() reads the API key from.
API_KEY_ENV_VAR = "GEMINI_API_KEY"


def listen_for_speech():
    """Record audio from the microphone and convert it to text.

    Returns:
        str | None: The transcribed text, or None when the audio could not be
        understood or the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()

    with sr.Microphone() as source:
        print("Listening... Speak now.")
        # Calibrate the energy threshold against background noise first.
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)

    try:
        print("Processing speech...")
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand audio")
        return None
    except sr.RequestError as e:
        print(f"Error with the speech recognition service; {e}")
        return None

    print(f"You said: {text}")
    return text


def _extract_candidate_text(response_json):
    """Return the text of the first part of the first candidate, or None."""
    try:
        return response_json["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError):
        # Missing key, empty list, or an unexpected JSON shape.
        return None


def query_gemini(current_sentence, prefix_string, api_key):
    """Send ``prefix_string`` + ``current_sentence`` to Gemini.

    Args:
        current_sentence (str): The sentence to process.
        prefix_string (str): Instruction text placed before the sentence.
        api_key (str): Gemini API key.

    Returns:
        str: The model's reply text. On any failure (network error, non-200
        status, or a response without a text part) a string starting with
        "Error:" is returned instead; this function does not raise for
        transport or parsing problems.
    """
    prompt = f"{prefix_string} {current_sentence}"

    # The key travels in a header rather than the query string so it cannot
    # end up in URLs that appear in exception messages or logs.
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": api_key,
    }
    payload = {"contents": [{"parts": [{"text": prompt}]}]}

    try:
        response = requests.post(
            GEMINI_URL,
            headers=headers,
            data=json.dumps(payload),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return f"Error: request failed; {exc}"

    if response.status_code == 200:
        try:
            reply = _extract_candidate_text(response.json())
        except ValueError:
            # Body was not valid JSON; fall through to the generic error.
            reply = None
        if reply is not None:
            return reply

    return f"Error: {response.status_code}, {response.text}"


def main():
    """Run the interactive listen -> query -> print loop until the user exits."""
    # The key is read from the environment; never commit it to source control.
    api_key = os.environ.get(API_KEY_ENV_VAR)
    if not api_key:
        print(f"Set the {API_KEY_ENV_VAR} environment variable to your Gemini API key.")
        return

    # The prefix to add before the transcribed speech.
    prefix_string = "Respond concisely to this statement:"

    while True:
        current_sentence = listen_for_speech()

        if not current_sentence:
            print("No speech detected. Try again.")
            time.sleep(1)
            continue

        print("Sending to Gemini...")
        gemini_response = query_gemini(current_sentence, prefix_string, api_key)
        print(f"\nGemini response: {gemini_response}\n")

        print("Press Enter to listen again or type 'exit' to quit: ")
        if input().lower() == 'exit':
            break


if __name__ == "__main__":
    main()