import speech_recognition as sr
import requests
import json
import time

def listen_for_speech(timeout=None, phrase_time_limit=None):
    """
    Record one phrase from the default microphone and transcribe it.

    The recognizer is calibrated against ambient noise *before* the user
    is prompted to speak, so the calibration window samples background
    noise rather than the start of the user's sentence.

    Args:
        timeout (float | None): Maximum seconds to wait for speech to
            start. None (the default) waits indefinitely.
        phrase_time_limit (float | None): Maximum seconds a phrase may
            last before recording is cut off. None (the default) means
            no limit.

    Returns:
        str | None: The transcribed text, or None if no speech started
        within ``timeout``, the audio could not be understood, or the
        recognition service request failed.
    """
    recognizer = sr.Recognizer()

    with sr.Microphone() as source:
        # Calibrate first: this step samples the microphone to set the
        # energy threshold and should run on audio without speech.
        print("Calibrating for ambient noise...")
        recognizer.adjust_for_ambient_noise(source)

        print("Listening... Speak now.")
        try:
            audio = recognizer.listen(
                source,
                timeout=timeout,
                phrase_time_limit=phrase_time_limit,
            )
        except sr.WaitTimeoutError:
            # Only raised when a timeout was supplied and no phrase began.
            print("No speech started before the timeout")
            return None

    # The microphone is released before the (network) recognition call.
    print("Processing speech...")
    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand audio")
        return None
    except sr.RequestError as e:
        print(f"Error with the speech recognition service; {e}")
        return None

    print(f"You said: {text}")
    return text

def query_gemini(current_sentence, prefix_string, api_key, timeout=30):
    """
    Send ``prefix_string`` followed by ``current_sentence`` to the Gemini
    model and return the generated text.

    This function does not raise on network or API failures; it reports
    them through the returned string so an interactive caller can keep
    running.

    Args:
        current_sentence (str): The current sentence to process
        prefix_string (str): The prefix to add before the sentence
        api_key (str): Your Gemini API key
        timeout (float): Seconds to wait for the HTTP request before
            giving up. Defaults to 30.

    Returns:
        str: The text of the first candidate on success; otherwise a
        message starting with "Error:" describing what went wrong.
    """
    # Combine the prefix and the current sentence
    prompt = f"{prefix_string} {current_sentence}"

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
    # The key travels in a header rather than the query string so it does
    # not end up in URLs that get logged or echoed in exception messages.
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": api_key,
    }
    payload = {
        "contents": [{"parts": [{"text": prompt}]}]
    }

    try:
        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        return f"Error: request failed, {exc}"

    if response.status_code == 200:
        try:
            candidate = response.json()["candidates"][0]
            return candidate["content"]["parts"][0]["text"]
        except (ValueError, LookupError, TypeError):
            # Non-JSON body, or a 200 response without usable text (no
            # candidates, missing content, or empty parts): fall through
            # to the generic error string below.
            pass

    # Return error message if something went wrong
    return f"Error: {response.status_code}, {response.text}"

def main():
    """
    Run the interactive loop: listen, send the transcript to Gemini,
    print the reply, and ask whether to continue.

    The Gemini API key is read from the GEMINI_API_KEY environment
    variable so that no credential is stored in the source file. If the
    variable is unset or empty, an error is printed and the function
    returns without starting the loop.

    The loop ends when the user types 'exit' at the prompt, or on
    Ctrl+C / end-of-input.
    """
    # Function-scope import: only main() needs the environment.
    import os

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("Error: set the GEMINI_API_KEY environment variable to your Gemini API key.")
        return

    # The prefix to add before the transcribed speech
    prefix_string = "Respond concisely to this statement:"

    try:
        while True:
            current_sentence = listen_for_speech()

            # Nothing usable was transcribed: pause briefly and retry.
            if not current_sentence:
                print("No speech detected. Try again.")
                time.sleep(1)
                continue

            print("Sending to Gemini...")
            gemini_response = query_gemini(current_sentence, prefix_string, api_key)
            print(f"\nGemini response: {gemini_response}\n")

            print("Press Enter to listen again or type 'exit' to quit: ")
            user_input = input()
            # Tolerate stray whitespace and any letter case around 'exit'.
            if user_input.strip().lower() == 'exit':
                break
    except (KeyboardInterrupt, EOFError):
        # Ctrl+C or closed stdin is a normal way to leave the loop.
        print("\nExiting.")

# Start the interactive loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()