import logging
import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from huggingface_hub.inference._generated.types import ChatCompletionOutput
from huggingface_hub.utils import HfHubHTTPError

# Configure logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

# Load environment variables from .env file. The main entry point (app.py)
# also loads them; calling load_dotenv() here as well is harmless and keeps
# this module usable when imported or run on its own.
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
HF_INFERENCE_ENDPOINT_URL = os.getenv("HF_INFERENCE_ENDPOINT_URL")
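# For reference, an illustrative .env file for this module (placeholder
# values, not real credentials; the URL shape follows the usual Hugging Face
# Inference Endpoints form):
#
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
#   HF_INFERENCE_ENDPOINT_URL=https://your-endpoint-name.endpoints.huggingface.cloud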

# Default parameters for the LLM call
DEFAULT_MAX_TOKENS = 2048
DEFAULT_TEMPERATURE = 0.1  # Lower temperature for more deterministic analysis

# Special dictionary to indicate a 503 error
ERROR_503_DICT = {"error_type": "503", "message": "Service Unavailable"}


def query_qwen_endpoint(
    formatted_prompt: list[dict[str, str]], max_tokens: int = DEFAULT_MAX_TOKENS
) -> ChatCompletionOutput | dict | None:
    """
    Queries the specified Qwen Inference Endpoint with the formatted prompt.

    Args:
        formatted_prompt: A list of message dictionaries for the chat completion API.
        max_tokens: The maximum number of tokens to generate.

    Returns:
        The ChatCompletionOutput object from the inference client,
        a specific dictionary (ERROR_503_DICT) if a 503 error occurs,
        or None if another error occurs.
    """
    if not HF_INFERENCE_ENDPOINT_URL:
        logging.error("HF_INFERENCE_ENDPOINT_URL environment variable not set.")
        return None
    if not HF_TOKEN:
        logging.warning(
            "HF_TOKEN environment variable not set. Requests might fail if the endpoint requires authentication."
        )
        # Depending on endpoint config, it might still work without token

    logging.info(f"Querying Inference Endpoint: {HF_INFERENCE_ENDPOINT_URL}")
    client = InferenceClient(model=HF_INFERENCE_ENDPOINT_URL, token=HF_TOKEN)

    try:
        response = client.chat_completion(
            messages=formatted_prompt,
            max_tokens=max_tokens,
            temperature=DEFAULT_TEMPERATURE,
            # Qwen models often benefit from setting stop sequences if known,
            # but we'll rely on max_tokens and model's natural stopping for now.
            # stop=["<|im_end|>"] # Example stop token if needed for specific Qwen finetunes
        )
        logging.info("Successfully received response from Inference Endpoint.")
        return response
    except HfHubHTTPError as e:
        # Check specifically for 503 Service Unavailable
        if e.response is not None and e.response.status_code == 503:
            logging.warning(
                f"Encountered 503 Service Unavailable from endpoint: {HF_INFERENCE_ENDPOINT_URL}"
            )
            return ERROR_503_DICT  # Return special dict for 503
        else:
            # Handle other HTTP errors
            logging.error(f"HTTP error querying Inference Endpoint: {e}")
            if e.response is not None:
                logging.error(f"Response details: {e.response.text}")
            return None  # Return None for other HTTP errors
    except Exception as e:
        logging.error(f"An unexpected error occurred querying Inference Endpoint: {e}")
        print(f"An unexpected error occurred querying Inference Endpoint: {e}")
        return None

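
# Illustrative helper (not used by the original call sites): one way a caller
# might react to the ERROR_503_DICT sentinel returned above, retrying a few
# times before giving up. The retry count and delay are assumptions.
def query_qwen_endpoint_with_retry(
    formatted_prompt: list[dict[str, str]],
    max_tokens: int = DEFAULT_MAX_TOKENS,
    retries: int = 3,
    delay_seconds: float = 5.0,
) -> ChatCompletionOutput | dict | None:
    import time  # Local import to keep this sketch self-contained.

    response: ChatCompletionOutput | dict | None = None
    for attempt in range(1, retries + 1):
        response = query_qwen_endpoint(formatted_prompt, max_tokens=max_tokens)
        # Only the 503 sentinel is retryable; None (other errors) and real
        # responses are returned to the caller immediately.
        if not (isinstance(response, dict) and response.get("error_type") == "503"):
            return response
        if attempt < retries:
            logging.warning(
                f"503 from endpoint (attempt {attempt}/{retries}); "
                f"retrying in {delay_seconds}s..."
            )
            time.sleep(delay_seconds)
    return response
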

def parse_qwen_response(response: ChatCompletionOutput | dict | None) -> str:
    """
    Parses the response from the Qwen model to extract the generated text.
    Handles potential None or error dict inputs.

    Args:
        response: The ChatCompletionOutput object, ERROR_503_DICT, or None.

    Returns:
        The extracted response text as a string, or an error message string.
    """
    if response is None:
        return "Error: Failed to get response from the language model."

    # Check if it's our specific 503 error signal before trying to parse as ChatCompletionOutput
    if isinstance(response, dict) and response.get("error_type") == "503":
        return f"Error: {response['error_type']} {response['message']}"

    # Check if it's likely the expected ChatCompletionOutput structure
    if not hasattr(response, "choices"):
        logging.error(
            f"Unexpected response type received by parse_qwen_response: {type(response)}. Content: {response}"
        )
        return "Error: Received an unexpected response format from the language model endpoint."

    try:
        # Access the generated content according to the ChatCompletionOutput structure
        if response.choices and len(response.choices) > 0:
            content = response.choices[0].message.content
            if content:
                logging.info("Successfully parsed response content.")
                return content.strip()
            else:
                logging.warning("Response received, but content is empty.")
                return "Error: Received an empty response from the language model."
        else:
            logging.warning("Response received, but no choices found.")
            return "Error: No response choices found in the language model output."
    except AttributeError as e:
        # This might catch cases where response looks like the object but lacks expected attributes
        logging.error(
            f"Attribute error parsing response: {e}. Response structure might be unexpected."
        )
        logging.error(f"Raw response object: {response}")
        return "Error: Could not parse the structure of the language model response."
    except Exception as e:
        logging.error(f"An unexpected error occurred parsing the response: {e}")
        return "Error: An unexpected error occurred while parsing the language model response."

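# Hedged usage note: every failure path in parse_qwen_response returns a
# string starting with "Error:", so callers can distinguish model output from
# failures by prefix. A tiny illustrative helper (not used elsewhere):
def response_is_error(parsed_text: str) -> bool:
    return parsed_text.startswith("Error:")
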

# Example usage for manual testing (requires .env setup and prompts.py).
if __name__ == "__main__":
    try:
        from prompts import format_code_for_analysis

        # Build a minimal dummy prompt for testing.
        test_files = {"app.py": "print('hello')"}
        test_prompt = format_code_for_analysis("test/minimal", test_files)
        print("--- Sending Test Prompt ---")
        print(test_prompt)
        api_response = query_qwen_endpoint(test_prompt)
        print("\n--- Raw API Response ---")
        print(api_response)
        print("\n--- Parsed Response ---")
        parsed_text = parse_qwen_response(api_response)
        print(parsed_text)
    except ImportError:
        print("Could not import prompts.py for testing. Run this test from the project root.")
    except Exception as e:
        print(f"An error occurred during testing: {e}")