from flask import Flask, request, jsonify
from llama_cpp import Llama
# from transformers import AutoModelForCausalLM, AutoTokenizer

# Load a small quantized instruct model from the Hugging Face Hub via llama-cpp-python.
llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-IQ3_M.gguf",
)

# Previous transformers-based setup, kept for reference:
# MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to("cpu")

app = Flask(__name__)

# DEFAULT_TEMPERATURE = 0.7
# DEFAULT_MAX_TOKENS = 150
# DEFAULT_TOP_P = 0.95


def generate_journal_suggestion(current_page):
    """Generate a single reflective journaling question for the given entry."""
    try:
        # Prompt (in Romanian): based on the journal entry, generate one question the
        # user could ask themselves; it should encourage deeper personal reflection,
        # exploration of feelings, or clarification of goals.
        suggestion_prompt = (
            f"Pe baza înregistrării din jurnal: '{current_page}', generează o singură "
            "întrebare pe care utilizatorul ar putea să și-o pună într-un jurnal. "
            "Întrebarea ar trebui să încurajeze reflecția personală mai profundă, "
            "explorarea sentimentelor sau clarificarea obiectivelor."
        )

        # input_ids = tokenizer(suggestion_prompt, return_tensors="pt").input_ids.to("cpu")
        # output_ids = model.generate(
        #     input_ids,
        #     max_length=DEFAULT_MAX_TOKENS,
        #     temperature=DEFAULT_TEMPERATURE,
        #     top_p=DEFAULT_TOP_P,
        #     do_sample=True,
        # )
        # suggestion_response = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        suggestion_response = llm.create_chat_completion(
            messages=[{"role": "user", "content": suggestion_prompt}]
        )
        # create_chat_completion returns an OpenAI-style dict; return only the generated text.
        return suggestion_response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error: {str(e)}"


@app.route("/", methods=["POST", "GET"])
def home():
    return "Hi!"


@app.route("/chat", methods=["POST"])
def chat():
    data = request.json
    message = data.get("message", "")  # currently unused by this endpoint
    system_message = data.get("system_message", "You are a friendly chatbot.")  # currently unused
    journal_page = data.get("journal_page", "")

    suggestion = ""
    if journal_page:
        suggestion = generate_journal_suggestion(journal_page)

    return jsonify({"journal_suggestion": suggestion})


if __name__ == "__main__":
    app.run(debug=True)
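
# Example client call (a minimal sketch, assuming the server is running locally on
# Flask's default port 5000; the journal text below is purely illustrative):
#
#   import requests
#
#   resp = requests.post(
#       "http://127.0.0.1:5000/chat",
#       json={"journal_page": "Azi m-am simțit copleșit de sarcinile de la birou."},
#   )
#   print(resp.json()["journal_suggestion"])
#
# The endpoint responds with a JSON object of the form
# {"journal_suggestion": "<generated question>"}.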