Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,19 @@
|
|
1 |
from flask import Flask, request, jsonify
|
2 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
-
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
DEFAULT_TEMPERATURE = 0.7
|
11 |
-
DEFAULT_MAX_TOKENS = 150
|
12 |
-
DEFAULT_TOP_P = 0.95
|
13 |
|
14 |
def generate_journal_suggestion(current_page):
|
15 |
try:
|
@@ -18,16 +22,26 @@ def generate_journal_suggestion(current_page):
|
|
18 |
Întrebarea ar trebui să încurajeze reflecția personală mai profundă, explorarea sentimentelor sau clarificarea obiectivelor."""
|
19 |
)
|
20 |
|
21 |
-
input_ids = tokenizer(suggestion_prompt, return_tensors="pt").input_ids.to("cpu")
|
22 |
-
|
23 |
-
output_ids = model.generate(
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
)
|
30 |
-
suggestion_response
|
|
|
31 |
|
32 |
except Exception as e:
|
33 |
return f"Error: {str(e)}"
|
@@ -51,4 +65,6 @@ def chat():
|
|
51 |
return jsonify({"journal_suggestion": suggestion})
|
52 |
|
53 |
if __name__ == "__main__":
|
54 |
-
|
|
|
|
|
|
1 |
from flask import Flask, request, jsonify
|
2 |
+
from llama_cpp import Llama
|
3 |
+
# from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
+
llm = Llama.from_pretrained(
|
5 |
+
repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
|
6 |
+
filename="Llama-3.2-1B-Instruct-IQ3_M.gguf",
|
7 |
+
)
|
8 |
+
# MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
|
9 |
|
10 |
+
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
11 |
+
# model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to("cpu")
|
12 |
|
13 |
+
# app = Flask(__name__)
|
14 |
+
# DEFAULT_TEMPERATURE = 0.7
|
15 |
+
# DEFAULT_MAX_TOKENS = 150
|
16 |
+
# DEFAULT_TOP_P = 0.95
|
|
|
|
|
|
|
17 |
|
18 |
def generate_journal_suggestion(current_page):
|
19 |
try:
|
|
|
22 |
Întrebarea ar trebui să încurajeze reflecția personală mai profundă, explorarea sentimentelor sau clarificarea obiectivelor."""
|
23 |
)
|
24 |
|
25 |
+
# input_ids = tokenizer(suggestion_prompt, return_tensors="pt").input_ids.to("cpu")
|
26 |
+
|
27 |
+
# output_ids = model.generate(
|
28 |
+
# input_ids,
|
29 |
+
# max_length=DEFAULT_MAX_TOKENS,
|
30 |
+
# temperature=DEFAULT_TEMPERATURE,
|
31 |
+
# top_p=DEFAULT_TOP_P,
|
32 |
+
# do_sample=True,
|
33 |
+
# )
|
34 |
+
|
35 |
+
suggestion_response = llm.create_chat_completion(
|
36 |
+
messages = [
|
37 |
+
{
|
38 |
+
"role": "user",
|
39 |
+
"content": "What is the capital of France?"
|
40 |
+
}
|
41 |
+
]
|
42 |
)
|
43 |
+
return suggestion_response
|
44 |
+
# suggestion_response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
45 |
|
46 |
except Exception as e:
|
47 |
return f"Error: {str(e)}"
|
|
|
65 |
return jsonify({"journal_suggestion": suggestion})
|
66 |
|
67 |
if __name__ == "__main__":
|
68 |
+
# app.run(debug=True)
|
69 |
+
|
70 |
+
|