srbmihaicode committed
Commit 1d470e1 · verified · 1 Parent(s): 647defb

Update app.py

Files changed (1)
  1. app.py +35 -19
app.py CHANGED
@@ -1,15 +1,19 @@
 from flask import Flask, request, jsonify
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from llama_cpp import Llama
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+llm = Llama.from_pretrained(
+    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
+    filename="Llama-3.2-1B-Instruct-IQ3_M.gguf",
+)
+# MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
 
-MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+# model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to("cpu")
 
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to("cpu")
-
-app = Flask(__name__)
-DEFAULT_TEMPERATURE = 0.7
-DEFAULT_MAX_TOKENS = 150
-DEFAULT_TOP_P = 0.95
+# app = Flask(__name__)
+# DEFAULT_TEMPERATURE = 0.7
+# DEFAULT_MAX_TOKENS = 150
+# DEFAULT_TOP_P = 0.95
 
 def generate_journal_suggestion(current_page):
     try:
@@ -18,16 +22,26 @@ def generate_journal_suggestion(current_page):
         Întrebarea ar trebui să încurajeze reflecția personală mai profundă, explorarea sentimentelor sau clarificarea obiectivelor."""
         )
 
-        input_ids = tokenizer(suggestion_prompt, return_tensors="pt").input_ids.to("cpu")
-
-        output_ids = model.generate(
-            input_ids,
-            max_length=DEFAULT_MAX_TOKENS,
-            temperature=DEFAULT_TEMPERATURE,
-            top_p=DEFAULT_TOP_P,
-            do_sample=True,
+        # input_ids = tokenizer(suggestion_prompt, return_tensors="pt").input_ids.to("cpu")
+
+        # output_ids = model.generate(
+        #     input_ids,
+        #     max_length=DEFAULT_MAX_TOKENS,
+        #     temperature=DEFAULT_TEMPERATURE,
+        #     top_p=DEFAULT_TOP_P,
+        #     do_sample=True,
+        # )
+
+        suggestion_response = llm.create_chat_completion(
+            messages = [
+                {
+                    "role": "user",
+                    "content": "What is the capital of France?"
+                }
+            ]
         )
-        suggestion_response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        return suggestion_response
+        # suggestion_response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
     except Exception as e:
         return f"Error: {str(e)}"
@@ -51,4 +65,6 @@ def chat():
     return jsonify({"journal_suggestion": suggestion})
 
 if __name__ == "__main__":
-    app.run(debug=True)
+    # app.run(debug=True)
+
+
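
Note on the new llama-cpp-python path: as committed, create_chat_completion is still called with the placeholder question "What is the capital of France?" rather than with suggestion_prompt, and the raw response dict is returned. The sketch below is not part of the commit; it only shows, under stated assumptions, how the call could be wired to the existing prompt and how the generated text can be read out of the OpenAI-style dict that llama-cpp-python returns. The /chat route body, the request JSON key "current_page", and the short stand-in prompt are assumptions, and the Flask app object is kept alive even though the commit comments it out, since the route still needs it.

from flask import Flask, request, jsonify
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-IQ3_M.gguf",
)

app = Flask(__name__)  # kept: the /chat route below needs an app object

def generate_journal_suggestion(current_page):
    try:
        # Stand-in for the longer Romanian prompt built in the real file.
        suggestion_prompt = (
            f'Pe baza paginii de jurnal: "{current_page}", '
            "generează o singură întrebare de reflecție."
        )
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": suggestion_prompt}],
            max_tokens=150,    # mirrors the old DEFAULT_MAX_TOKENS
            temperature=0.7,   # mirrors the old DEFAULT_TEMPERATURE
            top_p=0.95,        # mirrors the old DEFAULT_TOP_P
        )
        # create_chat_completion returns an OpenAI-style dict; the text lives here:
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error: {str(e)}"

@app.route("/chat", methods=["POST"])  # path and method assumed from "def chat():"
def chat():
    data = request.get_json(silent=True) or {}
    current_page = data.get("current_page", "")  # key name is an assumption
    suggestion = generate_journal_suggestion(current_page)
    return jsonify({"journal_suggestion": suggestion})

if __name__ == "__main__":
    app.run(debug=True)

With the IQ3_M GGUF the 1B model stays small enough for CPU inference, and create_chat_completion applies the model's chat template itself, so the manual tokenize/decode steps from the old transformers path are no longer needed.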