Spaces:

MsChabane
/

SmartDoc

Running

App Files Community

chabane committed 1 day ago

Commit

4d312cc

1 Parent(s): 571f848

modify the prompt for the user in plot

Browse files

Files changed (1) hide show

main.py +40 -16

main.py CHANGED Viewed

@@ -56,7 +56,7 @@ try:
     deepseek_model = AutoModelForCausalLM.from_pretrained(deepseek_dir, trust_remote_code=True)
     generator = pipeline("text-generation", model=deepseek_model, tokenizer=deepseek_tokenizer)
-    print("[Info] facebook/bart-large-cnn is installed")
 except Exception as exp:
     print("Can't load the model deepseek-ai/deepseek-coder-1.3b-instruct")
     print(f"[Error] {str(exp)}")
@@ -165,14 +165,34 @@ async def plot(user_need:str=Form(...),file:UploadFile=File(...)):
         df = pd.read_excel(io=io.BytesIO(file_bytes))
-        message = f"""
-You are a helpful assistant that helps users write Python code.
-## Requirements:
--you will be given a task and you will write the code to solve the task.
--you have a dataset called **df** contains the following information:
-df.columns:{df.columns.to_list()}
-df.dtypes:{df.dtypes.to_dict()}
 -you have to write the code to solve the task using the dataset df.
 -you can use pandas to manipulate the dataframe.
 -you can use matplotlib to plot the data.
@@ -186,23 +206,27 @@ df.dtypes:{df.dtypes.to_dict()}
 -you have to write the code in a markdown code block.
 -make sure that the type of the chart is compatible with the dtypes of the columns
 -use only the column specified in the task.
 -you have to extract the column names and the plot type  from the prompt bellow  and use them in the code.
 -if the user task is not clear or there is an error like the column names are not in the dataframe, raise an
 error.
-##Prompt: {user_need}.
-        """
-        print(" MESSAGE \n"+message)
         global generator
-        output = generator(message, max_length=1000)
-        print(f'OUTPUT {output[0]["generated_text"]}')
-        match = re.search(r'```python(.*?)```', output[0]["generated_text"], re.DOTALL)
         code =''
         if not match:
           return JSONResponse(content={"error": "No Code was Generated"},status_code=403)
         code = match.group(1).replace("plt.show()\n","")
-        print(code)
         safe_globals={
             "plt": plt,

     deepseek_model = AutoModelForCausalLM.from_pretrained(deepseek_dir, trust_remote_code=True)
     generator = pipeline("text-generation", model=deepseek_model, tokenizer=deepseek_tokenizer)
+    print("[Info] deepseek-ai/deepseek-coder-1.3b-instruct is installed")
 except Exception as exp:
     print("Can't load the model deepseek-ai/deepseek-coder-1.3b-instruct")
     print(f"[Error] {str(exp)}")
         df = pd.read_excel(io=io.BytesIO(file_bytes))
+#        message = f"""
+#You are a helpful assistant that helps users write Python code.
+### Requirements:
+#-you will be given a task and you will write the code to solve the task.
+#-you have a dataset called **df** contains the following information:
+#df.columns:{df.columns.to_list()}
+#df.dtypes:{df.dtypes.to_dict()}
+#
+#-you have to write the code to solve the task using the dataset df.
+#-you can use pandas to manipulate the dataframe.
+#-you can use matplotlib to plot the data.
+#-you can use seaborn to plot the data.
+#-don't use print or input statements in the code.
+#-don't use any other libraries except pandas, matplotlib, seaborn.
+#-don't use any other functions except the ones provided in the libraries.
+#-don't write the code for the dataframe creation.
+#-check if the columns has a nan values and raise exception if yes .
+#-exclude plt.show() from the code.
+#-you have to write the code in a markdown code block.
+#-make sure that the type of the chart is compatible with the dtypes of the columns
+#-use only the column specified in the task.
+#-you have to extract the column names and the plot type  from the prompt bellow  and use them in the code.
+#-if the user task is not clear or there is an error like the column names are not in the dataframe, raise an
+#error.
+###Prompt: {user_need}.
+#        """
+        prompt = [
+    {"role": "system", "content": f'''You are a helpful assistant that helps users write Python code.
 -you have to write the code to solve the task using the dataset df.
 -you can use pandas to manipulate the dataframe.
 -you can use matplotlib to plot the data.
 -you have to write the code in a markdown code block.
 -make sure that the type of the chart is compatible with the dtypes of the columns
 -use only the column specified in the task.
+-you have an information about the dataframe called df contains the following information:
+df.columns:{df.columns.to_list()}
+df.dtypes:{df.dtypes.to_dict()}
 -you have to extract the column names and the plot type  from the prompt bellow  and use them in the code.
 -if the user task is not clear or there is an error like the column names are not in the dataframe, raise an
 error.
+    '''},
+{"role": "user", "content": user_need},
+]
         global generator
+        output = generator(prompt, max_length=1000)
+        match = re.search(r'```python(.*?)```', output[0]['generated_text'][2]['content'], re.DOTALL)
         code =''
         if not match:
           return JSONResponse(content={"error": "No Code was Generated"},status_code=403)
         code = match.group(1).replace("plt.show()\n","")
         safe_globals={
             "plt": plt,