vumichien committed on
Commit
e75b314
·
1 Parent(s): 04afa0e

change model, name mapping

Browse files
data/sampleData.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/sample_name_sentence_embeddings(cl-nagoya-sup-simcse-ja-for-standard-name-v1_1).pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5da36a0b6c4a23bcc2cc567da344d0b94c2efe6d7986f7a1b7e5f6b7ff721c4a
3
+ size 18017443
data/sample_name_sentence_similarities(cl-nagoya-sup-simcse-ja-for-standard-name-v1_1).pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d942620d2940849fdee0f6cec443a5dd1f7f608144d4f1cee5ff66dd39797035
3
+ size 137593306
main.py CHANGED
@@ -65,13 +65,13 @@ async def lifespan(app: FastAPI):
65
 
66
  # Load pre-computed embeddings and similarities
67
  with open(
68
- f"data/sample_name_sentence_embeddings(cl-nagoya-sup-simcse-ja-base).pkl",
69
  "rb",
70
  ) as f:
71
  sample_name_sentence_embeddings = pickle.load(f)
72
 
73
  with open(
74
- f"data/sample_name_sentence_similarities(cl-nagoya-sup-simcse-ja-base).pkl",
75
  "rb",
76
  ) as f:
77
  sample_name_sentence_similarities = pickle.load(f)
@@ -156,14 +156,15 @@ async def predict(file: UploadFile = File(...)):
156
  )
157
  df_predicted = nameMappingHelper.map_standard_names()
158
  # Create output dataframe and save to CSV - Fix SettingWithCopyWarning by creating a copy
159
- columns_to_keep = ["ファイル名", "シート名", "行", "科目", "名称"]
160
- output_df = inputData.dataframe[columns_to_keep].copy()
161
-
 
162
  # Use .loc to avoid SettingWithCopyWarning
163
  output_df.loc[:, COL_STANDARD_SUBJECT] = df_predicted[COL_STANDARD_SUBJECT]
164
- output_df.loc[:, COL_STANDARD_NAME] = df_predicted[COL_STANDARD_NAME]
165
  output_df.loc[:, "参考_名称"] = df_predicted["参考_名称"]
166
- output_df.loc[:, "出力_名称_類似度"] = df_predicted["出力_名称_類似度"]
167
 
168
  # Save with utf_8_sig encoding for Japanese Excel compatibility
169
  output_df.to_csv(output_file_path, index=False, encoding="utf_8_sig")
 
65
 
66
  # Load pre-computed embeddings and similarities
67
  with open(
68
+ f"data/sample_name_sentence_embeddings(cl-nagoya-sup-simcse-ja-for-standard-name-v1_1).pkl",
69
  "rb",
70
  ) as f:
71
  sample_name_sentence_embeddings = pickle.load(f)
72
 
73
  with open(
74
+ f"data/sample_name_sentence_similarities(cl-nagoya-sup-simcse-ja-for-standard-name-v1_1).pkl",
75
  "rb",
76
  ) as f:
77
  sample_name_sentence_similarities = pickle.load(f)
 
156
  )
157
  df_predicted = nameMappingHelper.map_standard_names()
158
  # Create output dataframe and save to CSV - Fix SettingWithCopyWarning by creating a copy
159
+ # columns_to_keep = ["ファイル名", "シート名", "行", "科目", "名称"]
160
+ # output_df = inputData.dataframe[columns_to_keep].copy()
161
+ output_df = inputData.dataframe.copy()
162
+ print(df_predicted.columns)
163
  # Use .loc to avoid SettingWithCopyWarning
164
  output_df.loc[:, COL_STANDARD_SUBJECT] = df_predicted[COL_STANDARD_SUBJECT]
165
+ output_df.loc[:, "出力_項目名"] = df_predicted["出力_項目名"]
166
  output_df.loc[:, "参考_名称"] = df_predicted["参考_名称"]
167
+ output_df.loc[:, "出力_確率度"] = df_predicted["出力_確率度"]
168
 
169
  # Save with utf_8_sig encoding for Japanese Excel compatibility
170
  output_df.to_csv(output_file_path, index=False, encoding="utf_8_sig")