winamnd committed on
Commit
cf84747
·
verified ·
1 Parent(s): a30c719

Rename save_data.py to save_results.py

Browse files
Files changed (2) hide show
  1. save_data.py +0 -144
  2. save_results.py +39 -0
save_data.py DELETED
@@ -1,144 +0,0 @@
1
- import os
2
- import numpy as np
3
- import json
4
- import shutil
5
- import requests
6
- import re as r
7
- from urllib.request import urlopen
8
- from datetime import datetime
9
- from datasets import Image
10
- from PIL import Image
11
- from huggingface_hub import Repository, upload_file
12
-
13
# --- Hugging Face dataset configuration -------------------------------------
# Access token is read from the environment; it is None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "OCR-img-to-text"
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
DATA_FILENAME = "ocr_data.csv"
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
print("is none?", HF_TOKEN is None)
# Folder inside the dataset repo that receives uploads, and the local staging folder.
REPOSITORY_DIR = "data"
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

# Best-effort download of the existing CSV.
# NOTE(review): neither `hf_hub_download` nor `DATA_DIRNAME` is imported or
# defined in the visible part of this file, so this call presumably fails with
# NameError every time - confirm and either fix the names or delete the block.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME
    )
except Exception as e:
    # Catch Exception rather than using a bare `except:` so Ctrl-C / SystemExit
    # still propagate, and show the real cause instead of hiding it.
    print("file not found", e)

# Clone (or reuse) the dataset repository locally and bring it up to date.
# On failure `repo` stays undefined; later users of `repo` must cope with that.
try:
    repo = Repository(local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
    repo.git_pull()
except Exception as e:
    print("Error occurred during git pull:", e)
40
-
41
- # repo = Repository(local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
42
- # repo.git_pull()
43
-
44
def getIP(url='http://checkip.dyndns.com/', timeout=10):
    """Return the public IPv4 address reported by an IP-echo service.

    Args:
        url: Address of a page whose body contains ``Address: a.b.c.d``.
            Defaults to the service the original code used.
        timeout: Seconds to wait for the service before giving up, so a slow
            or unreachable host cannot hang the caller indefinitely.

    Returns:
        The dotted-quad address as a string, or ``''`` when the request
        fails or the response contains no address.
    """
    ip_address = ''
    try:
        # The context manager closes the connection even if read() fails.
        with urlopen(url, timeout=timeout) as response:
            # Decode the payload instead of matching against the repr of bytes.
            body = response.read().decode('utf-8', errors='replace')

        match = r.search(r'Address: (\d+\.\d+\.\d+\.\d+)', body)
        if match:
            return match.group(1)
        print("Error while getting IP address -->", "no address found in response")
    except Exception as e:
        # Best-effort lookup: report the problem and fall back to ''.
        print("Error while getting IP address -->",e)
    return ip_address
54
-
55
def get_location(ip_addr, timeout=10):
    """Look up location details for an IP address via the remote lookup service.

    Args:
        ip_addr: IP address to look up. An empty or ``None`` value (what
            ``getIP`` yields on failure) short-circuits without a request.
        timeout: Seconds to wait for the service; without it the HTTP call
            could block forever.

    Returns:
        The decoded JSON response of the service, or ``{}`` when no address
        was supplied or the request/decoding failed.
    """
    location = {}
    if not ip_addr:
        # Nothing to look up - skip the network round trip.
        return location

    try:
        # NOTE(review): the service token is hard-coded in source; consider
        # moving it to an environment variable / secret.
        req_data = {
            "ip": ip_addr,
            "token": "pkml123"
        }
        url = "https://demos.pragnakalp.com/get-ip-location"
        headers = {'Content-Type': 'application/json'}

        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(req_data),
            timeout=timeout,
        )
        response = response.json()
        print("response======>>",response)
        return response
    except Exception as e:
        # Best-effort lookup: report the problem and fall back to {}.
        print("Error while getting location -->",e)
    return location
77
-
78
- """
79
- Save generated details
80
- """
81
def dump_json(thing, file):
    """Serialize ``thing`` as JSON into the file at path ``file``.

    The file is opened for writing as UTF-8 and any previous content is
    replaced.
    """
    with open(file, mode='w+', encoding="utf8") as out_handle:
        json.dump(thing, fp=out_handle)
84
-
85
def flag(Method,text_output,input_image):
    """Persist one OCR run locally, upload it to the dataset repo and notify by HTTP.

    Steps: resolve the caller's IP and location, save the image and a
    metadata file under ``LOCAL_DIR/<timestamp>/``, upload both files to
    ``REPOSITORY_DIR/<timestamp>/`` in the dataset repository, pull the local
    clone, and POST the run details to the notification endpoint.

    Args:
        Method: Identifier of the OCR method, stored under ``'method'``.
        text_output: Text produced by the OCR run.
        input_image: Image array; it is passed to ``Image.fromarray`` and
            ``.tolist()`` is called on it.

    Returns:
        A fixed confirmation string.

    Raises:
        Exception: If the image cannot be written to disk (the underlying
            error is chained as the cause). Upload and notification errors
            propagate unchanged.
    """
    print("saving data------------------------")

    ip_address = getIP()
    print("ip_address :",ip_address)
    location = get_location(ip_address)
    print("location :",location)

    # NOTE(review): the id has one-second resolution, so two submissions in
    # the same second share a folder and overwrite each other.
    metadata_name = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    SAVE_FILE_DIR = os.path.join(LOCAL_DIR,metadata_name)
    os.makedirs(SAVE_FILE_DIR,exist_ok=True)
    image_output_filename = os.path.join(SAVE_FILE_DIR,'image.png')
    print("image_output_filename :",image_output_filename)
    print(input_image)
    try:
        Image.fromarray(input_image).save(image_output_filename)
    except Exception as err:
        # Keep the original failure attached so the root cause is visible.
        raise Exception("Had issues saving np array image to file") from err

    # Write the metadata file next to the image.
    json_file_path = os.path.join(SAVE_FILE_DIR,'metadata.jsonl')
    metadata = {
        'id': metadata_name,
        'method': Method,
        'file_name': 'image.png',
        'generated_text': text_output,
        'ip': ip_address,
        'location': location,
    }
    dump_json(metadata,json_file_path)

    # Paths inside the repo always use '/', whatever the local OS separator is.
    repo_image_path = "/".join((REPOSITORY_DIR, metadata_name, 'image.png'))
    repo_json_path = "/".join((REPOSITORY_DIR, metadata_name, 'metadata.jsonl'))

    # Upload the image, then the metadata, using the hub's upload_file.
    for local_path, remote_path in (
        (image_output_filename, repo_image_path),
        (json_file_path, repo_json_path),
    ):
        upload_file(
            path_or_fileobj=local_path,
            path_in_repo=remote_path,
            repo_id=DATASET_REPO_ID,
            repo_type='dataset',
            token=HF_TOKEN,
        )

    # `repo` only exists if the module-level clone succeeded; refreshing the
    # local clone is best-effort and must not fail an already-uploaded run.
    try:
        repo.git_pull()
    except Exception as e:
        print("Error occurred during git pull:", e)

    url = 'http://pragnakalpdev35.pythonanywhere.com/HF_space_image_to_text'
    myobj = {
        'Method': Method,
        'text_output': text_output,
        'img': input_image.tolist(),
        'ip_address': ip_address,
        'loc': location,
    }
    # Bound the wait so an unresponsive endpoint cannot hang the request forever.
    x = requests.post(url, json=myobj, timeout=30)
    print("mail status code",x.status_code)

    return "*****Logs save successfully!!!!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
save_results.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import csv
4
+
5
def save_results_to_repo(text, label, repo_path="./wnmnd/ocr-llm-test"):
    """Append one OCR result to ``ocr_results.json`` and ``ocr_results.csv``.

    Both files live in ``repo_path``, which is created when missing. The JSON
    file holds a list of ``{"text", "label"}`` objects; the CSV file holds
    the same records under a ``text,label`` header.

    Args:
        text: Recognized text to store.
        label: Label associated with the text.
        repo_path: Directory that receives the two result files.

    Returns:
        None. Failures are reported on stdout rather than raised, so saving
        results never interrupts the caller.
    """
    data = {"text": text, "label": label}

    try:
        # Ensure the target folder exists (exist_ok avoids a create race).
        if not os.path.exists(repo_path):
            os.makedirs(repo_path, exist_ok=True)
            print(f"Folder created at: {repo_path}")

        results_json = os.path.join(repo_path, "ocr_results.json")
        results_csv = os.path.join(repo_path, "ocr_results.csv")

        # Save to JSON. A missing or empty file counts as "no results yet",
        # so a previously interrupted write cannot block every later call.
        content = []
        if os.path.exists(results_json) and os.path.getsize(results_json) > 0:
            with open(results_json, "r", encoding="utf-8") as f:
                content = json.load(f)
        content.append(data)

        # Write to a sibling temp file and swap it in, so the existing file
        # is never left half-written or with stale trailing bytes.
        tmp_json = results_json + ".tmp"
        with open(tmp_json, "w", encoding="utf-8") as f:
            json.dump(content, f, indent=4)
        os.replace(tmp_json, results_json)

        # Save to CSV. The header is also needed when the file exists but is empty.
        needs_header = (
            not os.path.exists(results_csv)
            or os.path.getsize(results_csv) == 0
        )
        # Explicit UTF-8: OCR text is frequently non-ASCII and the platform
        # default encoding may not be able to represent it.
        with open(results_csv, "a", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["text", "label"])
            if needs_header:
                writer.writeheader()
            writer.writerow(data)

        print(f"Results saved: {data}")

    except Exception as e:
        # Deliberately best-effort: report and continue.
        print(f"Error saving results: {e}")