dropbop committed on
Commit
2a6e97e
·
verified ·
1 Parent(s): 7b69b78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -38
app.py CHANGED
@@ -5,10 +5,12 @@ import random
5
  import pandas as pd
6
  import os
7
  from itertools import islice
 
 
 
8
 
9
  # Configuration
10
  chunk_size = 100 # Size of the chunks to shuffle
11
- label_file = os.path.join(os.path.dirname(__file__), "labels.csv") # Save CSV in the same directory as the script
12
 
13
  # Load the Satellogic dataset (streaming)
14
  dataset = ev.load_dataset("satellogic", streaming=True)
@@ -16,14 +18,11 @@ data_iter = iter(dataset)
16
  shuffled_chunk = [] # Initialize an empty list to hold the current chunk
17
  chunk_iter = None # Initialize the chunk iterator
18
 
19
- # Initialize or load labels DataFrame
20
- if os.path.exists(label_file):
21
- labels_df = pd.read_csv(label_file)
22
- else:
23
- labels_df = pd.DataFrame(columns=["image_id", "bounds", "rating", "google_maps_link"])
24
 
25
  def get_next_image():
26
- global data_iter, labels_df, shuffled_chunk, chunk_iter
27
 
28
  while True:
29
  # If we don't have a current chunk or it's exhausted, get a new one
@@ -50,7 +49,8 @@ def get_next_image():
50
  google_maps_link = utils.get_google_map_link(sample, "satellogic")
51
  image_id = str(bounds)
52
 
53
- if image_id not in labels_df["image_id"].values:
 
54
  return image, image_id, bounds, google_maps_link
55
  except StopIteration:
56
  # Current chunk is exhausted, reset chunk variables to get a new one in the next iteration
@@ -58,42 +58,58 @@ def get_next_image():
58
  chunk_iter = None
59
 
60
  def rate_image(image_id, bounds, rating):
61
- global labels_df
62
 
63
- new_row = pd.DataFrame(
64
  {
65
- "image_id": [image_id],
66
- "bounds": [bounds],
67
- "rating": [rating],
68
- "google_maps_link": [""], # this isn't necessary to pass to the function since we aren't updating it here.
69
  }
70
  )
71
- labels_df = pd.concat([labels_df, new_row], ignore_index=True)
72
- labels_df.to_csv(label_file, index=False)
73
 
74
  next_image, next_image_id, next_bounds, next_google_maps_link = get_next_image()
75
  return next_image, next_image_id, next_bounds, next_google_maps_link
76
 
 
 
 
 
 
 
77
  # Gradio interface
78
- iface = gr.Interface(
79
- fn=rate_image,
80
- inputs=[
81
- gr.Textbox(label="Image ID", visible=False),
82
- gr.Textbox(label="Bounds", visible=False),
83
- gr.Radio(["Cool", "Not Cool"], label="Rating"),
84
- #gr.Textbox(label="Google Maps Link"), # Remove google maps link as an input
85
- ],
86
- outputs=[
87
- gr.Image(label="Satellite Image"),
88
- gr.Textbox(label="Image ID", visible=False),
89
- gr.Textbox(label="Bounds", visible=False),
90
- gr.Textbox(label="Google Maps Link", visible=True), # Add google maps link as an output
91
- ],
92
- title="TerraNomaly - Satellite Image Labeling",
93
- description="Rate satellite images as 'Cool' or 'Not Cool'.",
94
- live=False,
95
- )
96
-
97
- iface.launch(
98
- share=True
99
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import pandas as pd
6
  import os
7
  from itertools import islice
8
+ import json
9
+ import pyarrow as pa
10
+ import pyarrow.parquet as pq
11
 
12
  # Configuration
13
  chunk_size = 100 # Size of the chunks to shuffle
 
14
 
15
  # Load the Satellogic dataset (streaming)
16
  dataset = ev.load_dataset("satellogic", streaming=True)
 
18
  shuffled_chunk = [] # Initialize an empty list to hold the current chunk
19
  chunk_iter = None # Initialize the chunk iterator
20
 
21
+ # Initialize an empty list to hold labels temporarily
22
+ labels_list = []
 
 
 
23
 
24
  def get_next_image():
25
+ global data_iter, labels_list, shuffled_chunk, chunk_iter
26
 
27
  while True:
28
  # If we don't have a current chunk or it's exhausted, get a new one
 
49
  google_maps_link = utils.get_google_map_link(sample, "satellogic")
50
  image_id = str(bounds)
51
 
52
+ # Check if this image has already been labeled (based on image_id)
53
+ if not any(label["image_id"] == image_id for label in labels_list):
54
  return image, image_id, bounds, google_maps_link
55
  except StopIteration:
56
  # Current chunk is exhausted, reset chunk variables to get a new one in the next iteration
 
58
  chunk_iter = None
59
 
60
  def rate_image(image_id, bounds, rating):
61
+ global labels_list
62
 
63
+ labels_list.append(
64
  {
65
+ "image_id": image_id,
66
+ "bounds": bounds,
67
+ "rating": rating,
68
+ "google_maps_link": "", # Adding google maps link to the data to be downloaded
69
  }
70
  )
 
 
71
 
72
  next_image, next_image_id, next_bounds, next_google_maps_link = get_next_image()
73
  return next_image, next_image_id, next_bounds, next_google_maps_link
74
 
75
+ def save_labels():
76
+ global labels_list
77
+ table = pa.Table.from_pylist(labels_list)
78
+ pq.write_table(table, "labeled_data.parquet")
79
+ return "labeled_data.parquet"
80
+
81
  # Gradio interface
82
+ with gr.Blocks() as iface:
83
+ with gr.Row():
84
+ with gr.Column():
85
+ image_out = gr.Image(label="Satellite Image")
86
+ image_id_out = gr.Textbox(label="Image ID", visible=False)
87
+ bounds_out = gr.Textbox(label="Bounds", visible=False)
88
+ google_maps_link_out = gr.Textbox(label="Google Maps Link", visible=True)
89
+ with gr.Column():
90
+ rating_radio = gr.Radio(["Cool", "Not Cool"], label="Rating")
91
+ submit_button = gr.Button("Submit Rating")
92
+ download_button = gr.Button("Download Labels")
93
+ download_file = gr.File(label="Download")
94
+
95
+ submit_button.click(
96
+ rate_image,
97
+ inputs=[image_id_out, bounds_out, rating_radio],
98
+ outputs=[image_out, image_id_out, bounds_out, google_maps_link_out]
99
+ )
100
+
101
+ download_button.click(
102
+ save_labels,
103
+ inputs=None,
104
+ outputs=[download_file]
105
+ )
106
+
107
+ # Get the first image and its details
108
+ initial_image, initial_image_id, initial_bounds, initial_google_maps_link = get_next_image()
109
+
110
+ image_out.value = initial_image
111
+ image_id_out.value = initial_image_id
112
+ bounds_out.value = initial_bounds
113
+ google_maps_link_out.value = initial_google_maps_link
114
+
115
+ iface.launch(share=True)