davanstrien HF Staff committed on
Commit
816049c
·
1 Parent(s): 59282bc

add single scoring tab

Browse files
Files changed (1) hide show
  1. app.py +95 -2
app.py CHANGED
@@ -48,6 +48,17 @@ async def get_model_labels(model, client):
48
  return None
49
 
50
 
 
 
 
 
 
 
 
 
 
 
 
51
  async def _try_load_model_card(hub_id, client=None):
52
  if not client:
53
  client = AsyncClient(headers=headers)
@@ -68,6 +79,26 @@ async def _try_load_model_card(hub_id, client=None):
68
  return card_text, length
69
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def _try_parse_card_data(hub_json_data):
72
  data = {}
73
  keys = ["license", "language", "datasets"]
@@ -134,6 +165,58 @@ class ModelMetadata:
134
  return None
135
 
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  COMMON_SCORES = {
138
  "license": {
139
  "required": True,
@@ -343,9 +426,13 @@ def _basic_check(data: Optional[ModelMetadata]):
343
  return orjson.dumps(data_dict)
344
 
345
 
346
- def basic_check(hub_id):
347
  return _basic_check(hub_id)
348
 
 
 
 
 
349
 
350
  def create_query_url(query, skip=0):
351
  return f"https://huggingface.co/api/search/full-text?q={query}&limit=100&skip={skip}&type=model"
@@ -579,7 +666,7 @@ with gr.Blocks() as demo:
579
  [query, min_metadata_score, mim_model_card_length],
580
  [filter_results, results_markdown],
581
  )
582
- with gr.Tab("Metadata quality details"):
583
  with gr.Row():
584
  gr.Markdown(
585
  """# How metadata quality is scored?
@@ -616,5 +703,11 @@ with gr.Blocks() as demo:
616
  gr.Json(json.dumps(text_class_scores_example))
617
  with gr.Accordion(label="Full overview of all scores", open=False):
618
  gr.Json(json.dumps(SCORES))
 
 
 
 
 
 
619
 
620
  demo.launch()
 
48
  return None
49
 
50
 
51
def get_model_labels_sync(model, client=None):
    """Return the label names declared in a model's ``config.json``.

    Synchronous lookup: requests ``config.json`` for ``model`` (URL built with
    ``hf_hub_url``) and reads the keys of its ``label2id`` mapping.

    Args:
        model: Hub repository id of the model.
        client: Optional client used for the request; it must expose
            ``get(url, timeout=...)``. When omitted, a temporary ``Client``
            is created with the module-level ``headers`` and closed again
            before returning.

    Returns:
        A list with the ``label2id`` keys, or ``None`` when they cannot be
        obtained (request failed or timed out, body is not JSON, or the
        config has no usable ``label2id`` mapping).
    """
    owns_client = not client
    if owns_client:
        client = Client(headers=headers)
    try:
        url = hf_hub_url(repo_id=model, filename="config.json")
        resp = client.get(url, timeout=2)
        return list(resp.json()["label2id"].keys())
    except (
        KeyError,  # config has no "label2id" entry
        TypeError,  # JSON body is not an object
        AttributeError,  # "label2id" is not a mapping (e.g. null)
        JSONDecodeError,  # body is not JSON (e.g. an error page)
        httpx.RequestError,  # timeout / transport failure: labels are best-effort
    ):
        return None
    finally:
        # Only close what this function created; a caller-supplied client
        # stays open for the caller to reuse.
        if owns_client:
            client.close()
60
+
61
+
62
  async def _try_load_model_card(hub_id, client=None):
63
  if not client:
64
  client = AsyncClient(headers=headers)
 
79
  return card_text, length
80
 
81
 
82
def _try_load_model_card_sync(hub_id, client=None):
    """Fetch a model's ``README.md`` (its model card) synchronously.

    Args:
        hub_id: Hub repository id of the model.
        client: Optional client used for the request; it must expose
            ``get(url)``. When omitted, a temporary ``Client`` is created
            with the module-level ``headers`` and closed before returning.

    Returns:
        A ``(card_text, length)`` tuple:

        * ``(text, len(text))`` when the server answers 200,
        * ``(None, 0)`` when the server answers 404,
        * ``(None, None)`` when the connection fails or the server answers
          with any other status, i.e. the card could not be retrieved.
    """
    owns_client = not client
    if owns_client:
        client = Client(headers=headers)
    # Start from "could not retrieve" so that statuses other than 200/404
    # (redirects, auth errors, server errors) still yield a well-defined
    # result instead of an UnboundLocalError at the return statement.
    card_text = None
    length = None
    try:
        # We grab card this way rather than via client library to improve performance
        url = hf_hub_url(repo_id=hub_id, filename="README.md")
        resp = client.get(url)
        if resp.status_code == 200:
            card_text = resp.text
            length = len(card_text)
        elif resp.status_code == 404:
            length = 0
    except httpx.ConnectError:
        card_text = None
        length = None
    finally:
        # Only close a client that was created here.
        if owns_client:
            client.close()
    return card_text, length
100
+
101
+
102
  def _try_parse_card_data(hub_json_data):
103
  data = {}
104
  keys = ["license", "language", "datasets"]
 
165
  return None
166
 
167
 
168
@dataclass(eq=False)
class ModelMetadataSync:
    """Metadata for a single Hub model, loaded with blocking HTTP calls.

    ``eq=False`` means no ``__eq__`` is generated, so instances compare and
    hash by identity.
    """

    hub_id: str
    tags: Optional[List[str]]
    license: Optional[str]
    library_name: Optional[str]
    datasets: Optional[List[str]]
    pipeline_tag: Optional[str]
    labels: Optional[List[str]]
    languages: Optional[Union[str, List[str]]]
    model_card_text: Optional[str] = None
    model_card_length: Optional[int] = None
    likes: Optional[int] = None
    downloads: Optional[int] = None
    created_at: Optional[datetime] = None  # not populated by from_hub

    @classmethod
    def from_hub(cls, hub_id, client=None):
        """Build an instance from the Hub API and the model's repo files.

        Queries ``https://huggingface.co/api/models/{hub_id}``, then loads
        the model card and the ``config.json`` labels through the same
        client.

        Args:
            hub_id: Hub repository id of the model.
            client: Optional client exposing ``get(url)``. When omitted, an
                ``httpx.Client`` is created with the module-level
                ``headers`` and closed before returning.

        Returns:
            A new instance of this class, or ``None`` if anything fails
            while fetching or parsing (the error is printed).
        """
        created_client = None
        try:
            if not client:
                client = created_client = httpx.Client(headers=headers)
            url = f"https://huggingface.co/api/models/{hub_id}"
            resp = client.get(url)
            hub_json_data = resp.json()
            # Reuse the same client so no extra connection pool is opened
            # (and leaked) just to fetch the README.
            card_text, length = _try_load_model_card_sync(hub_id, client)
            data = _try_parse_card_data(hub_json_data)
            labels = get_model_labels_sync(hub_id, client)
            # Construct via cls so the result is an instance of this class.
            return cls(
                hub_id=hub_id,
                languages=data["language"],
                tags=hub_json_data.get("tags"),
                license=data["license"],
                library_name=hub_json_data.get("library_name"),
                datasets=data["datasets"],
                pipeline_tag=hub_json_data.get("pipeline_tag"),
                labels=labels,
                model_card_text=card_text,
                downloads=hub_json_data.get("downloads"),
                likes=hub_json_data.get("likes"),
                model_card_length=length,
            )
        except Exception as e:
            # Deliberate catch-all: callers get None for any model that
            # cannot be loaded rather than an exception.
            print(f"Failed to create ModelMetadata for model {hub_id}: {e}")
            return None
        finally:
            # Close only a client created here; a caller-supplied client is
            # left open for the caller.
            if created_client is not None:
                created_client.close()
217
+
218
+
219
+
220
  COMMON_SCORES = {
221
  "license": {
222
  "required": True,
 
426
  return orjson.dumps(data_dict)
427
 
428
 
429
def basic_check(hub_id):  # TODO: add types
    """Run ``_basic_check`` on the given argument and return its result.

    The parameter is named ``hub_id``, but it is forwarded unchanged to
    ``_basic_check``; ``basic_check_from_hub_id`` in this file passes the
    result of ``ModelMetadataSync.from_hub`` here rather than an id string.
    """
    outcome = _basic_check(hub_id)
    return outcome
431
 
432
@cached(sync_cache)
def basic_check_from_hub_id(hub_id):
    """Score one model, identified by its Hub id, and return the parsed result.

    Loads the model's metadata with ``ModelMetadataSync.from_hub``, runs
    ``basic_check`` on it and decodes the output with ``orjson.loads``.
    Calls are memoised through ``cached(sync_cache)``.
    """
    metadata = ModelMetadataSync.from_hub(hub_id)
    serialized = basic_check(metadata)
    # NOTE(review): from_hub returns None on failure; confirm that whatever
    # basic_check yields for None is something orjson.loads accepts.
    return orjson.loads(serialized)
436
 
437
  def create_query_url(query, skip=0):
438
  return f"https://huggingface.co/api/search/full-text?q={query}&limit=100&skip={skip}&type=model"
 
666
  [query, min_metadata_score, mim_model_card_length],
667
  [filter_results, results_markdown],
668
  )
669
+ with gr.Tab("Metadata quality details)"):
670
  with gr.Row():
671
  gr.Markdown(
672
  """# How metadata quality is scored?
 
703
  gr.Json(json.dumps(text_class_scores_example))
704
  with gr.Accordion(label="Full overview of all scores", open=False):
705
  gr.Json(json.dumps(SCORES))
706
+ with gr.Tab("Score models"):
707
+ model_id_to_score = gr.Textbox(
708
+ placeholder="bert-base-uncased", label="Model ID"
709
+ )
710
+ score_model = gr.Button("Score model")
711
+ score_model.click(basic_check_from_hub_id, model_id_to_score, [gr.Json()])
712
 
713
  demo.launch()