Spaces:
Runtime error
Runtime error
Commit
·
816049c
1
Parent(s):
59282bc
add single scoring tab
Browse files
app.py
CHANGED
@@ -48,6 +48,17 @@ async def get_model_labels(model, client):
|
|
48 |
return None
|
49 |
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
async def _try_load_model_card(hub_id, client=None):
|
52 |
if not client:
|
53 |
client = AsyncClient(headers=headers)
|
@@ -68,6 +79,26 @@ async def _try_load_model_card(hub_id, client=None):
|
|
68 |
return card_text, length
|
69 |
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
def _try_parse_card_data(hub_json_data):
|
72 |
data = {}
|
73 |
keys = ["license", "language", "datasets"]
|
@@ -134,6 +165,58 @@ class ModelMetadata:
|
|
134 |
return None
|
135 |
|
136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
COMMON_SCORES = {
|
138 |
"license": {
|
139 |
"required": True,
|
@@ -343,9 +426,13 @@ def _basic_check(data: Optional[ModelMetadata]):
|
|
343 |
return orjson.dumps(data_dict)
|
344 |
|
345 |
|
346 |
-
def basic_check(hub_id):
|
347 |
return _basic_check(hub_id)
|
348 |
|
|
|
|
|
|
|
|
|
349 |
|
350 |
def create_query_url(query, skip=0):
|
351 |
return f"https://huggingface.co/api/search/full-text?q={query}&limit=100&skip={skip}&type=model"
|
@@ -579,7 +666,7 @@ with gr.Blocks() as demo:
|
|
579 |
[query, min_metadata_score, mim_model_card_length],
|
580 |
[filter_results, results_markdown],
|
581 |
)
|
582 |
-
with gr.Tab("Metadata quality details"):
|
583 |
with gr.Row():
|
584 |
gr.Markdown(
|
585 |
"""# How metadata quality is scored?
|
@@ -616,5 +703,11 @@ with gr.Blocks() as demo:
|
|
616 |
gr.Json(json.dumps(text_class_scores_example))
|
617 |
with gr.Accordion(label="Full overview of all scores", open=False):
|
618 |
gr.Json(json.dumps(SCORES))
|
|
|
|
|
|
|
|
|
|
|
|
|
619 |
|
620 |
demo.launch()
|
|
|
48 |
return None
|
49 |
|
50 |
|
51 |
+
def get_model_labels_sync(model, client=None):
    """Return the label names declared in a model's ``config.json``.

    Synchronous, best-effort lookup: the labels are the keys of the
    ``label2id`` mapping in the model's ``config.json``.

    Args:
        model: Hub repository id of the model.
        client: Optional HTTP client to reuse. When omitted, a temporary
            ``Client`` is created for this call and closed afterwards.

    Returns:
        A list of label names, or ``None`` when the config cannot be
        fetched or parsed, or has no usable ``label2id`` mapping.
    """
    created_client = None
    if not client:
        created_client = client = Client(headers=headers)
    try:
        url = hf_hub_url(repo_id=model, filename="config.json")
        resp = client.get(url, timeout=2)
        return list(resp.json()["label2id"].keys())
    except (
        KeyError,
        JSONDecodeError,
        AttributeError,
        TypeError,  # JSON payload that is not a mapping
        httpx.HTTPError,  # timeouts / transport failures: labels are optional
    ):
        return None
    finally:
        # Only close a client this function created; a caller-supplied
        # client stays open for the caller to reuse.
        if created_client is not None:
            created_client.close()
|
60 |
+
|
61 |
+
|
62 |
async def _try_load_model_card(hub_id, client=None):
|
63 |
if not client:
|
64 |
client = AsyncClient(headers=headers)
|
|
|
79 |
return card_text, length
|
80 |
|
81 |
|
82 |
+
def _try_load_model_card_sync(hub_id, client=None):
    """Fetch a model's ``README.md`` synchronously.

    Args:
        hub_id: Hub repository id of the model.
        client: Optional HTTP client to reuse. When omitted, a temporary
            ``Client`` is created for this call and closed afterwards.

    Returns:
        A ``(card_text, length)`` tuple:

        * ``(text, len(text))`` when the card is returned with status 200;
        * ``(None, 0)`` when the card does not exist (status 404);
        * ``(None, None)`` when the outcome is unknown, i.e. the connection
          failed or the server answered with any other status code.
    """
    created_client = None
    if not client:
        created_client = client = Client(headers=headers)
    # Start from "unknown" so that an unexpected status code can no longer
    # leave these names unbound at the final return.
    card_text = None
    length = None
    try:
        url = hf_hub_url(
            repo_id=hub_id, filename="README.md"
        )  # We grab card this way rather than via client library to improve performance
        resp = client.get(url)
        if resp.status_code == 200:
            card_text = resp.text
            length = len(card_text)
        elif resp.status_code == 404:
            card_text = None
            length = 0
    except httpx.ConnectError:
        card_text = None
        length = None
    finally:
        # Only close a client this function created.
        if created_client is not None:
            created_client.close()
    return card_text, length
|
100 |
+
|
101 |
+
|
102 |
def _try_parse_card_data(hub_json_data):
|
103 |
data = {}
|
104 |
keys = ["license", "language", "datasets"]
|
|
|
165 |
return None
|
166 |
|
167 |
|
168 |
+
@dataclass(eq=False)
class ModelMetadataSync:
    """Metadata for a single Hub model, loaded with a synchronous client.

    Instances are normally built with :meth:`from_hub`. ``eq=False`` keeps
    the default identity-based equality and hashing.
    """

    hub_id: str
    tags: Optional[List[str]]
    license: Optional[str]
    library_name: Optional[str]
    datasets: Optional[List[str]]
    pipeline_tag: Optional[str]
    labels: Optional[List[str]]
    languages: Optional[Union[str, List[str]]]
    model_card_text: Optional[str] = None
    model_card_length: Optional[int] = None
    likes: Optional[int] = None
    downloads: Optional[int] = None
    created_at: Optional[datetime] = None  # not populated by from_hub

    @classmethod
    def from_hub(cls, hub_id, client=None):
        """Build an instance from the Hub API, model card and config.

        Args:
            hub_id: Hub repository id of the model.
            client: Optional HTTP client to reuse for every request made
                here. When omitted, a temporary ``httpx.Client`` is created
                and closed before returning.

        Returns:
            A populated instance of ``cls``, or ``None`` if anything goes
            wrong while fetching or parsing (the error is printed).
        """
        created_client = None
        try:
            if not client:
                created_client = client = httpx.Client(headers=headers)
            url = f"https://huggingface.co/api/models/{hub_id}"
            resp = client.get(url)
            hub_json_data = resp.json()
            # Reuse the same client instead of opening a second one.
            card_text, length = _try_load_model_card_sync(hub_id, client)
            data = _try_parse_card_data(hub_json_data)
            library_name = hub_json_data.get("library_name")
            pipeline_tag = hub_json_data.get("pipeline_tag")
            downloads = hub_json_data.get("downloads")
            likes = hub_json_data.get("likes")
            tags = hub_json_data.get("tags")
            labels = get_model_labels_sync(hub_id, client)
            # Construct via ``cls`` so the classmethod returns its own class.
            return cls(
                hub_id=hub_id,
                languages=data["language"],
                tags=tags,
                license=data["license"],
                library_name=library_name,
                datasets=data["datasets"],
                pipeline_tag=pipeline_tag,
                labels=labels,
                model_card_text=card_text,
                downloads=downloads,
                likes=likes,
                model_card_length=length,
            )
        except Exception as e:
            # Deliberate best-effort boundary: report and signal failure
            # with None rather than propagating.
            print(f"Failed to create ModelMetadata for model {hub_id}: {str(e)}")
            return None
        finally:
            # Only close a client this method created.
            if created_client is not None:
                created_client.close()
|
217 |
+
|
218 |
+
|
219 |
+
|
220 |
COMMON_SCORES = {
|
221 |
"license": {
|
222 |
"required": True,
|
|
|
426 |
return orjson.dumps(data_dict)
|
427 |
|
428 |
|
429 |
+
def basic_check(hub_id):  # add types
    """Run the metadata quality check and return its result unchanged.

    Thin public wrapper that forwards its argument to ``_basic_check``.
    NOTE(review): despite the parameter name, the visible caller passes a
    model metadata object rather than a hub id string; confirm and rename
    once types are added.
    """
    check_result = _basic_check(hub_id)
    return check_result
|
431 |
|
432 |
+
@cached(sync_cache)
def basic_check_from_hub_id(hub_id):
    """Score a single model's metadata, given its Hub id.

    Loads the metadata synchronously, runs ``basic_check`` on it and
    decodes the serialized report into Python objects.

    Args:
        hub_id: Hub repository id of the model to score.

    Returns:
        The decoded score report.

    Raises:
        ValueError: If the metadata could not be loaded, or no report was
            produced for the model. Raising, rather than handing ``None``
            to ``orjson.loads``, gives a readable error.
            NOTE(review): presumably ``cached`` does not store raised
            exceptions, so a transient failure would not be cached; confirm.
    """
    model_data = ModelMetadataSync.from_hub(hub_id)
    if model_data is None:
        raise ValueError(f"Could not load metadata for model {hub_id!r}")
    report = basic_check(model_data)
    if report is None:
        raise ValueError(f"No metadata score available for model {hub_id!r}")
    return orjson.loads(report)
|
436 |
|
437 |
def create_query_url(query, skip=0):
    """Build the Hub full-text search URL for models.

    Args:
        query: Search text. It is percent-encoded so characters such as
            spaces, ``&`` or ``#`` cannot break or alter the query string.
        skip: Number of results to skip (pagination offset).

    Returns:
        The search URL, requesting up to 100 model results.
    """
    from urllib.parse import quote

    encoded_query = quote(str(query), safe="")
    return f"https://huggingface.co/api/search/full-text?q={encoded_query}&limit=100&skip={skip}&type=model"
|
|
|
666 |
[query, min_metadata_score, mim_model_card_length],
|
667 |
[filter_results, results_markdown],
|
668 |
)
|
669 |
+
with gr.Tab("Metadata quality details)"):
|
670 |
with gr.Row():
|
671 |
gr.Markdown(
|
672 |
"""# How metadata quality is scored?
|
|
|
703 |
gr.Json(json.dumps(text_class_scores_example))
|
704 |
with gr.Accordion(label="Full overview of all scores", open=False):
|
705 |
gr.Json(json.dumps(SCORES))
|
706 |
+
with gr.Tab("Score models"):
|
707 |
+
model_id_to_score = gr.Textbox(
|
708 |
+
placeholder="bert-base-uncased", label="Model ID"
|
709 |
+
)
|
710 |
+
score_model = gr.Button("Score model")
|
711 |
+
score_model.click(basic_check_from_hub_id, model_id_to_score, [gr.Json()])
|
712 |
|
713 |
demo.launch()
|