Spaces:

librarian-bots
/

MetaRefine

Runtime error

App Files Files Community

davanstrien HF Staff committed on Jul 15, 2023

Commit

816049c

1 Parent(s): 59282bc

add single scoring tab

Browse files

Files changed (1) hide show

app.py +95 -2

app.py CHANGED Viewed

@@ -48,6 +48,17 @@ async def get_model_labels(model, client):
         return None
 async def _try_load_model_card(hub_id, client=None):
     if not client:
         client = AsyncClient(headers=headers)
@@ -68,6 +79,26 @@ async def _try_load_model_card(hub_id, client=None):
     return card_text, length
 def _try_parse_card_data(hub_json_data):
     data = {}
     keys = ["license", "language", "datasets"]
@@ -134,6 +165,58 @@ class ModelMetadata:
             return None
 COMMON_SCORES = {
     "license": {
         "required": True,
@@ -343,9 +426,13 @@ def _basic_check(data: Optional[ModelMetadata]):
     return orjson.dumps(data_dict)
-def basic_check(hub_id):
     return _basic_check(hub_id)
 def create_query_url(query, skip=0):
     return f"https://huggingface.co/api/search/full-text?q={query}&limit=100&skip={skip}&type=model"
@@ -579,7 +666,7 @@ with gr.Blocks() as demo:
             [query, min_metadata_score, mim_model_card_length],
             [filter_results, results_markdown],
         )
-    with gr.Tab("Metadata quality details"):
         with gr.Row():
             gr.Markdown(
                 """# How metadata quality is scored?
@@ -616,5 +703,11 @@ with gr.Blocks() as demo:
             gr.Json(json.dumps(text_class_scores_example))
         with gr.Accordion(label="Full overview of all scores", open=False):
             gr.Json(json.dumps(SCORES))
 demo.launch()

         return None
+def get_model_labels_sync(model, client=None):
+    if not client:
+        client = Client(headers=headers)
+    try:
+        url = hf_hub_url(repo_id=model, filename="config.json")
+        resp = client.get(url, timeout=2)
+        return list(resp.json()["label2id"].keys())
+    except (KeyError, JSONDecodeError, AttributeError):
+        return None
 async def _try_load_model_card(hub_id, client=None):
     if not client:
         client = AsyncClient(headers=headers)
     return card_text, length
+def _try_load_model_card_sync(hub_id, client=None):
+    if not client:
+        client = Client(headers=headers)
+    try:
+        url = hf_hub_url(
+            repo_id=hub_id, filename="README.md"
+        )  # We grab card this way rather than via client library to improve performance
+        resp = client.get(url)
+        if resp.status_code == 200:
+            card_text = resp.text
+            length = len(card_text)
+        elif resp.status_code == 404:
+            card_text = None
+            length = 0
+    except httpx.ConnectError:
+        card_text = None
+        length = None
+    return card_text, length
 def _try_parse_card_data(hub_json_data):
     data = {}
     keys = ["license", "language", "datasets"]
             return None
+@dataclass(eq=False)
+class ModelMetadataSync:
+    hub_id: str
+    tags: Optional[List[str]]
+    license: Optional[str]
+    library_name: Optional[str]
+    datasets: Optional[List[str]]
+    pipeline_tag: Optional[str]
+    labels: Optional[List[str]]
+    languages: Optional[Union[str, List[str]]]
+    model_card_text: Optional[str] = None
+    model_card_length: Optional[int] = None
+    likes: Optional[int] = None
+    downloads: Optional[int] = None
+    created_at: Optional[datetime] = None
+    @classmethod
+    def from_hub(cls, hub_id, client=None):
+        try:
+            if not client:
+                client = httpx.Client(headers=headers)
+            url = f"https://huggingface.co/api/models/{hub_id}"
+            resp = client.get(url)
+            hub_json_data = resp.json()
+            card_text, length = _try_load_model_card_sync(hub_id)
+            data = _try_parse_card_data(hub_json_data)
+            library_name = hub_json_data.get("library_name")
+            pipeline_tag = hub_json_data.get("pipeline_tag")
+            downloads = hub_json_data.get("downloads")
+            likes = hub_json_data.get("likes")
+            tags = hub_json_data.get("tags")
+            labels = get_model_labels_sync(hub_id, client)
+            return ModelMetadata(
+                hub_id=hub_id,
+                languages=data["language"],
+                tags=tags,
+                license=data["license"],
+                library_name=library_name,
+                datasets=data["datasets"],
+                pipeline_tag=pipeline_tag,
+                labels=labels,
+                model_card_text=card_text,
+                downloads=downloads,
+                likes=likes,
+                model_card_length=length,
+            )
+        except Exception as e:
+            print(f"Failed to create ModelMetadata for model {hub_id}: {str(e)}")
+            return None
 COMMON_SCORES = {
     "license": {
         "required": True,
     return orjson.dumps(data_dict)
+def basic_check(hub_id):  # add types
+    """Public wrapper that forwards its argument unchanged to ``_basic_check``.
+
+    NOTE(review): despite the parameter name, ``basic_check_from_hub_id``
+    passes the object returned by ``ModelMetadataSync.from_hub`` here, not a
+    hub id string - confirm and rename/annotate accordingly.
+    """
     return _basic_check(hub_id)
+@cached(sync_cache)
+def basic_check_from_hub_id(hub_id):
+    model_data = ModelMetadataSync.from_hub(hub_id)
+    return orjson.loads(basic_check(model_data))
 def create_query_url(query, skip=0):
+    """Return the Hub full-text search API URL for ``query``.
+
+    The URL restricts results to models (``type=model``), asks for 100
+    results (``limit=100``) and passes ``skip`` through as the ``skip``
+    parameter. ``query`` is interpolated as-is, without URL-encoding.
+    """
     return f"https://huggingface.co/api/search/full-text?q={query}&limit=100&skip={skip}&type=model"
             [query, min_metadata_score, mim_model_card_length],
             [filter_results, results_markdown],
         )
+    with gr.Tab("Metadata quality details)"):
         with gr.Row():
             gr.Markdown(
                 """# How metadata quality is scored?
             gr.Json(json.dumps(text_class_scores_example))
         with gr.Accordion(label="Full overview of all scores", open=False):
             gr.Json(json.dumps(SCORES))
+    with gr.Tab("Score models"):
+        model_id_to_score = gr.Textbox(
+            placeholder="bert-base-uncased", label="Model ID"
+        )
+        score_model = gr.Button("Score model")
+        score_model.click(basic_check_from_hub_id, model_id_to_score, [gr.Json()])
 demo.launch()