davanstrien HF Staff committed on
Commit
213c06e
·
1 Parent(s): fd05104

add httpx caching

Browse files
Files changed (1) hide show
  1. app.py +10 -57
app.py CHANGED
@@ -25,10 +25,16 @@ import httpx
25
  import orjson
26
  import httpx
27
  from functools import lru_cache
 
 
 
28
 
29
- from sys import platform
30
 
31
- CACHE_DIR = "./cache" if platform == "darwin" else "/data/"
 
 
 
32
 
33
  disable_progress_bars()
34
 
@@ -69,7 +75,7 @@ def _try_load_model_card(hub_id):
69
  url = hf_hub_url(
70
  repo_id=hub_id, filename="README.md"
71
  ) # We grab card this way rather than via client library to improve performance
72
- card_text = httpx.get(url).text
73
  length = len(card_text)
74
  except EntryNotFoundError:
75
  card_text = None
@@ -355,63 +361,10 @@ def create_query_url(query, skip=0):
355
  # @cache.memoize(expire=60 * 60 * 24 * 3) # expires after 3 days
356
  def get_results(query) -> Dict[Any, Any]:
357
  url = create_query_url(query)
358
- r = httpx.get(url)
359
  return r.json()
360
 
361
 
362
- # result = {
363
- # "repoId": "621ffdc036468d709f175eb5",
364
- # "repoOwnerId": "60d099234330bad169e611f0",
365
- # "isPrivate": False,
366
- # "type": "model",
367
- # "likes": 0,
368
- # "isReadmeFile": True,
369
- # "readmeStartLine": 8,
370
- # "updatedAt": 1687806057107,
371
- # "repoName": "hate_speech_en",
372
- # "repoOwner": "IMSyPP",
373
- # "tags": "pytorch, bert, text-classification, en, transformers, license:mit, has_space",
374
- # "name": "IMSyPP/hate_speech_en",
375
- # "fileName": "README.md",
376
- # "formatted": {
377
- # "repoName": [{"text": "hate_speech_en", "type": "text"}],
378
- # "repoOwner": [{"text": "IMSyPP", "type": "text"}],
379
- # "fileContent": [
380
- # {"text": "\n# ", "type": "text"},
381
- # {"text": "Hate", "type": "highlight"},
382
- # {"text": " ", "type": "text"},
383
- # {"text": "Speech", "type": "highlight"},
384
- # {
385
- # "text": " Classifier for Social Media Content in English Language\n\nA monolingual model for ",
386
- # "type": "text",
387
- # },
388
- # {"text": "hate", "type": "highlight"},
389
- # {"text": " ", "type": "text"},
390
- # {"text": "speech", "type": "highlight"},
391
- # {
392
- # "text": " classification of social media content in English language. The model was trained on 103190 YouTube comments and tested on an independent test set of 20554 YouTube comments. It is based on English BERT base pre-trained language model.\n\n## Tokenizer\n\nDuring training the text was preprocessed using the original English BERT base tokenizer. We suggest the same tokenizer is used for inference.\n\n## Model output\n\nThe model classifies each input into one of four distinct classes:\n* 0 - acceptable\n* 1 - inappropriate\n* 2 - offensive\n* 3 - violent",
393
- # "type": "text",
394
- # },
395
- # ],
396
- # "tags": [
397
- # {
398
- # "text": "pytorch, bert, text-classification, en, transformers, license:mit, has_space",
399
- # "type": "text",
400
- # }
401
- # ],
402
- # "name": [{"text": "IMSyPP/hate_speech_en", "type": "text"}],
403
- # "fileName": [{"text": "README.md", "type": "text"}],
404
- # },
405
- # "authorData": {
406
- # "avatarUrl": "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1624284535629-60d08803565dd1d0867f7a37.png?w=200&h=200&f=face",
407
- # "fullname": "IMSyPP EU REC AG project 875263 - Innovative Monitoring Systems and Prevention Policies of Online Hate Speech",
408
- # "name": "IMSyPP",
409
- # "type": "org",
410
- # "isHf": False,
411
- # },
412
- # }
413
-
414
-
415
  @backoff.on_exception(
416
  backoff.expo,
417
  Exception,
 
25
  import orjson
26
  import httpx
27
  from functools import lru_cache
28
+ from httpx import Client
29
+ from httpx_caching import CachingClient
30
+ from httpx_caching import OneDayCacheHeuristic
31
 
32
+ client = Client()
33
 
34
+ client = CachingClient(client, heuristic=OneDayCacheHeuristic())
35
+
36
+
37
+ # CACHE_DIR = "./cache" if platform == "darwin" else "/data/"
38
 
39
  disable_progress_bars()
40
 
 
75
  url = hf_hub_url(
76
  repo_id=hub_id, filename="README.md"
77
  ) # We grab card this way rather than via client library to improve performance
78
+ card_text = client.get(url).text
79
  length = len(card_text)
80
  except EntryNotFoundError:
81
  card_text = None
 
361
  # @cache.memoize(expire=60 * 60 * 24 * 3) # expires after 3 days
362
  def get_results(query) -> Dict[Any, Any]:
363
  url = create_query_url(query)
364
+ r = client.get(url)
365
  return r.json()
366
 
367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  @backoff.on_exception(
369
  backoff.expo,
370
  Exception,