Jegree committed on
Commit c479acb · verified · 1 Parent(s): c781724

Upload 2 files

Files changed (2)
  1. app.py +2 -2
  2. models.py +88 -88
app.py CHANGED
@@ -226,7 +226,7 @@ def create_app():
   )
   def get_gpu_kind():
     device = jax.devices()[0]
-    if not gradio_helpers.should_mock():
+    if not gradio_helpers.should_mock() and device.platform != 'gpu':
       raise gr.Error('GPU not visible to JAX!')
     return f'GPU={device.device_kind}'
   demo.load(get_gpu_kind, None, gpu_kind)
@@ -248,4 +248,4 @@ if __name__ == '__main__':
   for name, (repo, filename, revision) in models.MODELS.items():
     gradio_helpers.register_download(name, repo, filename, revision)
 
-  create_app().queue().launch()
+  create_app().queue().launch(share=True)
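The first hunk fixes an inverted guard: the old check raised "GPU not visible to JAX!" whenever mocking was disabled, even with a GPU attached; the new check raises only when JAX's default device is not a GPU. Below is a minimal sketch of the same probe outside Gradio; `gradio_helpers.should_mock()` is specific to this Space, so a plain environment flag (`MOCK_HARDWARE`, a hypothetical name) stands in for it:

import os

import jax


def assert_gpu_visible() -> str:
  """Raises unless JAX's default device is a GPU (or mocking is enabled)."""
  device = jax.devices()[0]  # JAX's default (first) device.
  mocking = os.environ.get('MOCK_HARDWARE') == '1'  # Stand-in for gradio_helpers.should_mock().
  if not mocking and device.platform != 'gpu':
    raise RuntimeError('GPU not visible to JAX!')
  return f'GPU={device.device_kind}'  # e.g. 'GPU=Tesla T4' on a T4 Space.

The second hunk's `share=True` asks Gradio to create a temporary public `*.gradio.live` link in addition to the local server; Spaces already expose the app at their own URL, so the flag mainly matters when running the app elsewhere.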
models.py CHANGED
@@ -1,88 +1,88 @@
-"""Model-related code and constants."""
-
-import dataclasses
-import os
-import re
-
-import PIL.Image
-
-# pylint: disable=g-bad-import-order
-import gradio_helpers
-import paligemma_bv
-
-
-ORGANIZATION = 'google'
-BASE_MODELS = [
-    ('paligemma-3b-mix-224-jax', 'paligemma-3b-mix-224'),
-    ('paligemma-3b-mix-448-jax', 'paligemma-3b-mix-448'),
-]
-MODELS = {
-    # **{
-    #     model_name: (
-    #         f'{ORGANIZATION}/{repo}',
-    #         f'{model_name}.bf16.npz',
-    #         'bfloat16',  # Model repo revision.
-    #     )
-    #     for repo, model_name in BASE_MODELS
-    # },
-    'testPaligemma': ('Jegree/myPaligem', 'fine-tuned-paligemma-3b-pt-224.f16.npz', 'main'),
-}
-
-MODELS_INFO = {
-    'paligemma-3b-mix-224': (
-        'JAX/FLAX PaliGemma 3B weights, finetuned with 224x224 input images and 256 token input/output '
-        'text sequences on a mixture of downstream academic datasets. The models are available in float32, '
-        'bfloat16 and float16 format for research purposes only.'
-    ),
-    'paligemma-3b-mix-448': (
-        'JAX/FLAX PaliGemma 3B weights, finetuned with 448x448 input images and 512 token input/output '
-        'text sequences on a mixture of downstream academic datasets. The models are available in float32, '
-        'bfloat16 and float16 format for research purposes only.'
-    ),
-}
-
-MODELS_RES_SEQ = {
-    'paligemma-3b-mix-224': (224, 256),
-    'paligemma-3b-mix-448': (448, 512),
-}
-
-# "CPU basic" has 16 GB RAM, "T4 small" has 15 GB RAM.
-# Below value should be smaller than "available RAM - one model".
-# A single bf16 is about 5860 MB.
-MAX_RAM_CACHE = int(float(os.environ.get('RAM_CACHE_GB', '0')) * 1e9)
-
-config = paligemma_bv.PaligemmaConfig(
-    ckpt='',  # will be set below
-    res=224,
-    text_len=64,
-    tokenizer='gemma(tokensets=("loc", "seg"))',
-    vocab_size=256_000 + 1024 + 128,
-)
-
-
-def get_cached_model(
-    model_name: str,
-) -> tuple[paligemma_bv.PaliGemmaModel, paligemma_bv.ParamsCpu]:
-  """Returns model and params, using RAM cache."""
-  res, seq = MODELS_RES_SEQ[model_name]
-  model_path = gradio_helpers.get_paths()[model_name]
-  config_ = dataclasses.replace(config, ckpt=model_path, res=res, text_len=seq)
-  model, params_cpu = gradio_helpers.get_memory_cache(
-      config_,
-      lambda: paligemma_bv.load_model(config_),
-      max_cache_size_bytes=MAX_RAM_CACHE,
-  )
-  return model, params_cpu
-
-
-def generate(
-    model_name: str, sampler: str, image: PIL.Image.Image, prompt: str
-) -> str:
-  """Generates output with specified `model_name`, `sampler`."""
-  model, params_cpu = get_cached_model(model_name)
-  batch = model.shard_batch(model.prepare_batch([image], [prompt]))
-  with gradio_helpers.timed('sharding'):
-    params = model.shard_params(params_cpu)
-  with gradio_helpers.timed('computation', start_message=True):
-    tokens = model.predict(params, batch, sampler=sampler)
-  return model.tokenizer.to_str(tokens[0])
+"""Model-related code and constants."""
+
+import dataclasses
+import os
+import re
+
+import PIL.Image
+
+# pylint: disable=g-bad-import-order
+import gradio_helpers
+import paligemma_bv
+
+
+ORGANIZATION = 'google'
+BASE_MODELS = [
+    ('paligemma-3b-mix-224-jax', 'paligemma-3b-mix-224'),
+    ('paligemma-3b-mix-448-jax', 'paligemma-3b-mix-448'),
+]
+MODELS = {
+    # **{
+    #     model_name: (
+    #         f'{ORGANIZATION}/{repo}',
+    #         f'{model_name}.bf16.npz',
+    #         'bfloat16',  # Model repo revision.
+    #     )
+    #     for repo, model_name in BASE_MODELS
+    # },
+    'myPaligem': ('Jegree/myPaligem', 'fine-tuned-paligemma-3b-pt-224.f16.npz', 'main'),
+}
+
+MODELS_INFO = {
+    'paligemma-3b-mix-224': (
+        'JAX/FLAX PaliGemma 3B weights, finetuned with 224x224 input images and 256 token input/output '
+        'text sequences on a mixture of downstream academic datasets. The models are available in float32, '
+        'bfloat16 and float16 format for research purposes only.'
+    ),
+    'paligemma-3b-mix-448': (
+        'JAX/FLAX PaliGemma 3B weights, finetuned with 448x448 input images and 512 token input/output '
+        'text sequences on a mixture of downstream academic datasets. The models are available in float32, '
+        'bfloat16 and float16 format for research purposes only.'
+    ),
+}
+
+MODELS_RES_SEQ = {
+    'paligemma-3b-mix-224': (224, 256),
+    'paligemma-3b-mix-448': (448, 512),
+}
+
+# "CPU basic" has 16 GB RAM, "T4 small" has 15 GB RAM.
+# Below value should be smaller than "available RAM - one model".
+# A single bf16 is about 5860 MB.
+MAX_RAM_CACHE = int(float(os.environ.get('RAM_CACHE_GB', '0')) * 1e9)
+
+config = paligemma_bv.PaligemmaConfig(
+    ckpt='',  # will be set below
+    res=224,
+    text_len=64,
+    tokenizer='gemma(tokensets=("loc", "seg"))',
+    vocab_size=256_000 + 1024 + 128,
+)
+
+
+def get_cached_model(
+    model_name: str,
+) -> tuple[paligemma_bv.PaliGemmaModel, paligemma_bv.ParamsCpu]:
+  """Returns model and params, using RAM cache."""
+  res, seq = MODELS_RES_SEQ[model_name]
+  model_path = gradio_helpers.get_paths()[model_name]
+  config_ = dataclasses.replace(config, ckpt=model_path, res=res, text_len=seq)
+  model, params_cpu = gradio_helpers.get_memory_cache(
+      config_,
+      lambda: paligemma_bv.load_model(config_),
+      max_cache_size_bytes=MAX_RAM_CACHE,
+  )
+  return model, params_cpu
+
+
+def generate(
+    model_name: str, sampler: str, image: PIL.Image.Image, prompt: str
+) -> str:
+  """Generates output with specified `model_name`, `sampler`."""
+  model, params_cpu = get_cached_model(model_name)
+  batch = model.shard_batch(model.prepare_batch([image], [prompt]))
+  with gradio_helpers.timed('sharding'):
+    params = model.shard_params(params_cpu)
+  with gradio_helpers.timed('computation', start_message=True):
+    tokens = model.predict(params, batch, sampler=sampler)
+  return model.tokenizer.to_str(tokens[0])
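The only content change in models.py is the `MODELS` key, renamed from 'testPaligemma' to 'myPaligem'. That key is the model's identity throughout the app: app.py registers each entry for download under it, and `generate()` resolves it via `gradio_helpers.get_paths()` and `MODELS_RES_SEQ`. Below is a hypothetical end-to-end use of the renamed entry, runnable only inside this Space's codebase. Note that the commit does not add a matching `MODELS_RES_SEQ['myPaligem']` entry, so `get_cached_model()` would raise a `KeyError` unless one is supplied; the `(224, 64)` pair and the sampler/prompt strings below are assumed values, not part of the commit:

import PIL.Image

import gradio_helpers
import models

# Each MODELS entry maps display name -> (repo, filename, revision).
for name, (repo, filename, revision) in models.MODELS.items():
  gradio_helpers.register_download(name, repo, filename, revision)

# Assumed addition; the commit itself leaves MODELS_RES_SEQ untouched,
# so the renamed key has no (resolution, text_len) entry without this.
models.MODELS_RES_SEQ['myPaligem'] = (224, 64)

image = PIL.Image.open('example.jpg')  # Hypothetical input image.
print(models.generate('myPaligem', 'greedy', image, 'caption en'))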