kimhyunwoo commited on
Commit
7ca8994
·
verified ·
1 Parent(s): 5ae42ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -138
app.py CHANGED
@@ -1,18 +1,10 @@
1
- # ํ•„์š”ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์„ค์น˜ํ•˜๋Š” ๋ช…๋ น์–ด์ž…๋‹ˆ๋‹ค.
2
- # ์ด ๋ถ€๋ถ„์€ ์Šคํฌ๋ฆฝํŠธ ์‹คํ–‰ ์ดˆ๋ฐ˜์— ํ•œ ๋ฒˆ ์‹คํ–‰๋ฉ๋‹ˆ๋‹ค.
3
  import os
4
- print("Installing required transformers branch...")
5
  os.system("pip install git+https://github.com/shumingma/transformers.git")
6
- print("Installation complete.")
7
 
8
- # ํ•„์š”ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋“ค์„ import ํ•ฉ๋‹ˆ๋‹ค.
9
  import threading
10
  import torch
11
  import torch._dynamo
12
- import gradio as gr
13
- import spaces # Hugging Face Spaces ๊ด€๋ จ ์œ ํ‹ธ๋ฆฌํ‹ฐ
14
-
15
- # torch._dynamo ์„ค์ • (์„ ํƒ ์‚ฌํ•ญ, ์„ฑ๋Šฅ ํ–ฅ์ƒ ์‹œ๋„)
16
  torch._dynamo.config.suppress_errors = True
17
 
18
  from transformers import (
@@ -20,37 +12,20 @@ from transformers import (
20
  AutoTokenizer,
21
  TextIteratorStreamer,
22
  )
 
 
23
 
24
- # --- ๋ชจ๋ธ ๋กœ๋“œ ---
25
- # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ • (Hugging Face ๋ชจ๋ธ ID)
26
  model_id = "microsoft/bitnet-b1.58-2B-4T"
27
 
28
- # ๋ชจ๋ธ ๋กœ๋“œ ์‹œ ๊ฒฝ๊ณ  ๋ฉ”์‹œ์ง€๋ฅผ ์ตœ์†Œํ™”ํ•˜๊ธฐ ์œ„ํ•ด ๋กœ๊น… ๋ ˆ๋ฒจ ์„ค์ •
29
- os.environ["TRANSFORMERS_VERBOSITY"] = "error"
30
-
31
- # AutoModelForCausalLM๊ณผ AutoTokenizer๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
32
- # trust_remote_code=True๊ฐ€ ํ•„์š”ํ•˜๋ฉฐ, device_map="auto"๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ž๋™์œผ๋กœ ๋””๋ฐ”์ด์Šค ์„ค์ •
33
- try:
34
- print(f"๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘: {model_id}...")
35
- tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
36
- model = AutoModelForCausalLM.from_pretrained(
37
- model_id,
38
- torch_dtype=torch.bfloat16, # bf16 ์‚ฌ์šฉ (GPU ๊ถŒ์žฅ)
39
- device_map="auto", # ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋””๋ฐ”์ด์Šค์— ์ž๋™์œผ๋กœ ๋ชจ๋ธ ๋ฐฐ์น˜
40
- trust_remote_code=True
41
- )
42
- print(f"๋ชจ๋ธ ๋””๋ฐ”์ด์Šค: {model.device}")
43
- print("๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ.")
44
-
45
- except Exception as e:
46
- print(f"๋ชจ๋ธ ๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
47
- tokenizer = None
48
- model = None
49
- print("๋ชจ๋ธ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค. ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜์ด ์ œ๋Œ€๋กœ ๋™์ž‘ํ•˜์ง€ ์•Š์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")
50
-
51
 
52
- # --- ํ…์ŠคํŠธ ์ƒ์„ฑ ํ•จ์ˆ˜ (Gradio ChatInterface์šฉ) ---
53
- @spaces.GPU # ์ด ํ•จ์ˆ˜๊ฐ€ GPU ์ž์›์„ ์‚ฌ์šฉํ•˜๋„๋ก ๋ช…์‹œ (Hugging Face Spaces)
54
  def respond(
55
  message: str,
56
  history: list[tuple[str, str]],
@@ -59,109 +34,98 @@ def respond(
59
  temperature: float,
60
  top_p: float,
61
  ):
62
- if model is None or tokenizer is None:
63
- yield "๋ชจ๋ธ ๋กœ๋“œ์— ์‹คํŒจํ•˜์—ฌ ํ…์ŠคํŠธ ์ƒ์„ฑ์„ ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
64
- return # ์ƒ์„ฑ๊ธฐ ํ•จ์ˆ˜์ด๋ฏ€๋กœ return ๋Œ€์‹  ๋นˆ yield ๋˜๋Š” ๊ทธ๋ƒฅ return
65
-
66
- try:
67
- # ๋ฉ”์‹œ์ง€ ํ˜•์‹์„ ๋ชจ๋ธ์˜ chat template์— ๋งž๊ฒŒ ๊ตฌ์„ฑ
68
- messages = [{"role": "system", "content": system_message}]
69
- for user_msg, bot_msg in history:
70
- if user_msg:
71
- messages.append({"role": "user", "content": user_msg})
72
- if bot_msg:
73
- messages.append({"role": "assistant", "content": bot_msg})
74
- messages.append({"role": "user", "content": message})
75
-
76
- prompt = tokenizer.apply_chat_template(
77
- messages, tokenize=False, add_generation_prompt=True
78
- )
79
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
80
-
81
- # ํ…์ŠคํŠธ ์ŠคํŠธ๋ฆฌ๋ฐ์„ ์œ„ํ•œ streamer ์„ค์ •
82
- streamer = TextIteratorStreamer(
83
- tokenizer, skip_prompt=True, skip_special_tokens=True
84
- )
85
- generate_kwargs = dict(
86
- **inputs,
87
- streamer=streamer,
88
- max_new_tokens=max_tokens,
89
- temperature=temperature,
90
- top_p=top_p,
91
- do_sample=True,
92
- pad_token_id=tokenizer.eos_token_id # ํŒจ๋”ฉ ํ† ํฐ ID ์„ค์ •
93
- )
94
-
95
- # ๋ชจ๋ธ ์ƒ์„ฑ์„ ๋ณ„๋„์˜ ์Šค๋ ˆ๋“œ์—์„œ ์‹คํ–‰
96
- thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
97
- thread.start()
98
-
99
- # ์ŠคํŠธ๋ฆฌ๋จธ์—์„œ ์ƒ์„ฑ๋œ ํ…์ŠคํŠธ๋ฅผ ์ฝ์–ด์™€ yield
100
- response = ""
101
- for new_text in streamer:
102
- response += new_text
103
- yield response # ์‹ค์‹œ๊ฐ„์œผ๋กœ ์‘๋‹ต์„ Gradio ์ธํ„ฐํŽ˜์ด์Šค๋กœ ์ „๋‹ฌ
104
-
105
- except Exception as e:
106
- yield f"ํ…์ŠคํŠธ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}"
107
- # ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ์Šค๋ ˆ๋“œ ์ฒ˜๋ฆฌ ๋กœ์ง ์ถ”๊ฐ€ ๊ณ ๋ ค ํ•„์š” (์„ ํƒ ์‚ฌํ•ญ)
108
-
109
 
110
- # --- Gradio ์ธํ„ฐํŽ˜์ด์Šค ์„ค์ • ---
111
- if model is not None and tokenizer is not None:
112
- demo = gr.ChatInterface(
113
- fn=respond,
114
- title="Bitnet-b1.58-2B-4T Chatbot",
115
- description="Microsoft Bitnet-b1.58-2B-4T ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•œ ์ฑ„ํŒ… ๋ฐ๋ชจ์ž…๋‹ˆ๋‹ค.",
116
- examples=[
117
- [
118
- "์•ˆ๋…•ํ•˜์„ธ์š”! ์ž๊ธฐ์†Œ๊ฐœ ํ•ด์ฃผ์„ธ์š”.",
119
- "๋‹น์‹ ์€ ์œ ๋Šฅํ•œ AI ๋น„์„œ์ž…๋‹ˆ๋‹ค.", # System message ์˜ˆ์‹œ
120
- 512, # Max new tokens ์˜ˆ์‹œ
121
- 0.7, # Temperature ์˜ˆ์‹œ
122
- 0.95, # Top-p ์˜ˆ์‹œ
123
- ],
124
- [
125
- "ํŒŒ์ด์ฌ์œผ๋กœ ๊ฐ„๋‹จํ•œ ์›น ์„œ๋ฒ„ ๋งŒ๋“œ๋Š” ์ฝ”๋“œ ์•Œ๋ ค์ค˜",
126
- "๋‹น์‹ ์€ ์œ ๋Šฅํ•œ AI ๊ฐœ๋ฐœ์ž์ž…๋‹ˆ๋‹ค.", # System message ์˜ˆ์‹œ
127
- 1024, # Max new tokens ์˜ˆ์‹œ
128
- 0.8, # Temperature ์˜ˆ์‹œ
129
- 0.9, # Top-p ์˜ˆ์‹œ
130
- ],
 
 
 
 
 
 
 
 
 
131
  ],
132
- additional_inputs=[
133
- gr.Textbox(
134
- value="๋‹น์‹ ์€ ์œ ๋Šฅํ•œ AI ๋น„์„œ์ž…๋‹ˆ๋‹ค.", # ๊ธฐ๋ณธ ์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€
135
- label="System message",
136
- lines=1
137
- ),
138
- gr.Slider(
139
- minimum=1,
140
- maximum=4096, # ๋ชจ๋ธ ์ตœ๋Œ€ ์ปจํ…์ŠคํŠธ ๊ธธ์ด ๊ณ ๋ ค (๋˜๋Š” ๋” ๊ธธ๊ฒŒ ์„ค์ •)
141
- value=512,
142
- step=1,
143
- label="Max new tokens"
144
- ),
145
- gr.Slider(
146
- minimum=0.1,
147
- maximum=2.0, # Temperature ๋ฒ”์œ„ ์กฐ์ • (ํ•„์š”์‹œ)
148
- value=0.7,
149
- step=0.1,
150
- label="Temperature"
151
- ),
152
- gr.Slider(
153
- minimum=0.0, # Top-p ๋ฒ”์œ„ ์กฐ์ • (ํ•„์š”์‹œ)
154
- maximum=1.0,
155
- value=0.95,
156
- step=0.05,
157
- label="Top-p (nucleus sampling)"
158
- ),
159
  ],
160
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
- # Gradio ์•ฑ ์‹คํ–‰
163
- # Hugging Face Spaces์—์„œ๋Š” share=True๊ฐ€ ์ž๋™์œผ๋กœ ์„ค์ •๋ฉ๋‹ˆ๋‹ค.
164
- # debug=True๋กœ ์„ค์ •ํ•˜๋ฉด ์ƒ์„ธ ๋กœ๊ทธ๋ฅผ ๋ณผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
165
- demo.launch(debug=True)
166
- else:
167
- print("๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ๋กœ ์ธํ•ด Gradio ์ธํ„ฐํŽ˜์ด์Šค๋ฅผ ์‹คํ–‰ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
 
 
 
1
# Install the patched transformers branch required for BitNet support.
# NOTE(review): installing at import time is a Hugging Face Spaces convention;
# the exit status is now checked so a failed install is visible in the logs.
import os

_install_status = os.system(
    "pip install git+https://github.com/shumingma/transformers.git"
)
if _install_status != 0:
    print(f"WARNING: transformers branch install exited with status {_install_status}")

import threading
import torch
import torch._dynamo

# Suppress torch.compile/dynamo errors instead of failing hard; execution
# falls back to eager mode if graph compilation breaks.
torch._dynamo.config.suppress_errors = True

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)
import gradio as gr
import spaces

# Hugging Face model ID for Microsoft's 1.58-bit, 2B-parameter BitNet model.
model_id = "microsoft/bitnet-b1.58-2B-4T"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # bf16 weights — assumes GPU/bf16 support; TODO confirm
    device_map="auto",           # place the model on the best available device
)
print(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
@spaces.GPU  # request a GPU allocation on Hugging Face Spaces for this call
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """
    Generate a chat response using streaming with TextIteratorStreamer.

    Args:
        message: User's current message.
        history: List of (user, assistant) tuples from previous turns.
        system_message: Initial system prompt guiding the assistant.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus sampling probability.

    Yields:
        The growing response text as new tokens are generated.
    """
    # Rebuild the full conversation in the chat-template message format.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Stream tokens as they are produced instead of waiting for the full text.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        # Restored from the previous revision: silences the "Setting
        # pad_token_id to eos_token_id" warning on each generation call.
        pad_token_id=tokenizer.eos_token_id,
    )

    # generate() blocks, so run it on a worker thread while this generator
    # drains the streamer and forwards partial text to the UI.
    thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    response = ""
    for new_text in streamer:
        response += new_text
        yield response  # push the cumulative response to the client

    # Ensure the generation thread has fully finished before returning.
    thread.join()
80
+
81
# Gradio chat UI: `respond` streams partial responses back to the browser.
demo = gr.ChatInterface(
    fn=respond,
    title="Bitnet-b1.58-2B-4T Chatbot",
    description="This chat application is powered by Microsoft's SOTA Bitnet-b1.58-2B-4T and designed for natural and fast conversations.",
    # Each example row matches respond's signature after (message, history):
    # [message, system_message, max_tokens, temperature, top_p]
    examples=[
        [
            "Hello! How are you?",
            "You are a helpful AI assistant for everyday tasks.",
            512,
            0.7,
            0.95,
        ],
        [
            "Can you code a snake game in Python?",
            "You are a helpful AI assistant for coding.",
            2048,
            0.7,
            0.95,
        ],
    ],
    # Extra controls exposed in the UI, passed through to respond().
    additional_inputs=[
        gr.Textbox(
            value="You are a helpful AI assistant.",
            label="System message"
        ),
        gr.Slider(
            minimum=1,
            maximum=8192,
            value=2048,
            step=1,
            label="Max new tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        ),
    ],
)
129
 
130
# Script entry point; on Hugging Face Spaces the module may also be imported,
# in which case launch() is handled by the platform.
if __name__ == "__main__":
    demo.launch()