pdufour committed on
Commit
54648ea
·
verified ·
1 Parent(s): 25b860b

Create index.js

Browse files
Files changed (1) hide show
  1. index.js +130 -0
index.js ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { AutoProcessor, Qwen2VLForConditionalGeneration, RawImage } from "@huggingface/transformers";
2
+
3
+
4
// Sample image loaded when the "example" control is clicked.
const EXAMPLE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg";

// Hugging Face Hub id passed to both `from_pretrained` calls.
const model_id = "onnx-community/Qwen2-VL-2B-Instruct";

// Small lookup shorthand so the DOM handles below read as a table.
const byId = (id) => document.getElementById(id);

// DOM handles, resolved once when the module is evaluated.
const exampleButton = byId('example');
const promptInput = document.querySelector('input[type="text"]');
const status = byId('status');
const thumb = byId('thumb');
const uploadInput = byId('upload');
const form = byId('form');
const output = byId('llm-output');

// Mutable UI state: the image (URL or data URL) and prompt that the next
// form submission will send to the model. Empty string means "not set yet".
let currentImage = '';
let currentQuery = '';

// Assigned by initializeSessions() once loading has finished.
let processor;
let model;
19
+
20
/**
 * Loads the processor and the model for `model_id`, then unlocks the UI.
 *
 * While loading, the status line reads "Loading model..." and the container
 * element carries the `disabled` class. On success the status becomes
 * "Ready", the upload and prompt inputs are enabled and the class is removed.
 *
 * @returns {Promise<void>}
 * @throws Re-throws whatever `from_pretrained` rejects with, after updating
 *   the status line, so callers awaiting this function still see the failure.
 */
async function initializeSessions() {
  // Look the element up explicitly instead of relying on the implicit
  // `window.container` named-element global, which is undeclared in this module.
  // NOTE(review): assumes the page has an element with id="container" — confirm.
  const container = document.getElementById('container');

  status.textContent = 'Loading model...';
  container?.classList.add('disabled');

  try {
    processor = await AutoProcessor.from_pretrained(model_id);
    model = await Qwen2VLForConditionalGeneration.from_pretrained(model_id, {
      dtype: 'q4f16',
      device: 'webgpu',
    });
  } catch (err) {
    // Without this the page would keep showing "Loading model..." forever.
    status.textContent = 'Error loading model';
    throw err;
  }

  status.textContent = 'Ready';
  status.classList.add('ready');

  uploadInput.disabled = false;
  promptInput.disabled = false;
  container?.classList.remove('disabled');
}
34
+
35
/**
 * Runs one image + prompt query and mirrors its progress in the UI.
 *
 * Sets the status line to "Analyzing..." while the request is in flight,
 * writes whatever `imageTextToText` passes to its callback into the output
 * element, and restores the status to "Ready" when the call succeeds.
 * Failures are logged and reported in the status line; they are not re-thrown.
 *
 * @param {string} imageUrl - Image location handed to `imageTextToText`.
 * @param {string} query - Prompt text handed to `imageTextToText`.
 * @returns {Promise<void>}
 */
async function handleQuery(imageUrl, query) {
  try {
    status.textContent = 'Analyzing...';

    await imageTextToText(imageUrl, query, (out) => {
      output.textContent = out;
    });

    // Previously the status stayed at "Analyzing..." after a successful run.
    status.textContent = 'Ready';
  } catch (err) {
    status.textContent = 'Error processing request';
    console.error(err);
  }
}
48
+
49
+
50
/**
 * Asks the loaded model a question about an image.
 *
 * Reads the image, resizes it to 448x448, builds a single-turn user
 * conversation (one image slot plus the query text), runs generation with at
 * most 128 new tokens and decodes only the tokens produced after the prompt.
 *
 * Requires the module-level `processor` and `model` to be initialised first.
 *
 * @param {string} imagePath - Location passed to `RawImage.read`.
 * @param {string} query - Prompt text placed in the user message.
 * @param {(decoded: unknown) => void} [cb] - Optional; invoked once with the
 *   decoded output before the function returns.
 * @returns {Promise<unknown>} The value returned by `processor.batch_decode`
 *   (presumably an array with one string per sequence — confirm against the
 *   library documentation).
 */
export async function imageTextToText(
  imagePath,
  query,
  cb,
) {
  const rawImage = await RawImage.read(imagePath);
  const image = await rawImage.resize(448, 448);

  const conversation = [
    {
      role: "user",
      content: [
        { type: "image" },
        { type: "text", text: query },
      ],
      // NOTE(review): kept from the original; presumably ignored by the chat
      // template, since the image is passed to the processor below — confirm.
      images: [image],
    },
  ];
  const text = processor.apply_chat_template(conversation, { add_generation_prompt: true });
  const inputs = await processor(text, image);

  const outputs = await model.generate({
    ...inputs,
    max_new_tokens: 128,
  });

  // Drop the prompt tokens: keep everything after the input length on the
  // last axis so only the newly generated part is decoded.
  const promptLength = inputs.input_ids.dims.at(-1);
  const decoded = processor.batch_decode(
    outputs.slice(null, [promptLength, null]),
    { skip_special_tokens: true },
  );

  // The callback is optional: callers that only need the return value may
  // omit it (the original threw a TypeError here in that case).
  cb?.(decoded);

  return decoded;
}
84
+
85
/**
 * Shows `url` as the thumbnail's background, sized to fit a 320px box while
 * keeping the image's aspect ratio, and clears any content inside the
 * thumbnail element.
 *
 * @param {string} url - Image URL or data URL; also loaded through
 *   `RawImage.fromURL` to read its width and height.
 * @returns {Promise<void>}
 */
async function updatePreview(url) {
  const { width, height } = await RawImage.fromURL(url);
  const aspectRatio = width / height;

  // The longer side is pinned to 320px; the other side scales with the ratio.
  const [boxWidth, boxHeight] = aspectRatio > 1
    ? [320, 320 / aspectRatio]
    : [320 * aspectRatio, 320];

  thumb.style.width = `${boxWidth}px`;
  thumb.style.height = `${boxHeight}px`;

  // Quote the URL (escaping quotes and backslashes): an unquoted CSS url()
  // is invalid as soon as the address contains spaces, parentheses or quotes.
  const cssUrl = String(url).replace(/["\\]/g, '\\$&');
  thumb.style.backgroundImage = `url("${cssUrl}")`;
  thumb.innerHTML = '';
}
94
+
95
// Load the processor and model before any handler is attached; the handlers
// below are only registered once this resolves.
await initializeSessions();

// UI Event Handlers

/**
 * Records `url` as the current image and refreshes the thumbnail.
 * A failed preview is reported in the status line instead of being left as
 * an unhandled promise rejection.
 */
const selectImage = (url) => {
  currentImage = url;
  updatePreview(url).catch((err) => {
    status.textContent = 'Error loading image';
    console.error(err);
  });
};

exampleButton.addEventListener('click', (e) => {
  e.preventDefault();
  selectImage(EXAMPLE_URL);
});

uploadInput.addEventListener('change', (e) => {
  const file = e.target.files[0];
  if (!file) return;

  // Read the chosen file as a data URL so it can serve both as the preview
  // background and as the image source for the model.
  const reader = new FileReader();
  reader.onload = () => selectImage(reader.result);
  reader.onerror = () => {
    status.textContent = 'Error reading file';
    console.error(reader.error);
  };
  reader.readAsDataURL(file);
});

// `input` fires after the value has changed and also covers paste, cut and
// deletion; `keypress` ran before the update, leaving the stored prompt one
// keystroke behind and missing non-character edits entirely.
promptInput.addEventListener('input', (e) => {
  currentQuery = e.target.value;
});

form.addEventListener('submit', async (e) => {
  e.preventDefault();

  // Take the live field value so the prompt is correct even if no input
  // event was observed (e.g. a value restored by the browser).
  currentQuery = promptInput.value;

  if (!currentImage || !currentQuery) {
    status.textContent = 'Please select an image and type a prompt';
    return;
  }

  promptInput.disabled = true;
  uploadInput.disabled = true;
  try {
    await handleQuery(currentImage, currentQuery);
  } finally {
    // Re-enable the controls so another query can follow; previously they
    // stayed disabled after the first submission.
    promptInput.disabled = false;
    uploadInput.disabled = false;
  }
});