# File size: 4,263 Bytes
# af5e18d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import httpx
import asyncio

class TogetherAPI:
    """Minimal async streaming client for the Together inference endpoint.

    The class-level ``cookies`` and ``headers`` dictionaries are sent with
    every request issued by :meth:`generate`. They can be replaced per
    instance through the constructor.
    """

    # SECURITY(review): the session cookies and the bearer token below are
    # hard-coded credentials committed to source. They are kept unchanged so
    # existing behaviour is preserved, but they should be rotated and loaded
    # from configuration/secrets storage instead (pass ``cookies=`` /
    # ``headers=`` to the constructor to override them).
    cookies = {
        'intercom-id-evnv2y8k': 'fea4d452-f9be-42e0-93e3-1e47a3836362',
        'intercom-device-id-evnv2y8k': '2bb3e469-0159-4b6b-a33e-1aea4b51ccb1',
        '__stripe_mid': 'e0f7c1ba-56c6-44d4-ba1d-cf4611453eb43cf922',
        'state-csrf': '6f2o8nqgee2dfqdmhaxipe',
        'together_auth_cookie': '%7B%22expires%22%3A%222026-04-09T15%3A14%3A08.985Z%22%2C%22session%22%3A%220eae08c6fd1b79a22476a317d440a2104d74cd3ba333e40771b5ce50a90784297eb82eff36263debca2ee0658abe3e43cab97f87794421111d4bdec56b43dd2595ee22a165c123ba3d0f807759555b5f6d3f51b7c248e7cefcdf0f0b897f62b25b2a569e2cb89633032f15dca9818f39ed49f3ac2d7e0bc3d24517c62c78b1e4%22%7D',
        '__stripe_sid': '979e00a2-06ed-45be-9a95-88d7e7580f625ccce4',
        'intercom-session-evnv2y8k': 'TzZzSzBNRG8xdHJtTVprMm1zUXFob0M2ekhFV3VmeDZFcW5UVldlYmFYc3RsRjFmdWJidjU1ZXVSZzNOSW9QTE82OUx6anlvMWVncmlTd2ZvOERDUXN4OUdoSEM5ZzRnQmh4d2o5S3JKeDA9LS00S3JOclNpNzU0VkVBaTNRNWhSMm93PT0=--2719775e99e920753d35527a45a6731bac5e8f8f',
        'AMP_7112ee0414': 'JTdCJTIyZGV2aWNlSWQlMjIlM0ElMjJmY2ZmNjE3Ny00Yzg0LTRlOTItYTFhMC1kM2Y1ZjllOTFkYTglMjIlMkMlMjJ1c2VySWQlMjIlM0ElMjI2N2I1ZDkwNDNkZTIyN2Q0OGIzMWEwZTMlMjIlMkMlMjJzZXNzaW9uSWQlMjIlM0ExNzQ0MjExNjQyMjEwJTJDJTIyb3B0T3V0JTIyJTNBZmFsc2UlMkMlMjJsYXN0RXZlbnRUaW1lJTIyJTNBMTc0NDIxMTc1ODAwOSUyQyUyMmxhc3RFdmVudElkJTIyJTNBMjMyJTJDJTIycGFnZUNvdW50ZXIlMjIlM0E1JTdE',
    }

    headers = {
        'accept': 'application/json',
        'accept-language': 'en-US,en;q=0.9,ja;q=0.8',
        'authorization': 'Bearer 4d900964e385651ea685af6f6cd5573a17b421f50657f73f903525177915a7e2',
        'content-type': 'application/json',
        'priority': 'u=1, i',
        'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'x-stainless-arch': 'unknown',
        'x-stainless-lang': 'js',
        'x-stainless-os': 'Unknown',
        'x-stainless-package-version': '0.11.1',
        'x-stainless-retry-count': '0',
        'x-stainless-runtime': 'browser:chrome',
        'x-stainless-runtime-version': '135.0.0',
        'referer': 'https://api.together.ai/playground/v2/chat/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8',
    }

    # Endpoint used when the constructor is not given an explicit base_url.
    DEFAULT_BASE_URL = "https://api.together.ai/inference"

    # Model identifiers reported by get_model_list().
    MODELS = (
        'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8',
        'meta-llama/Llama-4-Scout-17B-16E-Instruct',
        'deepseek-ai/DeepSeek-R1',
        'deepseek-ai/DeepSeek-V3',
        'Qwen/Qwen2.5-VL-72B-Instruct',
        'google/gemma-2-27b-it',
    )

    # Retry policy applied by generate() to HTTP 429 responses.
    MAX_RETRIES = 5
    RETRY_DELAY_SECONDS = 0.5

    def __init__(self, base_url=None, *, cookies=None, headers=None):
        """Create a client.

        Args:
            base_url: URL that :meth:`generate` POSTs to. Defaults to
                ``DEFAULT_BASE_URL``.
            cookies: Optional mapping that replaces the class-level
                ``cookies`` for this instance.
            headers: Optional mapping that replaces the class-level
                ``headers`` for this instance.
        """
        self.base_url = base_url or self.DEFAULT_BASE_URL
        # Only shadow the class attributes when an override is supplied, so
        # the default behaviour (shared class-level dicts) is unchanged.
        if cookies is not None:
            self.cookies = cookies
        if headers is not None:
            self.headers = headers

    def get_model_list(self) -> list:
        """Return a new list with the model identifiers known to this client."""
        return list(self.MODELS)

    async def generate(self, json_data: dict):
        """Stream the response to a POST of ``json_data`` to ``self.base_url``.

        This is an async generator. On HTTP 200 it yields every non-empty
        response line followed by ``"\\n"``. On HTTP 429 it waits
        ``RETRY_DELAY_SECONDS`` and retries, up to ``MAX_RETRIES`` attempts
        in total. Failures are not raised; they are reported to the consumer
        as a single ``"data: [...]\\n\\n"`` item, after which the generator
        ends.

        Args:
            json_data: Request payload, sent as the JSON body.

        Yields:
            str: Response lines, or one bracketed ``data:`` error message.
        """
        # One client for all attempts; cookies/headers are configured on the
        # client so each retry sends the same credentials.
        async with httpx.AsyncClient(
            timeout=None,
            cookies=self.cookies,
            headers=self.headers,
        ) as client:
            for attempt in range(self.MAX_RETRIES):
                try:
                    async with client.stream(
                        "POST",
                        self.base_url,
                        json=json_data,
                    ) as response:
                        status = response.status_code
                        if status == 200:
                            async for line in response.aiter_lines():
                                if line:
                                    yield f"{line}\n"
                            return
                        if status != 429:
                            yield f"data: [Unexpected status code: {status}]\n\n"
                            return
                except Exception as e:
                    # Best-effort boundary: report the failure in-band rather
                    # than raising into the stream consumer.
                    yield f"data: [Connection error: {str(e)}]\n\n"
                    return

                # Rate limited. The 429 response is already closed here, so
                # the connection is not held open while backing off.
                if attempt < self.MAX_RETRIES - 1:
                    await asyncio.sleep(self.RETRY_DELAY_SECONDS)

        # Every attempt was answered with HTTP 429.
        yield "data: [Rate limited, max retries]\n\n"