ginipick committed on
Commit
a7d9bc3
·
verified ·
1 Parent(s): c0b1d6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -1561
app.py CHANGED
@@ -14,1564 +14,14 @@ import pathlib
14
  import sqlite3
15
  import pytz
16
 
17
# Korean companies tracked by the app.
KOREAN_COMPANIES = [
    "SAMSUNG", "HYNIX", "HYUNDAI", "KIA",
    "LG", "HANWHA", "SKT", "Lotte",
    "KOGAS", "KEPCO", "SK", "POSCO",
    "DOOSAN", "WOORI", "KAKAO", "Celltrion",
]
36
-
37
def convert_to_seoul_time(timestamp_str):
    """Label a ``YYYY-MM-DD HH:MM:SS`` timestamp as Korea Standard Time.

    The naive value is treated as Asia/Seoul wall-clock time and rendered
    again with a ``KST`` suffix. If anything fails, the error is printed and
    the input is returned unchanged.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    try:
        naive = datetime.strptime(timestamp_str, time_format)
        # localize() attaches the zone to the naive value as-is.
        localized = pytz.timezone('Asia/Seoul').localize(naive)
        return localized.strftime('%Y-%m-%d %H:%M:%S KST')
    except Exception as e:
        print(f"시간 변환 오류: {str(e)}")
        return timestamp_str
52
-
53
-
54
-
55
def analyze_sentiment_batch(articles, client):
    """Request one overall sentiment report covering all ``articles``.

    Args:
        articles: Iterable of dicts; the ``title`` and ``snippet`` keys are
            read, and missing keys become empty strings.
        client: Object exposing ``chat.completions.create(...)``.

    Returns:
        The content of the first completion choice, or a string starting
        with ``감성 분석 실패:`` that carries the exception text on failure.
    """
    try:
        # Merge every article's title and snippet into one prompt section.
        sections = []
        for entry in articles:
            sections.append(
                f"제목: {entry.get('title', '')}\n내용: {entry.get('snippet', '')}"
            )
        combined_text = "\n\n".join(sections)

        prompt = f"""다음 뉴스 모음에 대해 전반적인 감성 분석을 수행하세요:

뉴스 내용:
{combined_text}

다음 형식으로 분석해주세요:
1. 전반적 감성: [긍정/부정/중립]
2. 주요 긍정적 요소:
- [항목1]
- [항목2]
3. 주요 부정적 요소:
- [항목1]
- [항목2]
4. 종합 평가: [상세 설명]
"""

        request = {
            "model": "CohereForAI/c4ai-command-r-plus-08-2024",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.3,
            "max_tokens": 1000,
        }
        completion = client.chat.completions.create(**request)
        return completion.choices[0].message.content
    except Exception as e:
        return f"감성 분석 실패: {str(e)}"
89
-
90
# Database initialisation
def init_db():
    """Create the ``searches`` table in ``search_results.db`` if it is missing.

    The database path is relative to the current working directory. The
    connection is closed even when table creation fails.
    """
    db_path = pathlib.Path("search_results.db")
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS searches
                     (id INTEGER PRIMARY KEY AUTOINCREMENT,
                      keyword TEXT,
                      country TEXT,
                      results TEXT,
                      timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
        conn.commit()
    finally:
        # Release the handle even if the statement raised.
        conn.close()
103
-
104
def save_to_db(keyword, country, results):
    """Insert one search result set into the ``searches`` table.

    Args:
        keyword: Search keyword stored with the row.
        country: Country name stored with the row.
        results: JSON-serialisable result payload; stored via ``json.dumps``.

    The timestamp is the current Asia/Seoul wall-clock time written without
    zone information. The connection is closed even if the insert fails.
    """
    # Take the current time in Seoul and drop the zone before storing it.
    seoul_tz = pytz.timezone('Asia/Seoul')
    timestamp = datetime.now(seoul_tz).strftime('%Y-%m-%d %H:%M:%S')

    conn = sqlite3.connect("search_results.db")
    try:
        conn.execute(
            """INSERT INTO searches
               (keyword, country, results, timestamp)
               VALUES (?, ?, ?, ?)""",
            (keyword, country, json.dumps(results), timestamp),
        )
        conn.commit()
    finally:
        conn.close()
122
-
123
# Load stored search results from the database
def load_from_db(keyword, country):
    """Return the most recent stored results for ``keyword`` and ``country``.

    Returns:
        ``(results, timestamp)`` where ``results`` is the decoded JSON payload
        and ``timestamp`` is the stored time passed through
        ``convert_to_seoul_time``; ``(None, None)`` when no row matches.

    The connection is closed even if the query raises.
    """
    conn = sqlite3.connect("search_results.db")
    try:
        row = conn.execute(
            "SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
            (keyword, country),
        ).fetchone()
    finally:
        conn.close()

    if row:
        return json.loads(row[0]), convert_to_seoul_time(row[1])
    return None, None
134
-
135
# Result rendering
def display_results(articles):
    """Render ``articles`` as a numbered Markdown listing.

    Each article dict must provide ``title``, ``channel``, ``time``, ``link``
    and ``snippet``. Numbering starts at 1, and an empty input gives an
    empty string.
    """
    sections = [
        f"### {number}. {item['title']}\n"
        f"출처: {item['channel']}\n"
        f"시간: {item['time']}\n"
        f"링크: {item['link']}\n"
        f"요약: {item['snippet']}\n\n"
        for number, item in enumerate(articles, 1)
    ]
    return "".join(sections)
145
-
146
# Company search
def search_company(company):
    """Search US news for ``company``, store the articles and return them as Markdown.

    Returns a "no results" message when the search reports an error or finds
    no articles.
    """
    error_message, articles = serphouse_search(company, "United States")
    if error_message or not articles:
        return f"{company}에 대한 검색 결과가 없습니다."

    save_to_db(company, "United States", articles)
    return display_results(articles)
153
-
154
# Load stored company results
def load_company(company):
    """Return the most recently stored US results for ``company`` as Markdown.

    The output starts with a heading and the stored time. A "no stored
    results" message is returned when nothing has been saved.
    """
    results, timestamp = load_from_db(company, "United States")
    if not results:
        return f"{company}에 대한 저장된 결과가 없습니다."

    header = f"### {company} 검색 결과\n저장 시간: {timestamp}\n\n"
    return header + display_results(results)
160
-
161
# Statistics / analysis report
def show_stats():
    """Build a Markdown report for every company in ``KOREAN_COMPANIES``.

    For each company with a stored search, the newest row (matched on the
    keyword only, whatever the country) contributes its update time, article
    count and, when articles exist, a sentiment analysis from
    ``analyze_sentiment_batch``. Companies with no stored row are omitted.

    The connection is closed even if a query, JSON decoding or the analysis
    raises.
    """
    parts = ["## 한국 기업 뉴스 분석 리포트\n\n"]

    conn = sqlite3.connect("search_results.db")
    try:
        for company in KOREAN_COMPANIES:
            row = conn.execute("""
                SELECT results, timestamp
                FROM searches
                WHERE keyword = ?
                ORDER BY timestamp DESC
                LIMIT 1
            """, (company,)).fetchone()
            if not row:
                continue

            results_json, timestamp = row
            articles = json.loads(results_json)
            seoul_time = convert_to_seoul_time(timestamp)

            parts.append(f"### {company}\n")
            parts.append(f"- 마지막 업데이트: {seoul_time}\n")
            parts.append(f"- 저장된 기사 수: {len(articles)}건\n\n")

            if articles:
                # One sentiment analysis over all stored articles.
                sentiment_analysis = analyze_sentiment_batch(articles, client)
                parts.append("#### 뉴스 감성 분석\n")
                parts.append(f"{sentiment_analysis}\n\n")

            parts.append("---\n\n")
    finally:
        conn.close()

    return "".join(parts)
197
-
198
-
199
-
200
-
201
-
202
-
203
# Hugging Face access token; start-up fails if it is not set, because the
# client below authenticates with it.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if not ACCESS_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")

# OpenAI-style client pointed at the Hugging Face inference endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

MAX_COUNTRY_RESULTS = 100  # maximum number of results per country
MAX_GLOBAL_RESULTS = 1000  # maximum number of results worldwide
214
-
215
def create_article_components(max_results):
    """Create ``max_results`` hidden article slots and return their handles.

    Each slot is a ``gr.Group`` holding a title, an image, a snippet and an
    info line. The returned list has one dict per slot with the keys
    ``group``, ``title``, ``image``, ``snippet``, ``info`` and ``index``.
    """
    slots = []
    for position in range(max_results):
        with gr.Group(visible=False) as container:
            heading = gr.Markdown()
            thumbnail = gr.Image(width=200, height=150)
            body = gr.Markdown()
            meta = gr.Markdown()

        slots.append(dict(
            group=container,
            title=heading,
            image=thumbnail,
            snippet=body,
            info=meta,
            index=position,
        ))
    return slots
233
-
234
# SERPHouse API key from the environment; os.getenv gives None when it is unset.
API_KEY = os.getenv("SERPHOUSE_API_KEY")
235
-
236
# Language code per country
COUNTRY_LANGUAGES = {
    "United States": "en",
    "KOREA": "ko",
    "United Kingdom": "en",
    "Taiwan": "zh-TW",
    "Canada": "en",
    "Australia": "en",
    "Germany": "de",
    "France": "fr",
    "Japan": "ja",
    "China": "zh",
    "India": "hi",
    "Brazil": "pt",
    "Mexico": "es",
    "Russia": "ru",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Singapore": "en",
    "Hong Kong": "zh-HK",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Belgium": "nl",
    "Denmark": "da",
    "Finland": "fi",
    "Ireland": "en",
    "Norway": "no",
    "Poland": "pl",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Portugal": "pt",
    "Romania": "ro",
    "Turkey": "tr",
    "Israel": "he",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "South Africa": "en",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
    "New Zealand": "en",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Egypt": "ar",
    "Morocco": "ar",
    "Nigeria": "en",
    "Kenya": "sw",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Luxembourg": "fr",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is",
}

# Search location per country: same keys and order as COUNTRY_LANGUAGES.
# Every country maps to its own name except "KOREA", which maps to "kr".
COUNTRY_LOCATIONS = {
    country: ("kr" if country == "KOREA" else country)
    for country in COUNTRY_LANGUAGES
}
378
-
379
# Region definitions
# East Asia
COUNTRY_LANGUAGES_EAST_ASIA = {
    "KOREA": "ko",
    "Taiwan": "zh-TW",
    "Japan": "ja",
    "China": "zh",
    "Hong Kong": "zh-HK",
}

# Each country maps to its own name, in the same order as the language table.
COUNTRY_LOCATIONS_EAST_ASIA = {
    country: country for country in COUNTRY_LANGUAGES_EAST_ASIA
}
396
-
397
# Southeast Asia / Oceania
COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA = {
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Singapore": "en",
    "Papua New Guinea": "en",
    "Australia": "en",
    "New Zealand": "en",
}

# Each country maps to its own name, in the same order as the language table.
COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA = {
    country: country for country in COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA
}
421
-
422
# Eastern Europe
COUNTRY_LANGUAGES_EAST_EUROPE = {
    "Poland": "pl",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Romania": "ro",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is",
    "Russia": "ru",
}

# Each country maps to its own name, in the same order as the language table.
COUNTRY_LOCATIONS_EAST_EUROPE = {
    country: country for country in COUNTRY_LANGUAGES_EAST_EUROPE
}
464
-
465
# Western Europe
COUNTRY_LANGUAGES_WEST_EUROPE = {
    "Germany": "de",
    "France": "fr",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Belgium": "nl",
    "Ireland": "en",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Portugal": "pt",
    "Luxembourg": "fr",
    "United Kingdom": "en",
}

# Each country maps to its own name, in the same order as the language table.
COUNTRY_LOCATIONS_WEST_EUROPE = {
    country: country for country in COUNTRY_LANGUAGES_WEST_EUROPE
}
497
-
498
# Middle East / Africa
COUNTRY_LANGUAGES_ARAB_AFRICA = {
    "South Africa": "en",
    "Nigeria": "en",
    "Kenya": "sw",
    "Egypt": "ar",
    "Morocco": "ar",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "Israel": "he",
}

# Each country maps to its own name, in the same order as the language table.
COUNTRY_LOCATIONS_ARAB_AFRICA = {
    country: country for country in COUNTRY_LANGUAGES_ARAB_AFRICA
}
520
-
521
# Americas
COUNTRY_LANGUAGES_AMERICA = {
    "United States": "en",
    "Canada": "en",
    "Mexico": "es",
    "Brazil": "pt",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
}

# Each country maps to its own name, in the same order as the language table.
COUNTRY_LOCATIONS_AMERICA = {
    country: country for country in COUNTRY_LANGUAGES_AMERICA
}
545
-
546
# Region choices, in display order
REGIONS = [
    "동아시아", "동남아시아/오세아니아", "동유럽",
    "서유럽", "중동/아프리카", "아메리카",
]
555
-
556
-
557
@lru_cache(maxsize=100)
def translate_query(query, country):
    """Translate ``query`` into the language configured for ``country``.

    The query is returned unchanged when it is ASCII-only, when the country
    is Korea, when the country has no entry in ``COUNTRY_LANGUAGES``, or when
    translation fails (the error is printed). Results are memoised per
    ``(query, country)`` pair, including fallbacks after a failure.
    """
    try:
        if is_english(query):
            return query

        # The country tables key Korea as "KOREA"; "South Korea" is kept for
        # callers that still use that spelling.
        if country in ("KOREA", "South Korea") or country not in COUNTRY_LANGUAGES:
            return query

        url = "https://translate.googleapis.com/translate_a/single"
        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": COUNTRY_LANGUAGES[country],
            "dt": "t",
            "q": query
        }

        # The context manager closes the session's connections afterwards.
        with requests.Session() as session:
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, params=params, timeout=(5, 10))
            segments = response.json()[0]

        # The first element holds one entry per translated segment; join them
        # so that text after the first segment is not dropped.
        translated_text = ''.join(
            segment[0] for segment in segments if segment and segment[0]
        )
        return translated_text or query

    except Exception as e:
        print(f"번역 오류: {str(e)}")
        return query
591
-
592
-
593
@lru_cache(maxsize=200)
def translate_to_korean(text):
    """Translate ``text`` into Korean, with the source language auto-detected.

    On any failure the error is printed and ``text`` is returned unchanged.
    Results are memoised per input string, including fallbacks after a
    failure.
    """
    try:
        url = "https://translate.googleapis.com/translate_a/single"
        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": "ko",
            "dt": "t",
            "q": text
        }

        # The context manager closes the session's connections afterwards.
        with requests.Session() as session:
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, params=params, timeout=(5, 10))
            segments = response.json()[0]

        # The first element holds one entry per translated segment; join them
        # so multi-sentence snippets are not cut off after the first segment.
        translated_text = ''.join(
            segment[0] for segment in segments if segment and segment[0]
        )
        return translated_text or text
    except Exception as e:
        print(f"한글 번역 오류: {str(e)}")
        return text
615
-
616
def is_english(text):
    """Return True when every character of ``text`` is ASCII.

    Spaces, hyphens and underscores are skipped before the check. An empty
    string counts as English.
    """
    for char in text:
        if char in (' ', '-', '_'):
            continue
        if ord(char) >= 128:
            return False
    return True
618
-
619
def is_korean(text):
    """Return True when ``text`` has at least one character in U+AC00..U+D7A3."""
    for char in text:
        if '\uAC00' <= char <= '\uD7A3':
            return True
    return False
621
-
622
def search_serphouse(query, country, page=1, num_result=10):
    """Query the SERPHouse live news endpoint for the last day of results.

    Args:
        query: Search text; it is passed through ``translate_query`` first.
        country: Key into ``COUNTRY_LOCATIONS`` / ``COUNTRY_LANGUAGES``;
            unknown countries fall back to "United States" / "en".
        page: Accepted for interface compatibility.
        num_result: Accepted for interface compatibility.

    Returns:
        ``{"results": <decoded JSON>, "translated_query": ...}`` on success,
        or ``{"error": <message>, "translated_query": <original query>}`` on
        failure.

    NOTE(review): ``page`` and ``num_result`` are not used; the payload
    always sends page "1" and num "100". Confirm whether that is intended
    before wiring them in.
    """
    url = "https://api.serphouse.com/serp/live"

    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": "1",
            "num": "100",
            "date_range": date_range,
            "sort_by": "date"
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    try:
        # The context manager closes the session's connections on every path.
        with requests.Session() as session:
            retries = Retry(
                total=5,  # maximum number of retries
                backoff_factor=1,  # back-off factor between retries
                status_forcelist=[500, 502, 503, 504, 429],  # HTTP statuses that trigger a retry
                allowed_methods=["POST"]  # allow retries for POST requests
            )

            adapter = HTTPAdapter(max_retries=retries)
            session.mount('http://', adapter)
            session.mount('https://', adapter)

            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=(30, 30)  # 30 s connect timeout, 30 s read timeout
            )

            response.raise_for_status()
            return {"results": response.json(), "translated_query": translated_query}

    except requests.exceptions.Timeout:
        return {
            "error": "검색 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"검색 중 오류가 발생했습니다: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"예기치 않은 오류가 발생했습니다: {str(e)}",
            "translated_query": query
        }
695
-
696
def format_results_from_raw(response_data):
    """Turn a ``search_serphouse`` response into ``(error_message, articles)``.

    Args:
        response_data: Dict with either an ``error`` key, or ``results`` (the
            decoded API JSON) and ``translated_query``.

    Returns:
        ``("", articles)`` on success. Articles whose URL or channel contains
        a Korean domain marker, or whose title contains a Korea-related
        keyword, are dropped. Otherwise ``(message, [])`` for an upstream
        error, an empty news list, or a processing failure.

    Each article keeps its 1-based position in the unfiltered list as
    ``index`` and its URL exactly as received. Lower-casing is used only for
    matching, because URL paths can be case-sensitive.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "검색 결과가 없습니다.", []

        # Markers used to drop Korean outlets and Korea-related headlines.
        korean_domains = ['.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
                          'donga', 'joins', 'hani', 'koreatimes', 'koreaherald']
        korean_keywords = ['korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
                           'gwangju', 'daejeon', 'ulsan', 'sejong']

        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            link = result.get("url", result.get("link", ""))
            url = link.lower()
            title = result.get("title", "").lower()
            channel = result.get("channel", result.get("source", "")).lower()

            is_korean_content = (
                any(domain in url or domain in channel for domain in korean_domains)
                or any(keyword in title for keyword in korean_keywords)
            )
            if is_korean_content:
                continue

            filtered_articles.append({
                "index": idx,
                "title": result.get("title", "제목 없음"),
                "link": link,
                "snippet": result.get("snippet", "내용 없음"),
                "channel": result.get("channel", result.get("source", "알 수 없음")),
                "time": result.get("time", result.get("date", "알 수 없는 시간")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query
            })

        return "", filtered_articles
    except Exception as e:
        return f"결과 처리 중 오류 발생: {str(e)}", []
739
-
740
def serphouse_search(query, country):
    """Run a SERPHouse search and return ``(error_message, articles)``."""
    return format_results_from_raw(search_serphouse(query, country))
743
-
744
-
745
def search_and_display(query, country, articles_state, progress=gr.Progress()):
    """Search one country and build the UI updates for the result slots.

    Returns a list ordered as: status message, translated-query message,
    error message, five updates for every entry in ``article_components``,
    and finally the new articles state.

    One group of updates is always emitted per article slot, including when
    the search succeeds but every article was filtered out, so the length of
    the returned list does not depend on the search outcome.
    """
    with ThreadPoolExecutor(max_workers=3) as executor:
        progress(0, desc="검색어 번역 중...")
        future_translation = executor.submit(translate_query, query, country)
        translated_query = future_translation.result()
        if translated_query != query:
            translated_display = f"**원본 검색어:** {query}\n**번역된 검색어:** {translated_query}"
        else:
            translated_display = f"**검색어:** {query}"

        progress(0.3, desc="검색 중...")
        response_data = search_serphouse(query, country)

        progress(0.6, desc="결과 처리 중...")
        error_message, articles = format_results_from_raw(response_data)

        outputs = [
            gr.update(value="검색을 진행중입니다...", visible=True),
            gr.update(value=translated_display, visible=True),
        ]

        if error_message:
            outputs.append(gr.update(value=error_message, visible=True))
            for _ in article_components:
                outputs.extend([
                    gr.update(visible=False), gr.update(), gr.update(),
                    gr.update(), gr.update()
                ])
            articles_state = []
        else:
            outputs.append(gr.update(value="", visible=False))

            # Translate all snippets concurrently before filling the slots.
            futures = [
                (article, executor.submit(translate_to_korean, article['snippet']))
                for article in articles
            ]
            progress(0.8, desc="번역 처리 중...")
            for article, future in futures:
                article['korean_summary'] = future.result()

            total_articles = len(articles)
            for idx, _ in enumerate(article_components):
                if idx < total_articles:
                    # Report progress only for real articles so the fraction
                    # never exceeds 1.0.
                    progress((idx + 1) / total_articles, desc=f"결과 표시 중... {idx + 1}/{total_articles}")
                    article = articles[idx]
                    image_url = article['image_url']
                    if image_url and not image_url.startswith('data:image'):
                        image_update = gr.update(value=image_url, visible=True)
                    else:
                        image_update = gr.update(value=None, visible=False)

                    outputs.extend([
                        gr.update(visible=True),
                        gr.update(value=f"### [{article['title']}]({article['link']})"),
                        image_update,
                        gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {article['korean_summary']}"),
                        gr.update(value=f"**출처:** {article['channel']} | **시간:** {article['time']}")
                    ])
                else:
                    outputs.extend([
                        gr.update(visible=False), gr.update(), gr.update(),
                        gr.update(), gr.update()
                    ])
            articles_state = articles

        progress(1.0, desc="완료!")
        outputs.append(articles_state)
        # Hide the "search in progress" status now that the results are ready.
        outputs[0] = gr.update(value="", visible=False)

    return outputs
809
-
810
def get_region_countries(region):
    """Return the ``(locations, languages)`` country tables for ``region``.

    Two empty dicts are returned when the region name is not recognised.
    """
    region_tables = (
        ("동아시아", COUNTRY_LOCATIONS_EAST_ASIA, COUNTRY_LANGUAGES_EAST_ASIA),
        ("동남아시아/오세아니아", COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA, COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA),
        ("동유럽", COUNTRY_LOCATIONS_EAST_EUROPE, COUNTRY_LANGUAGES_EAST_EUROPE),
        ("서유럽", COUNTRY_LOCATIONS_WEST_EUROPE, COUNTRY_LANGUAGES_WEST_EUROPE),
        ("중동/아프리카", COUNTRY_LOCATIONS_ARAB_AFRICA, COUNTRY_LANGUAGES_ARAB_AFRICA),
        ("아메리카", COUNTRY_LOCATIONS_AMERICA, COUNTRY_LANGUAGES_AMERICA),
    )
    for name, locations, languages in region_tables:
        if region == name:
            return locations, languages
    return {}, {}
825
-
826
def search_global(query, region, articles_state_global):
    """Search every country of ``region`` and stream UI updates (generator).

    Each yielded list is ordered as: status message, query message, five
    updates for every entry in ``global_article_components``, then the
    current list of de-duplicated articles. A list is yielded before the
    first search, before each country, after each country that returned
    articles, and once more with the final status.

    A failure while searching one country is printed and that country is
    skipped. ``unique_results`` starts empty so the final status also works
    for an unknown region or when no country returns articles.
    """
    status_msg = f"{region} 지역 검색을 시작합니다..."
    all_results = []
    unique_results = []

    outputs = [
        gr.update(value=status_msg, visible=True),
        gr.update(value=f"**검색어:** {query}", visible=True),
    ]

    for _ in global_article_components:
        outputs.extend([
            gr.update(visible=False), gr.update(), gr.update(),
            gr.update(), gr.update()
        ])
    outputs.append([])

    yield outputs

    # Country table for the selected region.
    locations, _ = get_region_countries(region)
    total_countries = len(locations)

    for idx, country in enumerate(locations, 1):
        try:
            status_msg = f"{region} - {country} 검색 중... ({idx}/{total_countries} 국가)"
            outputs[0] = gr.update(value=status_msg, visible=True)
            yield outputs

            error_message, articles = serphouse_search(query, country)
            if not error_message and articles:
                for article in articles:
                    article['source_country'] = country
                    article['region'] = region

                all_results.extend(articles)
                sorted_results = sorted(all_results, key=lambda x: x.get('time', ''), reverse=True)

                # Keep the first occurrence of each link.
                seen_urls = set()
                unique_results = []
                for article in sorted_results:
                    url = article.get('link', '')
                    if url not in seen_urls:
                        seen_urls.add(url)
                        unique_results.append(article)

                unique_results = unique_results[:MAX_GLOBAL_RESULTS]

                outputs = [
                    gr.update(value=f"{region} - {idx}/{total_countries} 국가 검색 완료\n현재까지 발견된 뉴스: {len(unique_results)}건", visible=True),
                    gr.update(value=f"**검색어:** {query} | **지역:** {region}", visible=True),
                ]

                # A separate index name keeps the country counter `idx` intact.
                for slot_idx, _ in enumerate(global_article_components):
                    if slot_idx < len(unique_results):
                        article = unique_results[slot_idx]
                        image_url = article.get('image_url', '')
                        if image_url and not image_url.startswith('data:image'):
                            image_update = gr.update(value=image_url, visible=True)
                        else:
                            image_update = gr.update(value=None, visible=False)

                        korean_summary = translate_to_korean(article['snippet'])

                        outputs.extend([
                            gr.update(visible=True),
                            gr.update(value=f"### [{article['title']}]({article['link']})"),
                            image_update,
                            gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {korean_summary}"),
                            gr.update(value=f"**출처:** {article['channel']} | **국가:** {article['source_country']} | **지역:** {article['region']} | **시간:** {article['time']}")
                        ])
                    else:
                        outputs.extend([
                            gr.update(visible=False),
                            gr.update(),
                            gr.update(),
                            gr.update(),
                            gr.update()
                        ])

                outputs.append(unique_results)
                yield outputs

        except Exception as e:
            print(f"Error searching {country}: {str(e)}")
            continue

    final_status = f"{region} 검색 완료! 총 {len(unique_results)}개의 뉴스가 발견되었습니다."
    outputs[0] = gr.update(value=final_status, visible=True)
    yield outputs
913
-
914
# Custom stylesheet for the UI, kept as one CSS string. It covers the status
# and results areas, tabs, result groups, buttons, text boxes, the progress
# bar and its label, and the examples table.
# NOTE(review): `.group` is declared twice; the later rule sets opacity to 0
# unless the element also has the `visible` class — confirm this is intended.
css = """
/* 전역 스타일 */
footer {visibility: hidden;}

/* 레이아웃 컨테이너 */
#status_area {
background: rgba(255, 255, 255, 0.9);
padding: 15px;
border-bottom: 1px solid #ddd;
margin-bottom: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}

#results_area {
padding: 10px;
margin-top: 10px;
}

/* 탭 스타일 */
.tabs {
border-bottom: 2px solid #ddd !important;
margin-bottom: 20px !important;
}

.tab-nav {
border-bottom: none !important;
margin-bottom: 0 !important;
}

.tab-nav button {
font-weight: bold !important;
padding: 10px 20px !important;
}

.tab-nav button.selected {
border-bottom: 2px solid #1f77b4 !important;
color: #1f77b4 !important;
}

/* 상태 메시지 */
#status_area .markdown-text {
font-size: 1.1em;
color: #2c3e50;
padding: 10px 0;
}

/* 기본 컨테이너 */
.group {
border: 1px solid #eee;
padding: 15px;
margin-bottom: 15px;
border-radius: 5px;
background: white;
}

/* 버튼 스타일 */
.primary-btn {
background: #1f77b4 !important;
border: none !important;
}

/* 입력 필드 */
.textbox {
border: 1px solid #ddd !important;
border-radius: 4px !important;
}

/* 프로그레스바 컨테이너 */
.progress-container {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6px;
background: #e0e0e0;
z-index: 1000;
}

/* 프로그레스바 */
.progress-bar {
height: 100%;
background: linear-gradient(90deg, #2196F3, #00BCD4);
box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
transition: width 0.3s ease;
animation: progress-glow 1.5s ease-in-out infinite;
}

/* 프로그레스 텍스트 */
.progress-text {
position: fixed;
top: 8px;
left: 50%;
transform: translateX(-50%);
background: #333;
color: white;
padding: 4px 12px;
border-radius: 15px;
font-size: 14px;
z-index: 1001;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}

/* 프로그레스바 애니메이션 */
@keyframes progress-glow {
0% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
50% {
box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
}
100% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
}

/* 반응형 디자인 */
@media (max-width: 768px) {
.group {
padding: 10px;
margin-bottom: 15px;
}

.progress-text {
font-size: 12px;
padding: 3px 10px;
}
}

/* 로딩 상태 표시 개선 */
.loading {
opacity: 0.7;
pointer-events: none;
transition: opacity 0.3s ease;
}

/* 결과 컨테이너 애니메이션 */
.group {
transition: all 0.3s ease;
opacity: 0;
transform: translateY(20px);
}

.group.visible {
opacity: 1;
transform: translateY(0);
}

/* Examples 스타일링 */
.examples-table {
margin-top: 10px !important;
margin-bottom: 20px !important;
}

.examples-table button {
background-color: #f0f0f0 !important;
border: 1px solid #ddd !important;
border-radius: 4px !important;
padding: 5px 10px !important;
margin: 2px !important;
transition: all 0.3s ease !important;
}

.examples-table button:hover {
background-color: #e0e0e0 !important;
transform: translateY(-1px) !important;
box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}

.examples-table .label {
font-weight: bold !important;
color: #444 !important;
margin-bottom: 5px !important;
}
"""
1088
-
1089
-
1090
def get_article_content(url):
    """Download an article page and return its title, description and body text.

    Returns:
        A string of the form ``Title: ...``, ``Description: ...``,
        ``Content:`` followed by the extracted paragraphs, separated by blank
        lines. On any failure the error is printed and a string starting
        with ``Error crawling content:`` is returned.

    The title comes from ``og:title`` and falls back to the ``<title>`` text.
    The body comes from the first matching article container and falls back
    to every ``<p>`` longer than 50 characters. Whitespace clean-up collapses
    spaces and tabs but keeps paragraph breaks.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # The context manager closes the session's connections afterwards.
        with requests.Session() as session:
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))

            response = session.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

        # Metadata: a <title> tag has no `content` attribute, so read its text.
        og_title = soup.find('meta', property='og:title')
        title = og_title.get('content', '') if og_title else ''
        if not title:
            title_tag = soup.find('title')
            title = title_tag.get_text(strip=True) if title_tag else ''

        description = soup.find('meta', property='og:description') or soup.find('meta', {'name': 'description'})
        description = description.get('content', '') if description else ''

        article_content = ''

        # Common article-body containers, tried in order.
        content_selectors = [
            'article', '.article-body', '.article-content', '#article-body',
            '.story-body', '.post-content', '.entry-content', '.content-body',
            '[itemprop="articleBody"]', '.story-content'
        ]

        for selector in content_selectors:
            content = soup.select_one(selector)
            if content:
                # Strip elements that are not part of the article text.
                for tag in content.find_all(['script', 'style', 'nav', 'header', 'footer', 'aside']):
                    tag.decompose()

                paragraphs = content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
                if paragraphs:
                    article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if p.get_text().strip()])
                    break

        # Fallback: take every reasonably long paragraph on the page.
        if not article_content:
            paragraphs = soup.find_all('p')
            article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if len(p.get_text().strip()) > 50])

        full_content = f"Title: {title}\n\nDescription: {description}\n\nContent:\n{article_content}"

        # Collapse runs of whitespace other than newlines, then squeeze blank
        # lines. Matching newlines in the first step would flatten the text
        # to one line and leave the second step with nothing to do.
        full_content = re.sub(r'[^\S\n]+', ' ', full_content)
        full_content = re.sub(r'\n\s*\n', '\n\n', full_content)

        return full_content.strip()

    except Exception as e:
        print(f"Crawling error details: {str(e)}")  # detailed error output for debugging
        return f"Error crawling content: {str(e)}"
1150
-
1151
def respond(url, history, system_message, max_tokens, temperature, top_p):
    """Stream a Korean translation and rewritten article for the page at *url*.

    This is a generator: every update must be yielded, because a value
    passed to ``return`` inside a generator is not delivered to the consumer.

    Args:
        url: Address of the news article; must start with http:// or https://.
        history: Chat history as a list of ``(user, bot)`` tuples, or None.
        system_message: System prompt sent to the model.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        The updated chat history after each change (validation error,
        progress placeholder, each streamed token, or failure message).
    """
    history = history if history is not None else []
    url = (url or "").strip()

    if not url.startswith(('http://', 'https://')):
        history.append((url, "올바른 URL을 입력해주세요."))
        yield history
        return

    try:
        article_content = get_article_content(url)
        # The crawler reports failure as a string; do not send that to the model.
        if article_content.startswith("Error crawling content:"):
            raise RuntimeError(article_content)

        translation_prompt = f"""다음 영문 기사를 한국어로 번역하고 기사를 작성해주세요.

1단계: 전문 번역
===번역 시작===
{article_content}
===번역 끝===

2단계: 기사 작성 가이드라인
다음 요구사항에 따라 한국어 기사를 작성하세요:

1. 구조
- 헤드라인: 핵심 내용을 담은 강력한 제목
- 부제목: 헤드라인 보완 설명
- 리드문: 기사의 핵심을 요약한 첫 문단
- 본문: 상세 내용 전개

2. 작성 규칙
- 객관적이고 정확한 사실 전달
- 문장은 '다.'로 종결
- 단락 간 자연스러운 흐름
- 인용구는 따옴표 처리
- 핵심 정보를 앞부분에 배치
- 전문 용어는 적절한 설명 추가

3. 형식
- 적절한 단락 구분
- 읽기 쉬운 문장 길이
- 논리적인 정보 구성

각 단계는 '===번역===', '===기사==='로 명확히 구분하여 출력하세요.
"""

        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": translation_prompt},
        ]

        # Show the placeholder immediately instead of waiting for the first token.
        history.append((url, "번역 및 기사 작성을 시작합니다..."))
        yield history

        full_response = ""
        for chunk in client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        ):
            # Some stream chunks may carry no choices or no text.
            if not chunk.choices:
                continue
            token = getattr(chunk.choices[0].delta, 'content', None)
            if token:
                full_response += token
                history[-1] = (url, full_response)
                yield history

    except Exception as e:
        error_message = f"처리 중 오류가 발생했습니다: {str(e)}"
        history.append((url, error_message))
        yield history
1223
-
1224
-
1225
def continue_writing(history, system_message, max_tokens, temperature, top_p):
    """Stream a continuation of the last reply in the chat history.

    This is a generator; nothing is yielded when *history* is empty or None.

    Args:
        history: Chat history as a list of ``(user, bot)`` tuples.
        system_message: System prompt sent to the model.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        A copy of *history* with one extra entry: ``("계속 작성", text)``
        growing as tokens arrive, or ``("오류", message)`` on failure.
        The list passed in is never modified.
    """
    if not history:
        return

    # A missing bot reply would otherwise be interpolated as the text "None".
    last_response = history[-1][1] or ""
    continue_prompt = f"""이전 내용을 이어서 계속 작성해주세요.
마지막 응답: {last_response}

추가 지침:
1. 이전 내용의 맥락을 유지하며 자연스럽게 이어서 작성
2. 새로운 정보나 상세 설명을 추가
3. 필요한 경우 보충 설명이나 분석 제공
4. 기사 형식과 스타일 유지
5. 필요한 경우 추가적인 이미지 프롬프트 생성
"""

    # The conversation sent to the model starts with the user message.
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": continue_prompt},
    ]

    # Copy the history once, then update only the trailing entry per token.
    extended = list(history)
    extended.append(("계속 작성", ""))

    try:
        full_response = ""
        for chunk in client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        ):
            # Some stream chunks may carry no choices or no text.
            if not chunk.choices:
                continue
            token = getattr(chunk.choices[0].delta, 'content', None)
            if token:
                full_response += token
                extended[-1] = ("계속 작성", full_response)
                yield extended

    except Exception as e:
        error_message = f"계속 작성 중 오류가 발생했습니다: {str(e)}"
        # As before, the error entry is appended to the original history.
        yield list(history) + [("오류", error_message)]
1273
-
1274
def _company_card(company):
    """Render one company's card (heading, two buttons, result area) and wire the buttons."""
    with gr.Group():
        gr.Markdown(f"### {company}")
        with gr.Row():
            search_btn = gr.Button("검색", variant="primary")
            load_btn = gr.Button("출력", variant="secondary")
        result_display = gr.Markdown()

    search_btn.click(fn=lambda: search_company(company), outputs=result_display)
    load_btn.click(fn=lambda: load_company(company), outputs=result_display)


def _build_article_slots(count):
    """Create *count* hidden article cards and return their components as dicts."""
    slots = []
    for index in range(count):
        with gr.Group(visible=False) as group:
            title = gr.Markdown()
            image = gr.Image(width=200, height=150)
            snippet = gr.Markdown()
            info = gr.Markdown()

        slots.append({
            'group': group,
            'title': title,
            'image': image,
            'snippet': snippet,
            'info': info,
            'index': index,
        })
    return slots


def _slot_outputs(slots):
    """Flatten slots into group, title, image, snippet, info order, slot by slot."""
    return [
        slot[key]
        for slot in slots
        for key in ('group', 'title', 'image', 'snippet', 'info')
    ]


# Build the whole UI: a company-news DB tab, a per-country search tab,
# a worldwide search tab and an AI article-generation tab.
# NOTE(review): container nesting below follows the statement order of the
# original layout; confirm it against the rendered UI.
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비스") as iface:
    init_db()

    with gr.Tabs():
        # Tab: store / load search results in the DB
        with gr.Tab("DB 검색"):
            gr.Markdown("## 한국 주요 기업 미국 뉴스 DB")
            gr.Markdown("각 기업의 미국 뉴스를 검색하여 DB에 저장하고 불러올 수 있습니다.")

            with gr.Column():
                # Two company cards per row.
                for start in range(0, len(KOREAN_COMPANIES), 2):
                    with gr.Row():
                        for company in KOREAN_COMPANIES[start:start + 2]:
                            with gr.Column():
                                _company_card(company)

            # Overall search statistics
            with gr.Row():
                stats_btn = gr.Button("전체 검색 통계 보기", variant="secondary")
                stats_display = gr.Markdown()

            stats_btn.click(
                fn=show_stats,
                outputs=stats_display
            )

        # Tab: search by country
        with gr.Tab("국가별"):
            gr.Markdown("검색어를 입력하고 원하는 국가(한국 제외)를 선택하면, 검색어와 일치하는 24시간 이내 뉴스를 최대 100개 출력합니다.")
            gr.Markdown("국가 선택후 검색어에 '한글'을 입력하면 현지 언어로 번역되어 검색합니다. 예: 'Taiwan' 국가 선택후 '삼성' 입력시 '三星'으로 자동 검색")

            with gr.Column():
                with gr.Row():
                    query = gr.Textbox(label="검색어")
                    country = gr.Dropdown(
                        choices=sorted(list(COUNTRY_LOCATIONS.keys())),
                        label="국가",
                        value="United States"
                    )

                # Frequently used example queries
                gr.Examples(
                    examples=[
                        "artificial intelligence",
                        "NVIDIA",
                        "OPENAI",
                        "META LLAMA",
                        "black forest labs",
                        "GOOGLE gemini",
                        "anthropic Claude",
                        "X.AI",
                        "HUGGINGFACE",
                        "HYNIX",
                        "Large Language model",
                        "CHATGPT",
                        "StabilityAI",
                        "MISTRALAI",
                        "QWEN",
                        "MIDJOURNEY",
                        "GPU"
                    ],
                    inputs=query,
                    label="자주 사용되는 검색어"
                )

                status_message = gr.Markdown("", visible=True)
                translated_query_display = gr.Markdown(visible=False)
                search_button = gr.Button("검색", variant="primary")

                # Not referenced in this block; kept in case other code uses the global.
                progress = gr.Progress()
                articles_state = gr.State([])

                article_components = _build_article_slots(100)

        # Tab: worldwide search
        with gr.Tab("전세계"):
            gr.Markdown("대륙별로 24시간 이내 뉴스를 검색합니다.")

            with gr.Column():
                with gr.Column(elem_id="status_area"):
                    with gr.Row():
                        query_global = gr.Textbox(label="검색어")
                        region_select = gr.Dropdown(
                            choices=REGIONS,
                            label="지역 선택",
                            value="동아시아"
                        )
                        search_button_global = gr.Button("검색", variant="primary")

                    status_message_global = gr.Markdown("")
                    translated_query_display_global = gr.Markdown("")

                with gr.Column(elem_id="results_area"):
                    articles_state_global = gr.State([])
                    global_article_components = _build_article_slots(MAX_GLOBAL_RESULTS)

        # Tab: AI translation / article generation
        with gr.Tab("AI 기사 생성"):
            gr.Markdown("뉴스 URL을 입력하면 AI가 한국어로 번역하여 기사 형식으로 작성합니다.")
            gr.Markdown("이미지 생성: https://huggingface.co/spaces/ginipick/FLUXllama ")

            with gr.Column():
                chatbot = gr.Chatbot(height=600)

                with gr.Row():
                    url_input = gr.Textbox(
                        label="뉴스 URL",
                        placeholder="https://..."
                    )

                with gr.Row():
                    translate_button = gr.Button("기사 생성", variant="primary")
                    continue_button = gr.Button("계속 이어서 작성", variant="secondary")

                with gr.Accordion("고급 설정", open=False):
                    system_message = gr.Textbox(
                        value="""You are a professional translator and journalist. Follow these steps strictly:
1. TRANSLATION
- Start with ===번역=== marker
- Provide accurate Korean translation
- Maintain original meaning and context
2. ARTICLE WRITING
- Start with ===기사=== marker
- Write a new Korean news article based on the translation
- Follow newspaper article format
- Use formal news writing style
- End sentences with '다.'
- Include headline and subheadline
- Organize paragraphs clearly
- Put key information first
- Use quotes appropriately

3. IMAGE PROMPT GENERATION
- Start with ===이미지 프롬프트=== marker
- Create detailed Korean prompts for image generation
- Prompts should reflect the article's main theme and content
- Include key visual elements mentioned in the article
- Specify style, mood, and composition
- Format: "이미지 설명: [상세 설명]"
- Add style keywords: "스타일: [관련 키워드들]"
- Add mood keywords: "분위기: [관련 키워드들]"
IMPORTANT:
- Must complete all three steps in order
- Clearly separate each section with markers
- Never skip or combine steps
- Ensure image prompts align with article content""",
                        label="System message"
                    )

                    max_tokens = gr.Slider(
                        minimum=1,
                        maximum=7800,
                        value=7624,
                        step=1,
                        label="Max new tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=4.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature"
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="Top-P"
                    )

    # Event wiring
    # Country tab: status, translated query, a hidden placeholder, then
    # five components per article slot, then the state.
    search_outputs = [status_message, translated_query_display, gr.Markdown(visible=False)]
    search_outputs.extend(_slot_outputs(article_components))
    search_outputs.append(articles_state)

    search_button.click(
        fn=search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True
    )

    # Worldwide tab
    global_search_outputs = [status_message_global, translated_query_display_global]
    global_search_outputs.extend(_slot_outputs(global_article_components))
    global_search_outputs.append(articles_state_global)

    search_button_global.click(
        fn=search_global,
        inputs=[query_global, region_select, articles_state_global],
        outputs=global_search_outputs,
        show_progress=True
    )

    # AI article tab: generate from URL
    translate_button.click(
        fn=respond,
        inputs=[
            url_input,
            chatbot,
            system_message,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=chatbot
    )

    # AI article tab: continue the previous answer
    continue_button.click(
        fn=continue_writing,
        inputs=[
            chatbot,
            system_message,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=chatbot
    )
1569
-
1570
# Start the web server on all interfaces, port 7860, behind basic auth.
# Credentials are read from the environment so they can be rotated without a
# code change. SECURITY NOTE(review): the fallbacks keep the previously
# hard-coded values so existing deployments still work; set both variables
# and drop the defaults.
import os

iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
    auth=(
        os.getenv("APP_AUTH_USER", "gini"),
        os.getenv("APP_AUTH_PASSWORD", "pick"),
    ),
    ssl_verify=False,
    show_error=True
)
 
14
  import sqlite3
15
  import pytz
16
 
17
+ import ast #추가 삽입, requirements: albumentations 추가
18
# Bootstrap: the application source is supplied through the APP environment
# variable and executed in this module's namespace.
# SECURITY NOTE(review): exec() runs whatever APP contains with this process's
# full privileges; APP must come only from a trusted, access-controlled
# secret store.
import traceback

script_repr = os.getenv("APP")
if not script_repr:
    # An empty value would otherwise make exec() a silent no-op.
    print("Error: Environment variable 'APP' not set.", file=sys.stderr)
    sys.exit(1)

try:
    exec(script_repr)
except Exception as e:
    print(f"Error executing script: {e}", file=sys.stderr)
    # Keep the full traceback; the one-line message alone hides where it failed.
    traceback.print_exc()
    sys.exit(1)