Dede16 committed
Commit c99bc20 · 1 Parent(s): 98b7cf9

Update app.py

Files changed (1)
  1. app.py +18 -33
app.py CHANGED
@@ -129,7 +129,7 @@ def scrap_portal(queri):
     filter_link2 = [url for url in filter_link1 if "help" not in url]
     return filter_link2

-def clean_scrap(artikel,link,models,api_key,azure_api_base,keyword):
+def clean_scrap(artikel,models,api_key,azure_api_base,keyword):
     new_artikel = []
     article = []
     if len(artikel) > 1:
@@ -201,7 +201,7 @@ def clean_scrap(artikel,link,models,api_key,azure_api_base,keyword):
     contents = content[1:]
     contents = [' '.join(contents).replace("article:", '').replace("Article:", '').strip()]

-    return title, judul, link, contents
+    return title, judul, contents

 def scrap_artikel(source_type,source,models,api_key,azure_api_base,keyword):
     options = webdriver.ChromeOptions()
@@ -216,6 +216,7 @@ def scrap_artikel(source_type,source,models,api_key,azure_api_base,keyword):

     if source_type == "keyword":
         artikel =[]
+        URL = None
         link = scrap_portal(source)
         for url in link:
             if cek_url(url):
@@ -236,16 +237,17 @@ def scrap_artikel(source_type,source,models,api_key,azure_api_base,keyword):
                 for paragraph in containers:
                     artic=paragraph.get_text()
                     artikel.append(artic)
+                    URL = URL + url

-        paragraf = ' '.join(artikel)
-        if len(paragraf)>= 18000:
-            part1, part2, part3, part4 = split_article(paragraf)
-            artikels = [part1, part2, part3, part4]
-        else :
-            artikels = [paragraf]
-        title, judul, url, contents = clean_scrap(artikels,url,models,api_key,azure_api_base,keyword)
+        paragraf = ' '.join(artikel)
+        if len(paragraf)>= 18000:
+            part1, part2, part3, part4 = split_article(paragraf)
+            artikels = [part1, part2, part3, part4]
+        else :
+            artikels = [paragraf]
+        title, judul, contents = clean_scrap(artikels,models,api_key,azure_api_base,keyword)

-        return title, judul, url, contents
+        return title, judul, URL, contents

     else:
         wd.get(source)
@@ -270,9 +272,9 @@ def scrap_artikel(source_type,source,models,api_key,azure_api_base,keyword):
             artikels = [part1, part2, part3, part4]
         else :
             artikels = [paragraf]
-        title, judul, url, contents = clean_scrap(artikels,source,models,api_key,azure_api_base,keyword)
+        title, judul, contents = clean_scrap(artikels,models,api_key,azure_api_base,keyword)

-        return title, judul, url, contents
+        return title, judul, source, contents

 def artikel_processing(source_type,source,backlink,keyword,models,api_key,azure_api_base,replicate_key):
     title, judul, url, artikel= scrap_artikel(source_type,source, models, api_key,azure_api_base,keyword)
@@ -486,29 +488,12 @@ def artikel_processing(source_type,source,backlink,keyword,models,api_key,azure_
         os.remove(tmp_path)
     except:
         image = Image.open('botika_logo.jpeg')
-        image = image.crop((3,0,1645,1024))
-        w,h = image.size
-        new_w = int(w/1.641)
-        new_h = int(h/1.641)
-        image = image.resize((new_w, new_h),Image.ANTIALIAS)
-        tmp_path = "image.png"
-        image.save(tmp_path)
-        with open(tmp_path, 'rb') as open_file:
-            byte_img = open_file.read()
-        base64_bytes = base64.b64encode(byte_img)
-        base64_string = base64_bytes.decode('utf-8')
-        base64_string = base64.b64decode(base64_string)
-        image_data= base64_string
-        os.remove(tmp_path)
+
     return judul,content,image,image_data,url

 def scrap(source_type,source,backlink,keyword,version,api_key,azure_api_base,replicate_key):
     # try:
-    judul,kontent,gambar, image_data,url= artikel_processing(source_type,source,backlink,keyword,version,api_key,azure_api_base,replicate_key)
-    title = '<h1>'+judul+'</h1>'
-    desired_timezone = pytz.timezone('Asia/Jakarta')
-    current_time = datetime.datetime.now(desired_timezone)
-    Timestamp = current_time.strftime('%Y-%m-%d %H:%M:%S')
+    judul,kontent,gambar,image_data,url= artikel_processing(source_type,source,backlink,keyword,version,api_key,azure_api_base,replicate_key)

     with open("judul.txt", "w") as file:
         file.write(judul)
@@ -548,8 +533,7 @@ def scrap(source_type,source,backlink,keyword,version,api_key,azure_api_base,rep

     with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file:
         temp_file.write(combined_data)
-
-
+
     repo_name = get_full_repo_name(model_id="Article_Gen4", token="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")
     file_url = upload_file(
         path_or_fileobj=temp_file.name,
@@ -561,6 +545,7 @@
         status = "<h3>Berhasil Generate Artikel</h3>"
         time.sleep(60)
         return status,gambar
+
     else:
         with open('log_activity.txt', 'r') as file:
             existing_data = file.read()
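
For orientation, the sketch below illustrates the interface change this commit makes: clean_scrap no longer receives a link argument and returns only (title, judul, contents), while scrap_artikel now supplies the source URL itself (the accumulated URL in the keyword branch, or source otherwise). This is a minimal stand-in, not the code in app.py: only the signatures and return shapes come from the diff, the function bodies are placeholders, and URL is started as an empty string here (the diff starts it at None) so the string concatenation shown in the diff stays valid.

# Illustrative stand-ins only: signatures and return shapes follow the diff;
# the bodies are placeholders for the real Selenium/BeautifulSoup and LLM logic.

def clean_scrap(artikel, models, api_key, azure_api_base, keyword):
    # After this commit the link is neither passed in nor returned;
    # the caller keeps track of the URL on its own.
    title = ["placeholder title about " + keyword]
    judul = "placeholder judul about " + keyword
    contents = [" ".join(artikel)]
    return title, judul, contents

def scrap_artikel_keyword_flow(urls, models, api_key, azure_api_base, keyword):
    artikel = []
    URL = ""  # assumption: empty string instead of the diff's None, so URL + url is valid
    for url in urls:
        artikel.append("paragraph scraped from " + url)  # placeholder for the real scrape
        URL = URL + url
    paragraf = " ".join(artikel)
    artikels = [paragraf]  # app.py uses split_article() when the text is very long
    title, judul, contents = clean_scrap(artikels, models, api_key, azure_api_base, keyword)
    return title, judul, URL, contents

# artikel_processing() keeps unpacking four values, as in the unchanged context line:
# title, judul, url, artikel = scrap_artikel(source_type, source, models, api_key, azure_api_base, keyword)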