|
import os
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

|
|
|
|
|
|
|
|
|
def _wait_for_download(folder, known_files, timeout=60.0, poll_interval=0.5):
    """Wait until a new, fully written file appears in *folder*.

    Args:
        folder: Directory the browser is downloading into.
        known_files: Set of file names that were already in *folder*
            before the download was triggered; these are ignored.
        timeout: Maximum number of seconds to wait.
        poll_interval: Seconds to sleep between directory scans.

    Returns:
        The name of the newly downloaded file, or ``None`` if no finished
        file showed up before *timeout* elapsed.
    """
    deadline = time.monotonic() + timeout
    while True:
        fresh = set(os.listdir(folder)) - known_files
        # Chrome writes in-progress downloads under a ".crdownload" name;
        # ".tmp" and dot-prefixed names are treated as partial as well
        # (presumably temp files on some platforms -- TODO confirm).
        partial = [
            name for name in fresh
            if name.endswith((".crdownload", ".tmp")) or name.startswith(".")
        ]
        finished = sorted(name for name in fresh if name not in partial)
        if finished and not partial:
            return finished[0]
        if time.monotonic() >= deadline:
            return None
        time.sleep(poll_interval)


def githubDataset(url, query):
    """Download the first CSV file linked from *url* using headless Chrome.

    The page at *url* is scanned for anchors whose ``href`` contains
    ``.csv``. The first anchor whose visible text ends with ``.csv`` is
    opened, the download button on that page is located, and the download
    is triggered either by following the button's ``href`` (when it has
    one) or by clicking it. The file is saved under ``./downloads/<query>``
    (created if missing).

    Args:
        url: Address of the page that lists the CSV file(s).
        query: Name of the sub-folder of ``./downloads`` to save into.

    Returns:
        None. Progress and failures are reported on stdout.

    Raises:
        Exceptions raised while starting Chrome or loading *url* propagate
        to the caller. Errors while locating or downloading the CSV are
        caught and printed (best effort).
    """
    download_folder = os.path.abspath(f"./downloads/{query}")
    os.makedirs(download_folder, exist_ok=True)

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_experimental_option("prefs", {
        "download.default_directory": download_folder,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True,
    })

    # NOTE: this locator relies on generated CSS class names and will break
    # whenever the site's front-end build changes them.
    download_button_xpath = (
        "//button[contains(@class, 'Box-sc-g0xbh4-0 ivobqY "
        "prc-Button-ButtonBase-c50BI prc-Button-IconButton-szpyj')]"
    )

    driver = webdriver.Chrome(options=chrome_options)
    # Everything after the driver exists runs under try/finally so the
    # browser process is released even when the very first page load fails.
    try:
        try:
            # Legacy headless Chrome ignores the download prefs unless
            # downloads are explicitly allowed over the DevTools protocol.
            driver.execute_cdp_cmd(
                "Page.setDownloadBehavior",
                {"behavior": "allow", "downloadPath": download_folder},
            )
        except WebDriverException:
            # Best effort only: newer Chrome builds honour the prefs above.
            pass

        driver.get(url)

        try:
            csv_links = driver.find_elements(
                By.XPATH, "//a[contains(@href, '.csv')]"
            )

            # Resolve the target URL before navigating away: element
            # handles from this page become stale after the next get().
            csv_href = None
            for link in csv_links:
                csv_file_name = link.text
                if csv_file_name.endswith(".csv"):
                    print(f"Found CSV file: {csv_file_name}")
                    csv_href = link.get_attribute("href")
                    break

            if csv_href is None:
                print("No CSV file found.")
                return

            driver.get(csv_href)

            # Wait for the button to exist instead of sleeping blindly.
            download_button = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located(
                    (By.XPATH, download_button_xpath)
                )
            )

            # Snapshot the folder so only files created by this download
            # are considered when waiting for completion.
            known_files = set(os.listdir(download_folder))

            href2 = download_button.get_attribute("href")
            if href2:
                driver.get(href2)
                print(f"Opened download link: {href2}")
            else:
                download_button.click()
                print("Button clicked!!")

            # Keep the browser alive until the file is completely written;
            # quitting earlier would truncate a slow download.
            saved_name = _wait_for_download(download_folder, known_files)
            if saved_name is None:
                print("Download did not finish before the timeout.")
            else:
                print(f"Downloaded: {saved_name}")
        except Exception as e:
            # Deliberate best-effort boundary: report and carry on.
            print("No CSV File")
            print(e)
    finally:
        driver.quit()
|
|
|
|
|
|
|
|
|
|
|
|