import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains


def githubDataset(url, query):
    """Download the first CSV file linked from a GitHub page via headless Chrome.

    The page at ``url`` is opened and scanned for anchors whose ``href``
    contains ``.csv``. The first anchor whose visible text ends with ``.csv``
    is opened, and the download control on that page is used to fetch the
    file into ``./downloads/<query>`` (created if it does not exist).

    Args:
        url: Address of the page that lists the CSV file(s).
        query: Name of the sub-folder under ``./downloads`` that receives
            the downloaded file.

    Returns:
        None. Progress and failures are reported with ``print``; exceptions
        raised while browsing are caught and printed, not propagated.
    """
    download_folder = os.path.abspath(f"./downloads/{query}")
    os.makedirs(download_folder, exist_ok=True)

    chrome_options = Options()
    # Run Chrome without a visible window; remove this line to watch the UI.
    chrome_options.add_argument("--headless")
    chrome_options.add_experimental_option("prefs", {
        "download.default_directory": download_folder,  # Save into the custom folder
        "download.prompt_for_download": False,  # Don't ask for confirmation to download
        "download.directory_upgrade": True,  # Allow downloading into the custom folder
        "safebrowsing.enabled": True,  # Avoid safe-browsing warnings during download
    })

    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Navigation happens inside the try block so that a failing page load
        # still reaches driver.quit() and does not leave a browser running.
        driver.get(url)

        csv_links = driver.find_elements(By.XPATH, "//a[contains(@href, '.csv')]")
        for link in csv_links:
            csv_file_name = link.text
            # The href filter can match anchors that are not file entries;
            # only accept links whose visible text is a CSV file name.
            if not csv_file_name.endswith(".csv"):
                continue

            print(f"Found CSV file: {csv_file_name}")
            href = link.get_attribute("href")
            driver.get(href)
            time.sleep(5)  # Fixed pause to let the file page render.

            # NOTE(review): this XPath matches on what look like generated CSS
            # class names, so it will probably break when the page's markup
            # changes -- confirm against the current page.
            download_button = driver.find_element(
                By.XPATH,
                "//button[contains(@class, 'Box-sc-g0xbh4-0 ivobqY prc-Button-ButtonBase-c50BI prc-Button-IconButton-szpyj')]",
            )
            href2 = download_button.get_attribute("href")
            if href2:
                driver.get(href2)
                print("Button clicked!!")
            else:
                download_button.click()
            time.sleep(7)  # Fixed pause to give the download time to finish.

            # Only the first matching CSV is downloaded. Leaving the loop here
            # also avoids touching the remaining link elements, which belong
            # to the page we navigated away from.
            break
        else:
            # Reached only when the loop finished without a break.
            print("No CSV file found.")
    except Exception as e:
        # Best-effort: any browsing failure is reported and swallowed.
        print("No CSV File")
        print(e)
    finally:
        driver.quit()
    # print(f"CSV file should be downloaded to {download_folder}")


# Example usage:
# githubDataset("https://github.com/ageron/handson-ml2/tree/master/datasets/housing","housing")
# githubDataset("https://github.com/nytimes/covid-19-data","housing")