"""Download every image from a start page and from each page it links to.

The script drives a headless Microsoft Edge instance through Selenium,
collects the ``<img>`` elements of the start page, then opens every ``<a>``
target in a temporary tab and collects its images as well. Image bytes are
fetched with ``requests`` and written into ``save_dir``.
"""
import os
import re
import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.options import Options
from selenium.webdriver.edge.service import Service as EdgeService
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Directory that receives the downloaded images. It is created before the
# browser is launched so a filesystem error cannot leave a driver process
# running without cleanup.
save_dir = r"C:\Users\Administrator\Pictures\Captures"
os.makedirs(save_dir, exist_ok=True)

# Edge options.
edge_options = Options()
# Run without a visible browser window; remove this line to watch the browser.
edge_options.add_argument('--headless')
edge_options.add_argument('--disable-gpu')

# Path to the Edge WebDriver binary; replace with your own msedgedriver path.
webdriver_path = "D:/Drivers/msedgedriver.exe"

service = EdgeService(executable_path=webdriver_path)
driver = webdriver.Edge(service=service, options=edge_options)
wait = WebDriverWait(driver, 10)  # Maximum time to wait for elements to load.


def sanitize_filename(filename: str) -> str:
    """Replace characters that are invalid in Windows filenames with ``_``."""
    return re.sub(r'[<>:"/\\|?*]', '_', filename)


def download_images_from_page(page_url: str, index_offset: int = 0) -> None:
    """Save every ``<img>`` of the page currently loaded in ``driver``.

    The images are read from whatever document the module-level ``driver``
    is showing; ``page_url`` is used only to build the output filenames.

    Args:
        page_url: URL used as the filename prefix (scheme stripped and
            invalid filename characters replaced).
        index_offset: Value added to each image's position on the page to
            form the number embedded in the filename and in log messages.

    Each image is handled independently: a failure is printed and the
    remaining images are still processed.
    """
    # The filename prefix depends only on the page, so compute it once.
    page_name = sanitize_filename(
        page_url.replace('https://', '').replace('http://', '')
    )

    images = driver.find_elements(By.TAG_NAME, 'img')
    for img_index, img in enumerate(images):
        image_number = img_index + index_offset
        try:
            img_url = img.get_attribute('src')
            if not img_url:
                print(f"跳过无src属性的图片 {image_number}")
                continue

            # A timeout keeps one unresponsive host from stalling the whole
            # crawl; the resulting exception is reported by the handler below.
            response = requests.get(img_url, timeout=30)
            if response.status_code != 200:
                print(f"无法下载图片 {image_number}, 状态码: {response.status_code}")
                continue

            # The page URL is the base of the filename and the image number
            # keeps names unique. The extension is always ".png", whatever
            # the real image format is.
            save_filename = f"{page_name}_image_{image_number}.png"
            save_path = os.path.join(save_dir, save_filename)
            with open(save_path, 'wb') as file:
                file.write(response.content)
            print(f"已保存: {save_path}")
        except Exception as e:
            # Deliberate best effort: network, filesystem and WebDriver
            # errors for one image must not abort the rest of the page.
            print(f"处理图片 {image_number} 时出错: {e}")


try:
    driver.get('https://tsl.uad360.com/')
    time.sleep(5)  # Give the page time to load.
    current_url = driver.current_url

    # Download all images of the start page.
    download_images_from_page(current_url)

    # Collect every link on the start page.
    links = driver.find_elements(By.TAG_NAME, 'a')
    total_links = len(links)
    # Remember the start window so focus can always be restored to it.
    main_window = driver.current_window_handle

    for link_index, link in enumerate(links):
        try:
            link_url = link.get_attribute('href')
            if not link_url:
                print(f"跳过无效链接 {link_index}")
                continue

            # Open a new tab and switch to it in one step.
            driver.switch_to.new_window('tab')
            try:
                # Visit the linked page.
                driver.get(link_url)
                time.sleep(5)  # Give the page time to load.

                # Download all images of that page.
                download_images_from_page(
                    link_url,
                    index_offset=(link_index + 1) * total_links,
                )
            finally:
                # Always close the temporary tab and return to the start
                # window, even when navigation or downloading failed.
                # Otherwise later iterations would read the link elements
                # from the wrong window and leaked tabs would accumulate.
                driver.close()
                driver.switch_to.window(main_window)
        except Exception as e:
            # Deliberate best effort: one broken link must not stop the crawl.
            print(f"处理链接 {link_index} 时出错: {e}")
finally:
    driver.quit()