截图.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. from selenium import webdriver
  2. from selenium.webdriver.edge.service import Service as EdgeService
  3. from selenium.webdriver.common.by import By
  4. from selenium.webdriver.support.ui import WebDriverWait
  5. from selenium.webdriver.support import expected_conditions as EC
  6. from selenium.webdriver.edge.options import Options
  7. import requests
  8. import os
  9. import time
  10. import re
  11. # 设置Edge选项
  12. edge_options = Options()
  13. edge_options.add_argument('--headless') # 如果不需要看到浏览器运行界面,则取消此行注释
  14. edge_options.add_argument('--disable-gpu')
  15. # 设置webdriver路径,注意替换为你自己的msedgedriver路径
  16. webdriver_path = "D:/Drivers/msedgedriver.exe"
  17. service = EdgeService(executable_path=webdriver_path)
  18. driver = webdriver.Edge(service=service, options=edge_options)
  19. wait = WebDriverWait(driver, 10) # 等待元素加载的最大等待时间
  20. save_dir = r"C:\Users\Administrator\Pictures\Captures"
  21. if not os.path.exists(save_dir):
  22. os.makedirs(save_dir)
  23. def sanitize_filename(filename):
  24. """Remove or replace characters that are invalid in filenames."""
  25. return re.sub(r'[<>:"/\\|?*]', '_', filename)
  26. def download_images_from_page(page_url, index_offset=0):
  27. images = driver.find_elements(By.TAG_NAME, 'img')
  28. for img_index, img in enumerate(images):
  29. try:
  30. img_url = img.get_attribute('src')
  31. if img_url is None or img_url == '':
  32. print(f"跳过无src属性的图片 {img_index + index_offset}")
  33. continue
  34. response = requests.get(img_url)
  35. if response.status_code == 200:
  36. # 使用页面URL作为文件名的基础,并添加图片索引以确保唯一性
  37. page_name = sanitize_filename(page_url.replace('https://', '').replace('http://', ''))
  38. save_filename = f"{page_name}_image_{img_index + index_offset}.png"
  39. save_path = os.path.join(save_dir, save_filename)
  40. with open(save_path, 'wb') as file:
  41. file.write(response.content)
  42. print(f"已保存: {save_path}")
  43. else:
  44. print(f"无法下载图片 {img_index + index_offset}, 状态码: {response.status_code}")
  45. except Exception as e:
  46. print(f"处理图片 {img_index + index_offset} 时出错: {e}")
  47. try:
  48. driver.get('https://tsl.uad360.com/')
  49. time.sleep(5) # 给页面加载留出时间
  50. current_url = driver.current_url
  51. # 获取初始页面的所有图片
  52. download_images_from_page(current_url)
  53. # 找到页面中的所有链接
  54. links = driver.find_elements(By.TAG_NAME, 'a')
  55. total_links = len(links)
  56. for link_index, link in enumerate(links):
  57. try:
  58. link_url = link.get_attribute('href')
  59. if link_url is None or link_url == '':
  60. print(f"跳过无效链接 {link_index}")
  61. continue
  62. # 打开新标签页
  63. driver.execute_script("window.open();")
  64. driver.switch_to.window(driver.window_handles[1])
  65. # 访问链接指向的页面
  66. driver.get(link_url)
  67. time.sleep(5) # 给页面加载留出时间
  68. # 下载该页面的所有图片
  69. download_images_from_page(link_url, index_offset=(link_index + 1) * total_links)
  70. # 关闭当前标签页并切换回原始标签页
  71. driver.close()
  72. driver.switch_to.window(driver.window_handles[0])
  73. except Exception as e:
  74. print(f"处理链接 {link_index} 时出错: {e}")
  75. finally:
  76. driver.quit()