运用selenium爬取bing上struts2特征url

  • by

正文:

由于技术有限,以后只能选择去爬取别人公开的漏洞,批量测试这些网站,如果漏洞存在,就收录下来,进一步加以利用
跟前面博客爬取 bing 的做法类似,先在这里做个模板,要用时直接修改参数即可

准备知识:

分割域名:
https://blog.csdn.net/cugb1004101218/article/details/46501461

import urllib
protocol, s1 = urllib.splittype('http://www.freedom.com:8001/img/people')
    # ('http', '//www.freedom.com:8001/img/people')
print protocol
print s1
host, s2=  urllib.splithost(s1)
# ('www.freedom.com:8001', '/img/people')
print host
host, port = urllib.splitport(host)
    # ('www.freedom.com', '8001')
print host

 
模板代码:

#coding=utf-8
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import time
from selenium import webdriver
import selenium.webdriver.support.ui as ui
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
#引入ActionChains鼠标操作类
from selenium.webdriver.common.action_chains import ActionChains
import requests
from bs4 import BeautifulSoup
start_url="https://cn.bing.com/search?q=inurl%3aphp%3fid%3d&qs=HS&sc=8-0&cvid=2EEF822D8FE54B6CAAA1CE0169CA5BC5&FORM=QBLH&sp=1"
keys=""             #进行网络查询
urls=range(200)
get_urls=range(50)  #存在公开0day漏洞的网站
s=[1,2,3,4,5,6,7,8,9]
driver=webdriver.Chrome(executable_path="D:/selenium/chrome/chromedriver.exe")
wait=ui.WebDriverWait(driver,20)
driver.get(start_url)
wait.until(lambda x:x.find_element_by_xpath('//*[@id="sb_form_q"]')).send_keys(keys)
driver.find_element_by_xpath('//*[@id="sb_form_go"]').click()
for i in range(3,10):
	for j in s[0:]:
		try:
			print wait.until(lambda x:x.find_element_by_xpath('//*[@id="b_results"]/li['+str(j)+']/h2/a').get_attribute("href"))
			urls[j]=wait.until(lambda x:x.find_element_by_xpath('//*[@id="b_results"]/li['+str(j)+']/h2/a').get_attribute("href"))
			host1,s1=urllib.splittype(urls[j])
			host2,s2=urllib.splithost(s1)
			get_url=host1+"://"+host2+payloads   #进行payloads比对返回页面,符合条件,收录进get_urls列表中
			try:
				s=requests.get(get_url).content
				if xx in s:  #不符合则继续
					continue
				else:   #处理方法根据情况写
					get_urls[k]=urls[j]
					k=k+1
			except Exception as e:
				continue
		except Exception as e:
			continue
	try:
		ActionChains(driver).click(wait.until(lambda x: x.find_element_by_css_selector("#b_results > li.b_pag > nav > ul > li:nth-child("+str(i)+") > a"))).perform()
	except Exception as e:
		continue
with open("urls.txt","w+") as f:
	for url in get_urls[0:]:
		f.write(str(url))
		f.write('\n')
f.close()

 
2018.8.17

发表评论

电子邮件地址不会被公开。 必填项已用*标注