import re
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import Select
import csv
import time
# Start Chrome with a custom "prefs" profile passed as an experimental option.
# NOTE(review): 2 is presumably Chrome's "block" content-setting value, and
# 'permissions.default.stylesheet' looks like a Firefox preference name --
# confirm that Chrome actually honours it here.
content_settings = {
    'images': 2,
    'permissions.default.stylesheet': 2,
}
prefs = {'profile.default_content_setting_values': content_settings}

options = webdriver.ChromeOptions()
options.add_experimental_option('prefs', prefs)

# The driver binary is looked up by the relative name 'chromedriver.exe'.
browser = webdriver.Chrome(
    executable_path='chromedriver.exe',
    chrome_options=options,
)
# Open the search page and give it a fixed two seconds before touching it.
url = 'https://kns.cnki.net/kns/brief/result.aspx?dbprefix=SCOD'
browser.get(url)
time.sleep(2)

# The results of these two lookups are discarded; they only fail loudly
# (NoSuchElementException) when the expected containers are missing.
browser.find_element_by_id('Form1')
browser.find_element_by_class_name('main_sh')

# Choose which field the keyword box searches.
# NOTE(review): "SQR" is presumably the applicant field -- confirm.
search_field = Select(browser.find_element_by_id("txt_1_sel"))
search_field.select_by_value("SQR")

# Fill in both ends of the date range, "from" first, then "to".
for box_id, date_text in (
    ('date_gkr_from', '2019-01-01'),
    ('date_gkr_to', '2020-01-01'),
):
    browser.find_element_by_id(box_id).send_keys(date_text)
# Load the search terms: one query per line of the input CSV.
# A raw string keeps the backslashes literal (the path value is unchanged) and
# the ``with`` block closes the file handle as soon as it has been read.
with open(r'D:\ptang\data2.csv', 'r') as f:
    content = f.read()

# Accumulator reused by the scraping loop for the ids collected per query.
lst = []

rows = content.split('\n')
# One list of comma-separated cells per line.  Blank lines (typically the
# empty string left after the file's trailing newline) are skipped so that
# they do not become a search with an empty keyword.
final_list = [row.split(',') for row in rows if row.strip()]
# NOTE(review): the nesting below was reconstructed from a copy of this script
# whose indentation had been lost -- confirm it against the intended flow.


def _append_patent_ids(driver, sink):
    """Append the ids found in the currently displayed result table to *sink*.

    For every ``<tr>`` in the ``<tbody>`` of the ``GridTableContent`` table,
    the ``value`` attribute of the row's first ``<input>`` is read and all
    ``CN...!`` fragments in it are extracted with a regular expression.  The
    raw value and the matches are printed for each row.

    Args:
        driver: WebDriver that has already been switched into the result
            frame.
        sink: List that receives one list of matches per table row, appended
            row by row so that rows read before a failure are kept.

    Raises:
        NoSuchElementException: if the table, its body, or a row's
            ``<input>`` cannot be found.
    """
    table = driver.find_element_by_class_name('GridTableContent')
    body = table.find_element_by_tag_name('tbody')
    for tr in body.find_elements_by_tag_name("tr"):
        raw_value = tr.find_element_by_tag_name('input').get_attribute('value')
        print(raw_value)
        matches = re.findall(r"CN.*?!", raw_value)
        print(matches)
        sink.append(matches)


# Run one search per input row and append one summary row per search to the
# output CSV.
for i in final_list:
    print(i)
    # ``i`` is the list of cells of one input line; it is passed as-is.
    browser.find_element_by_xpath('//*[@id="txt_1_value1"]').send_keys(i)
    btn_div = browser.find_element_by_xpath('//*[@id="btnSearch"]')
    # The search button is clicked through JavaScript, not WebElement.click().
    browser.execute_script("arguments[0].click();", btn_div)
    time.sleep(3)

    # NOTE(review): a missing result frame is only reported here; the
    # switch_to.frame() call below would presumably still raise -- confirm
    # whether such queries should be skipped instead.
    try:
        browser.find_element_by_id('iframeResult')
    except NoSuchElementException:
        print('no')
    browser.switch_to.parent_frame()
    browser.switch_to.frame('iframeResult')

    # All digit runs found in the pager title text (kept as a list of strings).
    shuzi = browser.find_element_by_class_name('pagerTitleCell').text
    shuzi = re.findall(r"\d+", shuzi)

    # First page: failures here propagate, exactly as before.
    _append_patent_ids(browser, lst)

    # Remaining pages: keep clicking "next" until that is no longer possible.
    while True:
        try:
            yeshu = browser.find_element_by_class_name('topTurnSpan')
            yeshu.find_element_by_id('Page_next').click()
            # Scroll to the bottom and back to the top before reading rows.
            browser.execute_script('var action=document.documentElement.scrollTop=10000')
            browser.execute_script('var action=document.documentElement.scrollTop=0')
            time.sleep(3)
            _append_patent_ids(browser, lst)
        except NoSuchElementException:
            # No "next page" link (or an expected element is missing):
            # treat this as the end of the result list.
            break
        except Exception as exc:
            # Any other failure still ends paging for this query, but it is
            # reported instead of being silently swallowed.  Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            print('pagination stopped early:', exc)
            break

    print(lst)
    # Count letter occurrences in the text form of everything collected.
    # NOTE(review): presumably U/S/A/B are CN publication kind codes (utility
    # model / design / application / grant) -- confirm.  The count covers the
    # whole string, so any other occurrence of these letters is included too.
    collected = str(lst)
    x = collected.count("U")
    y = collected.count('S')
    z = collected.count('A')
    zz = collected.count('B')
    print(x)

    # ``shuzi`` is a list, so it lands in a single CSV cell in its text form.
    row = [x, y, z, zz, shuzi]
    print(row)
    # Raw string: same path value, without relying on invalid escape sequences.
    with open(r'D:\ptang\data3.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(row)
    lst.clear()

    # Leave the result frame and empty the keyword box for the next query.
    browser.switch_to.default_content()
    browser.find_element_by_id('txt_1_value1').clear()
    print('有到这')
    time.sleep(1)

print(final_list)