Preface
Wallpaper Recommendations
Blogger Profile
Blogger Introduction:
- I am everything. The meaning is that I want to take kindness first and character first at any time. I like the four lessons of life-building, ways to change, ways to do good, modesty and efficiency in the four training courses. I prefer the more enlightened pen name of reading on the day of submitting a short book every day: March_ Liu Chao . Focus on Go Web back-end, supplementary Python, Java, algorithms, front-end and other fields. WeChat Public looks forward to your attention. In the future, let's go!
Guide Pack
# coding:utf-8 import time import pymysql from selenium import webdriver from selenium.webdriver import Chrome from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
Initialization: the __init__ method
The __init__ method sets the initial wait times (60 seconds for the dashboard page to load and 5 seconds after each page action) and starts the scrape. If the page has not finished loading within that time, an error is raised and the scrape is retried with longer waits.
def __init__(self): a = 60 b = 5 self.results_A_S_F(a, b)
results_A_S_F
The first step is to connect to the database
# Connect to database connect = pymysql.connect( host='##', # Database Address port=3306, # Database Port user='root', # Database User Name password='######', # Database Password database='#######', # Database table name charset='utf8', # Encoding Method use_unicode=True) # Perform additions, deletions, and changes through cursor cursor = connect.cursor()
Step 2: Set the URL of the page to automate
url = 'https://wahis.oie.int/#/dashboards/qd-dashboard'
Step 3: Set the path to the ChromeDriver executable
Check your Chrome version at: chrome://settings/help
Download the matching ChromeDriver from: https://npm.taobao.org/mirrors/chromedriver/
Here is an example path on Linux:
# chromedriver has added environment variables driver_Chrome = '/usr/bin/chromedriver'
Step 4: Configure headless (no-GUI) mode
# Create profile because phantomjs browser has anti-crawl mechanism for this site, so it can't crawl the 59+ version of chrome browser in macos using Google browser, it must be 57+ version in Linux # Configure Object Add Command to Open Interface-Free Mode # Instantiate Configuration Object chrome_options = Options() chrome_options.add_argument('window-size=1920x1080') # Specify browser resolution chrome_options.add_argument('--no-sandbox') chrome_options.add_argument('--disable-dev-shm-usage') chrome_options.add_argument('--headless') chrome_options.add_argument('blink-settings=imagesEnabled=false') # Configuration object adds command to disable gpu chrome_options.add_argument('--disable-gpu') # Change ip proxy, browser must be restarted # chrome_options.add_argument('--proxy-server=http://121.41.195.16:3307') # Replace user-agent # chrome_options.add_argument('--user-agent=Mozilla/5.0 python37')
Step 5: Create the browser object with the headless options and the driver path
Headless options parameter: chrome_options
Driver path parameter: executable_path
# Create a browser object # self.driver = webdriver.Chrome(executable_path=driver_Chrome) # Interfaced Browser self.driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=driver_Chrome) # No interface browser
Step 6 Accessing URL Addresses
# Access the specified url address self.driver.get(url)
Step 7 Specific Logical Code Implementation
# Give the dashboard `a` seconds to finish loading before touching it.
time.sleep(a)

# Switch into the frame located by the class name "show".
frame = self.driver.find_element_by_class_name('show')
self.driver.switch_to.frame(frame)
# self.driver.save_screenshot("go.png")

# First filter pane: pick "African swine fever virus (Inf. with)" and confirm.
# Every click is followed by a `b`-second pause so the page can react.
self.driver.find_element_by_xpath(
    '//*[@id="content"]/div/div[5]/div/article/div[1]/div/div/qv-filterpane/div/div/div/div[2]').click()
time.sleep(b)
self.driver.find_element_by_xpath("//*[text()='African swine fever virus (Inf. with)']").click()
time.sleep(b)
self.driver.find_element_by_class_name('sel-toolbar-confirm').click()
time.sleep(b)

# Second filter pane: open the toolbar toggle, choose "Select all" and confirm.
self.driver.find_element_by_xpath(
    '//*[@id="content"]/div/div[8]/div/article/div[1]/div/div/qv-filterpane/div/div/div/div[2]').click()
time.sleep(b)
self.driver.find_element_by_class_name('sel-toolbar-icon-toggle').click()
time.sleep(b)
self.driver.find_element_by_xpath("//*[text()='Select all']").click()
time.sleep(b)
self.driver.find_element_by_class_name('sel-toolbar-confirm').click()
time.sleep(b)

# Leave the frame, press the second button of the page-level button group,
# then re-enter the frame to read the result table.
self.driver.switch_to.default_content()
self.driver.find_element_by_xpath('/html/body/app-root/div/app-pi/app-pi-qddashboard/div/div/a/button[2]').click()
time.sleep(b)
frame = self.driver.find_element_by_class_name('show')
self.driver.switch_to.frame(frame)

# Sum the third-column cell of every table row.
cells = self.driver.find_elements_by_xpath(
    '//*[@id="content"]/div/div[1]/div/article/div[1]/div/div/div/div[2]/div/table/tbody/tr/td[3]/div')
results = 0
for cell in cells:
    # Spaces inside the cell text are removed before converting to int.
    # The wait time `a` must not be reused as scratch space here: the
    # exception handler later does `a += 20`, which fails on a string.
    results += int(cell.text.replace(' ', ''))
print(results)
Step 8 Submit the database and close the browser window
cursor.execute("""update results SET result_s=(%s) where id=(%s)""", (results, 1)) # Submit database connect.commit() self.driver.quit()
Step 9: Exception handling
If anything goes wrong, the wait times are increased (the page-load wait by 20 seconds, the per-step wait by 5 seconds) and the whole scrape is run again. Once the increased page-load wait exceeds 200 seconds, the scraper stops retrying and appends an entry to the log file loging.txt.
except Exception as f: print(f) self.driver.quit() a += 20 b += 5 if a > 200: with open("loging.txt", 'a', encoding='utf-8') as f: f.write("\n") f.write("resultAfricanSwineFever") f.close() else: self.results_A_S_F(a, b)
All Codes
class resultAfricanSwineFever:
    """Scrape a total from the WAHIS dashboard and store it in MySQL.

    Instantiating the class runs the whole job: headless Chrome is driven
    through the dashboard filters ("African swine fever virus (Inf. with)"
    in the first pane, "Select all" in the second), the third column of the
    result table is summed, and the sum is written to ``results.result_s``
    for the row with ``id = 1``.

    NOTE(review): ``find_element_by_*``, ``chrome_options=`` and
    ``executable_path=`` are the Selenium 3 spellings; confirm the installed
    Selenium version still accepts them.
    """

    URL = 'https://wahis.oie.int/#/dashboards/qd-dashboard'
    # Location of the chromedriver binary (Linux example).
    DRIVER_PATH = '/usr/bin/chromedriver'

    def __init__(self):
        """Start the scrape with a 60-second load wait and 5-second step wait."""
        a = 60
        b = 5
        self.results_A_S_F(a, b)

    def results_A_S_F(self, a, b):
        """Run the scrape, retrying with longer waits until it succeeds.

        Args:
            a: Seconds to wait for the dashboard to load after opening it.
            b: Seconds to wait after each click on the page.

        Every failed attempt adds 20 to ``a`` and 5 to ``b``. Once ``a``
        exceeds 200 the method stops retrying and appends a marker line to
        ``loging.txt`` instead of raising.
        """
        while True:
            print(a, b)
            try:
                self._scrape_and_store(a, b)
                return
            except Exception as err:
                # Top-level boundary: any failure (page not loaded yet,
                # element missing, database error, ...) triggers a retry.
                print(err)
            a += 20
            b += 5
            if a > 200:
                with open("loging.txt", 'a', encoding='utf-8') as log_file:
                    log_file.write("\nresultAfricanSwineFever")
                return

    def _scrape_and_store(self, a, b):
        """Make one attempt: open the dashboard, read the total, save it.

        The browser and the database connection opened by this attempt are
        always released, whether the attempt succeeds or raises.
        """
        connect = None
        driver = None
        try:
            # Connect to database
            connect = pymysql.connect(
                host='##',  # Database address
                port=3306,  # Database port
                user='root',  # Database user name
                password='######',  # Database password
                database='#######',  # Database name
                charset='utf8',  # Encoding
                use_unicode=True)
            cursor = connect.cursor()

            # Headless browser; for a visible one use
            # webdriver.Chrome(executable_path=self.DRIVER_PATH) instead.
            driver = webdriver.Chrome(chrome_options=self._build_chrome_options(),
                                      executable_path=self.DRIVER_PATH)
            self.driver = driver

            driver.get(self.URL)
            time.sleep(a)  # give the dashboard time to finish loading

            self._apply_filters(driver, b)
            results = self._read_total(driver)
            print(results)

            cursor.execute("""update results SET result_s=(%s) where id=(%s)""", (results, 1))
            connect.commit()
        finally:
            # Only touch what this attempt actually created: either object
            # may be missing if setup failed part-way through.
            try:
                if driver is not None:
                    driver.quit()
            finally:
                if connect is not None:
                    connect.close()

    @staticmethod
    def _build_chrome_options():
        """Return the Chrome options used for the headless browser."""
        chrome_options = Options()
        chrome_options.add_argument('window-size=1920x1080')  # Specify browser resolution
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('blink-settings=imagesEnabled=false')
        chrome_options.add_argument('--disable-gpu')
        # Optional: route through a proxy (the browser must be restarted)
        # chrome_options.add_argument('--proxy-server=http://121.41.195.16:3307')
        # Optional: replace the user-agent
        # chrome_options.add_argument('--user-agent=Mozilla/5.0 python37')
        return chrome_options

    @staticmethod
    def _apply_filters(driver, b):
        """Click through both filter panes and open the result table.

        Each click is followed by a ``b``-second pause so the page can react.
        """
        frame = driver.find_element_by_class_name('show')
        driver.switch_to.frame(frame)
        # driver.save_screenshot("go.png")

        # First filter pane: choose the disease and confirm.
        driver.find_element_by_xpath(
            '//*[@id="content"]/div/div[5]/div/article/div[1]/div/div/qv-filterpane/div/div/div/div[2]').click()
        time.sleep(b)
        driver.find_element_by_xpath("//*[text()='African swine fever virus (Inf. with)']").click()
        time.sleep(b)
        driver.find_element_by_class_name('sel-toolbar-confirm').click()
        time.sleep(b)

        # Second filter pane: open the toolbar toggle, "Select all", confirm.
        driver.find_element_by_xpath(
            '//*[@id="content"]/div/div[8]/div/article/div[1]/div/div/qv-filterpane/div/div/div/div[2]').click()
        time.sleep(b)
        driver.find_element_by_class_name('sel-toolbar-icon-toggle').click()
        time.sleep(b)
        driver.find_element_by_xpath("//*[text()='Select all']").click()
        time.sleep(b)
        driver.find_element_by_class_name('sel-toolbar-confirm').click()
        time.sleep(b)

        # Leave the frame and press the second button of the page-level
        # button group, then wait for the page to update.
        driver.switch_to.default_content()
        driver.find_element_by_xpath(
            '/html/body/app-root/div/app-pi/app-pi-qddashboard/div/div/a/button[2]').click()
        time.sleep(b)

    def _read_total(self, driver):
        """Return the sum of the third-column cells of the result table."""
        frame = driver.find_element_by_class_name('show')
        driver.switch_to.frame(frame)
        cells = driver.find_elements_by_xpath(
            '//*[@id="content"]/div/div[1]/div/article/div[1]/div/div/div/div[2]/div/table/tbody/tr/td[3]/div')
        return sum(self._parse_count(cell.text) for cell in cells)

    @staticmethod
    def _parse_count(text):
        """Convert a cell's text to an int, ignoring spaces inside it.

        Raises:
            ValueError: If nothing numeric remains (e.g. an empty cell);
                the caller treats this as a failed attempt and retries.
        """
        return int(text.replace(' ', ''))


if __name__ == '__main__':
    resultAfricanSwineFever1 = resultAfricanSwineFever()
If you have questions about the implementation, or anything else you would like to discuss, please leave a comment.
That's all for this post. If you want to learn more about Golang or other topics, please like, bookmark, and follow; new posts go up every week, three times a week!