Python crawler project: solving a slider CAPTCHA (using Bilibili as an example)

1. Target website

Bilibili video site login page: https://passport.bilibili.com/login

2. Development environment

This project uses two third-party libraries: Pillow (image handling) and Selenium (driving a Chrome browser).






Please install these libraries as shown below. All other libraries used are part of the standard library and do not need to be installed.

    pip install pillow
    pip install selenium


3. Project process introduction

  1. Initialization
  2. Request the Bilibili login page and simulate typing the account and password
  3. Capture the CAPTCHA picture with the gap (shadow puzzle) and the CAPTCHA picture without the gap
  4. Compare the two CAPTCHA pictures to get the offset of the CAPTCHA slider
  5. Use the offset to calculate the movement track
  6. Operate the slider button, simulating a human drag, to pass verification and log in

5. Bilibili simulated login - initialization and simulated input of the account and password

class Bilibili(object):

	def __init__(self):
		"""Start a Chrome session and store the login URL and credentials.

		``user`` and ``pwd`` are left empty on purpose; fill them in with a
		real account before running.
		"""
		# Create the browser object.
		self.driver = webdriver.Chrome()
		# Implicit wait: element lookups keep polling for up to 3 seconds
		# before failing, so elements that appear late are still found.
		self.driver.implicitly_wait(3)
		# Login page that input_user_pwd() opens.
		self.url = 'https://passport.bilibili.com/login'
		# User name
		self.user = ''
		# Password
		self.pwd = ''

	def close(self):
		"""Close the browser and end the WebDriver session.

		:return: None
		"""
		self.driver.quit()

	def input_user_pwd(self):
		"""Open the login page and type the user name and password.

		Reads ``self.url``, ``self.user`` and ``self.pwd``.

		:return: None
		"""
		# Enter the login page.
		self.driver.get(self.url)

		# Type the user name into its text box.
		tb_user = self.driver.find_element_by_id('login-username')
		tb_user.send_keys(self.user)
		# Type the password into its text box.
		tb_pwd = self.driver.find_element_by_id('login-passwd')
		tb_pwd.send_keys(self.pwd)

6. Capture the CAPTCHA picture with the gap (shadow puzzle) and the CAPTCHA picture without the gap

    def get_screenshot(self):
        """Take a screenshot of the current page.

        The driver returns raw PNG bytes; they are decoded into a Pillow
        image here because get_image() calls ``.crop()`` on the result.

        :return: the screenshot as a ``PIL.Image.Image``
        """
        from io import BytesIO

        from PIL import Image

        png_bytes = self.driver.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))
    def update_style(self):
            //Modify the style property of the picture to display the picture without gap
        js = 'document.querySelectorAll("canvas")[3].style="display:block"'

    def get_position(self):
            //Get four sides when intercepting verification code
        #Navigate to the login button
        bt_login = self.driver.find_element_by_xpath('//a[@class="btn btn-login"]')
        #Simulated Click
        #Get captcha picture object
        code_img = self.driver.find_element_by_xpath('//canvas[@class="geetest_canvas_slice geetest_absolute"]')

        location = code_img.location
        size = code_img.size

        #screenshot = self.get_screenshot()

        #Calculate the image capture area (coordinates of left, top, right, bottom)
        left,top,right,buttom = location['x'],location['y'],location['x']+size['width'],location['y']+size['height']
        return left,top,right,buttom

    def get_image(self):
            //Capture captcha image
        #Get verification code location
        position = self.get_position()
        #Extract the gap captcha image from the screenshot
        captcha1 = self.get_screenshot().crop(position)
        #Modify the style attribute to display a picture of the verification code without gaps
        #Pick out the picture of verification code without gap from the screenshot
        captcha2 = self.get_screenshot().crop(position)

        with open('captcha1.png','wb') as f1 ,open('captcha2.png','wb') as f2:

        return captcha1,captcha2

7. Compare two captcha images to get the offset of captcha slider

    def is_pixel_equal(self,img1,img2,x,y):
            //Determine whether the RGB value of the same pixel of two pictures is equal
        pixel1,pixel2= img1.load()[x,y],img2.load()[x,y]
        #Set a baseline
        sub_index = 60

        if abs(pixel1[0]-pixel2[0])< sub_index and abs(pixel1[1]-pixel2[1])< sub_index and abs(pixel1[2]-pixel2[2])< sub_index:
            return True
            return False

    def get_gap_offset(self,img1,img2):
            //Get the offset of the notch
        x = int(img1.size[0]/4.2)
        for i in range(x,img1.size[0]):
            for j in range(img1.size[1]):
                #Compare the two pictures, (i,j) RGB difference of pixel points, if it is too large, x is the offset value.
                if not self.is_pixel_equal(img1,img2,i,j):
                    x = i
                    return x
        return x

8. Use offset value to calculate movement operation (track)

    def get_track(self,offset):
           //Simulation human drag verification code slider
        track = []
        #Slider start x coordinate
        current = 5
        #Variable speed threshold

        border_point = int(offset*3/5)
        #set time interval
        t = 0.2
        #Set initial speed
        offset +=4
        v = 0
        #Loop until exit when sliding to offset
        while current < offset:
            #Change the motion state according to whether the critical point or not
            if current < border_point:
                a = 1
                a =-0.5
            v0 = v
            v = v0 + a*t

            move = v0*t +0.5*a*t*t

            current += move


        return track

9. Operate the slider button to simulate dragging the slider for verification and login.

    def shake_mouse(self):
        //Simulate hand release mouse shake
        :return: None

    def operate_slider(self,track):
           //Drag slider
        #Get drag button
        back_tracks = [-1,-1,-2,-1]
        slider_bt = self.driver.find_element_by_xpath('//div[@class="geetest_slider_button"]')

        #Click the button to drag the verification code

        #Move forward
        for i in track:
           #The acceleration and deceleration effect is not very good.
           #Every time a random pause of 0-1 / 100s is moved, it swindles the polar test and has a high passing rate.
        #Move in reverse track
        for i in back_tracks:
        #Simulate hand shake
        #Release slider button

    def do_captcha(self):
            //Implement processing verification code
        #Pictures with and without gaps
        img1,img2 = self.get_image()
        #Compare two captcha pictures to get the offset of captcha slider
        offset = self.get_gap_offset(img1,img2)

        #Use offset value to calculate move operation
        track = self.get_track(offset)

        #Operate the slider button to simulate dragging the slider to verify login

    def login(self):
        //Implement the main login logic
        #Go to the login interface and enter the account password
        #Process verification code

        #Close browser

    def run(self):

if __name__ == '__main__':
    # Create the crawler and run the login flow.
    bili = Bilibili()
    bili.run()


Keywords: pip Selenium network Attribute

Added by Marsha on Sat, 26 Oct 2019 00:40:42 +0300